Implement lists; Fix header text

Removed the extra hash from the header text by counting correctly.

Implemented lists!  Both unordered and ordered lists have been
implemented, and they can even be mixed.

Lists starting with letters or roman numerals are not implemented, and
probably won't be.
This commit is contained in:
Zorchenhimer 2023-11-26 17:34:35 -05:00
parent abc99b02e6
commit 7df25d60dd
5 changed files with 244 additions and 30 deletions

View File

@ -160,7 +160,7 @@ newWhitespaceToken(Lexer* l)
tok->literal[i] = ch; tok->literal[i] = ch;
} }
tok->literal[count] = '\0'; tok->literal[count] = '\0';
tok->length = count; tok->length = count; // TODO: normalize tab lengths to spaces
tok->next = NULL; tok->next = NULL;
return tok; return tok;
} }

9
main.c
View File

@ -110,6 +110,14 @@ main(int argc, const char** argv)
} }
break; break;
case NT_List:
{
ListNode* lnode = (ListNode*)node;
ListDebugPrint(lnode, 0);
printf("\n");
}
break;
default: default:
printf("%s\n", NodeTypeString(node->type)); printf("%s\n", NodeTypeString(node->type));
} }
@ -120,6 +128,7 @@ main(int argc, const char** argv)
node = firstNode; node = firstNode;
firstNode = NULL; firstNode = NULL;
if (node != NULL)
while ((node = FreeNode(node)) != NULL); while ((node = FreeNode(node)) != NULL);
if (node != NULL) if (node != NULL)

234
node.c
View File

@ -11,6 +11,9 @@ static char stringBuff[STRING_BUFF_SIZE];
Node* parseHeader(Token** firstToken); Node* parseHeader(Token** firstToken);
Node* parseCodeBlock(Token** firstToken); Node* parseCodeBlock(Token** firstToken);
Node* parseParagraph(Token** startToken); Node* parseParagraph(Token** startToken);
// Parses one list item at *startToken plus any nested/following items.
// currentLevel is the indentation of the item being parsed; *returnLevel
// receives the indentation measured in front of the first unconsumed token.
Node* parseList(Token** startToken, int currentLevel, int* returnLevel);
// Joins the literals of the tokens up to the next TT_NEWLINE/TT_EOF into a
// newly malloc'd string, freeing the tokens it consumes and advancing
// *startToken past them. The caller owns (and must free) the result.
char* tokenLineToString(Token** startToken);
Node* Node*
ParseNodes(Token* firstToken) ParseNodes(Token* firstToken)
@ -39,7 +42,25 @@ ParseNodes(Token* firstToken)
currentNode = parseCodeBlock(&currentToken); currentNode = parseCodeBlock(&currentToken);
break; break;
case TT_DASH:
{
int r = 0;
currentNode = parseList(&currentToken, 0, &r);
}
break;
case TT_NUMBER:
if (currentToken->next != NULL && currentToken->next->type == TT_PERIOD)
{
int r = 0;
currentNode = parseList(&currentToken, 0, &r);
}
else
currentNode = parseParagraph(&currentToken);
break;
case TT_EOF: case TT_EOF:
FreeToken(currentToken);
return firstNode; return firstNode;
default: // paragraph start? default: // paragraph start?
@ -53,6 +74,13 @@ ParseNodes(Token* firstToken)
if (currentNode == NULL) if (currentNode == NULL)
continue; continue;
if (currentNode->type == NT_Error)
{
ErrorNode* enode = (ErrorNode*)currentNode;
printf("hit an error: %s\n", enode->error);
return firstNode;
}
if (prevNode != NULL) if (prevNode != NULL)
prevNode->next = currentNode; prevNode->next = currentNode;
@ -71,8 +99,8 @@ parseHeader(Token** startToken)
Token* t = *startToken; Token* t = *startToken;
// Count the number of TT_HASH tokens // Count the number of TT_HASH tokens
int count = 1; int count = 0;
while (t->next != NULL && t->next->type == TT_HASH) while (t->next != NULL && t->type == TT_HASH)
{ {
count++; count++;
t = FreeToken(t); t = FreeToken(t);
@ -96,23 +124,7 @@ parseHeader(Token** startToken)
return NULL; return NULL;
} }
Token* end = t; char* strbuff = tokenLineToString(&t);
int len = 0;
// find header text size
while (end->type != TT_NEWLINE && end->type != TT_EOF) {
len += end->length;
end = end->next;
}
char* strbuff = malloc(len+1);
strbuff[0] = '\0';
while(t != end) {
strncat(strbuff, t->literal, t->length);
t = FreeToken(t);
}
*startToken = t; *startToken = t;
HeaderNode* retval = malloc(sizeof(HeaderNode)); HeaderNode* retval = malloc(sizeof(HeaderNode));
@ -320,6 +332,90 @@ paragraphEnd:
return (Node*)pnode; return (Node*)pnode;
} }
/*
 * Parse a single list item and, recursively, the items that hang off it.
 *
 * startToken   in/out. On entry points at the item's marker: a TT_DASH, or a
 *              TT_NUMBER followed by the token that terminates the number.
 *              On a successful return it points at the first token this call
 *              did not consume. It is left untouched when an error is returned.
 * currentLevel indentation of this item, measured as the summed length of the
 *              whitespace tokens in front of its marker.
 * returnLevel  out. Indentation measured in front of the first unconsumed
 *              token, so the caller can decide which level that token belongs
 *              to. Not written when an error is returned.
 *
 * Returns a malloc'd ListNode (as Node*) whose `children` holds the first more
 * deeply indented item and whose `nextItem` holds the next item at the same
 * indentation. If the token at *startToken is not a list marker, returns a
 * malloc'd ErrorNode instead and consumes nothing.
 *
 * When a nested/following line turns out not to be a list item, the error
 * produced for it is discarded and the list simply ends there; an ErrorNode is
 * never linked into the list tree, because the rest of the code treats every
 * node in that tree as a ListNode.
 */
Node*
parseList(Token** startToken, int currentLevel, int* returnLevel)
{
    Token* t = *startToken;

    ListNode* lnode = malloc(sizeof(ListNode));
    lnode->next = NULL;
    lnode->nextItem = NULL;
    lnode->children = NULL;
    lnode->type = NT_List;

    printf("\n");
    switch (t->type)
    {
        case TT_DASH:
            lnode->ltype = LT_Unordered;
            break;

        case TT_NUMBER:
            lnode->ltype = LT_NumericOrdered;
            t = FreeToken(t); // consume the number
            break;

        default:
            {
                char* estr = malloc(STRING_BUFF_SIZE);
                snprintf(estr, STRING_BUFF_SIZE,"Unknown list type: '%s'", TokenString(t));
                printf("unknown list type %s\n", TokenString(t));

                ErrorNode* err = malloc(sizeof(ErrorNode));
                err->type = NT_Error;
                err->next = NULL;
                err->error = estr;

                // The list node was never handed to anyone; release it here
                // so the error path does not leak it.
                free(lnode);
                return (Node*)err;
            }
    }

    t = FreeToken(t); // consume the dash or period after number

    lnode->rawText = tokenLineToString(&t);
    printf("level rawText:%s\n", lnode->rawText);

    // Measure the indentation of the following line.
    int nextLevel = 0;
    while (t != NULL && t->type == TT_WHITESPACE)
    {
        nextLevel += t->length;
        t = FreeToken(t);
    }

    printf("currentLevel:%d nextLevel:%d\n", currentLevel, nextLevel);

    if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE))
        goto levelDone;

    if (nextLevel > currentLevel)
    {
        // Deeper indentation: the following line starts this item's children.
        Node* child = parseList(&t, nextLevel, returnLevel);
        if (child->type == NT_Error)
        {
            // Not a list item after all. The failed call consumed nothing and
            // did not write *returnLevel, so drop the error and stop here.
            ErrorNode* nested = (ErrorNode*)child;
            free(nested->error);
            free(nested);
            goto levelDone;
        }
        lnode->children = (struct ListNode*)child;
        nextLevel = *returnLevel;
    }

    if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE))
        goto levelDone;

    if (nextLevel == currentLevel)
    {
        // Same indentation: the following line is this item's next sibling.
        Node* sibling = parseList(&t, nextLevel, returnLevel);
        if (sibling->type == NT_Error)
        {
            ErrorNode* nested = (ErrorNode*)sibling;
            free(nested->error);
            free(nested);
            goto levelDone;
        }
        lnode->nextItem = (struct ListNode*)sibling;
        nextLevel = *returnLevel;
    }

    // nextLevel < currentLevel: the following line belongs to an enclosing
    // level, so this level is finished and the caller picks it up.

levelDone:
    *startToken = t;
    *returnLevel = nextLevel;
    return (Node*)lnode;
}
char* char*
NodeTypeString(NodeType t) NodeTypeString(NodeType t)
{ {
@ -334,10 +430,8 @@ NodeTypeString(NodeType t)
return "NT_Header4"; return "NT_Header4";
case NT_Paragraph: case NT_Paragraph:
return "NT_Paragraph"; return "NT_Paragraph";
case NT_UnorderedList: case NT_List:
return "NT_UnorderedList"; return "NT_List";
case NT_OrderedList:
return "NT_OrderedList";
case NT_InlineCode: case NT_InlineCode:
return "NT_InlineCode"; return "NT_InlineCode";
case NT_BlockCode: case NT_BlockCode:
@ -370,10 +464,59 @@ ParagraphTypeString(ParagraphType t)
return "UNKNOWN"; return "UNKNOWN";
} }
/*
 * Returns the enumerator name of a ListType as a string literal, or
 * "UNKNOWN" for any value that is not one of the known list types.
 * The returned pointer refers to static storage and must not be freed.
 */
char*
ListTypeString(ListType t)
{
    if (t == LT_Unordered)
    {
        return "LT_Unordered";
    }

    if (t == LT_NumericOrdered)
    {
        return "LT_NumericOrdered";
    }

    if (t == LT_AlphaOrdered) // a) b) c) etc.
    {
        return "LT_AlphaOrdered";
    }

    return "UNKNOWN";
}
/*
 * Dumps a list tree to stdout for debugging.
 *
 * Each item is printed on its own line, indented four spaces per level,
 * followed by its children (one level deeper) and then by the remaining
 * items at the same level. A NULL lnode prints nothing.
 */
void
ListDebugPrint(ListNode* lnode, int currentLevel)
{
    const int indent = currentLevel*4;

    // Walk the sibling chain iteratively; only nesting recurses.
    for (ListNode* item = lnode; item != NULL; item = (ListNode*)item->nextItem)
    {
        printf("%*s{ListNode ltype:%s level:%d rawText:%s\n",
                indent, "",
                ListTypeString(item->ltype),
                currentLevel,
                item->rawText
              );

        ListDebugPrint((ListNode*)item->children, currentLevel+1);
    }
}
/*
 * Frees a list item together with its text, all of its children and every
 * item that follows it at the same level.
 *
 * Passing NULL is allowed and does nothing. Siblings are walked in a loop so
 * the recursion depth depends only on how deeply the list is nested, not on
 * how many items it has.
 */
void
freeListNodes(ListNode* lnode)
{
    while (lnode != NULL)
    {
        // Grab the sibling link before the node that holds it is released.
        ListNode* sibling = (ListNode*)lnode->nextItem;

        freeListNodes((ListNode*)lnode->children);
        free(lnode->rawText);
        free(lnode);

        lnode = sibling;
    }
}
Node* Node*
FreeNode(Node* node) FreeNode(Node* node)
{ {
Node* next = node->next; Node* next = node->next;
switch (node->type) switch (node->type)
{ {
case NT_Header1: case NT_Header1:
@ -395,14 +538,55 @@ FreeNode(Node* node)
while ((t = FreeToken(t)) != NULL); while ((t = FreeToken(t)) != NULL);
} }
break; break;
case NT_UnorderedList:
case NT_OrderedList:
case NT_InlineCode: case NT_InlineCode:
case NT_Bold: case NT_Bold:
case NT_Underline: case NT_Underline:
assert(0 && "//TODO"); assert(0 && "//TODO");
break; break;
case NT_List:
{
ListNode* lnode = (ListNode*)node;
freeListNodes(lnode);
node = NULL;
} }
break;
}
if (node != NULL)
free(node); free(node);
return next; return next;
} }
/*
 * Collects the text of one line of tokens into a newly allocated string.
 *
 * Leading TT_WHITESPACE tokens are freed and skipped. The literals of the
 * following tokens, up to but not including the next TT_NEWLINE or TT_EOF,
 * are concatenated. Every token whose text was copied is freed, and a
 * terminating TT_NEWLINE is freed as well; a TT_EOF token is left in place.
 *
 * startToken  in/out. First token of the line on entry; first unconsumed
 *             token on return.
 *
 * Returns a malloc'd, NUL-terminated string that the caller must free, or
 * NULL if the allocation fails (in which case only the leading whitespace has
 * been consumed).
 */
char*
tokenLineToString(Token** startToken)
{
    Token* t = *startToken;

    // Trim leading whitespace
    while (t != NULL && t->next != NULL && t->type == TT_WHITESPACE)
    {
        t = FreeToken(t);
    }

    // Find the end of the line and the space needed for its text. The NULL
    // guard covers a token chain that ends without a TT_NEWLINE/TT_EOF.
    Token* end = t;
    size_t len = 0;
    while (end != NULL && end->type != TT_NEWLINE && end->type != TT_EOF)
    {
        if (end->length > 0)
            len += (size_t)end->length;
        end = end->next;
    }

    char* strbuff = malloc(len+1);
    if (strbuff == NULL)
    {
        *startToken = t;
        return NULL;
    }

    // Append each literal at a running offset instead of re-scanning the
    // buffer for every token. As with strncat, at most `length` characters
    // are taken and copying stops early at a NUL in the literal.
    size_t used = 0;
    while (t != end)
    {
        size_t limit = t->length > 0 ? (size_t)t->length : 0;
        for (size_t i = 0; i < limit && t->literal[i] != '\0'; i++)
        {
            strbuff[used++] = t->literal[i];
        }
        t = FreeToken(t);
    }
    strbuff[used] = '\0';

    if (t != NULL && t->type == TT_NEWLINE)
        t = FreeToken(t);

    *startToken = t;
    return strbuff;
}

21
node.h
View File

@ -17,8 +17,7 @@ typedef enum {
// Container elements // Container elements
// can contain text modifiers // can contain text modifiers
NT_Paragraph, NT_Paragraph,
NT_UnorderedList, NT_List,
NT_OrderedList,
// Contained elements (cannot be bare) // Contained elements (cannot be bare)
// text modifiers // text modifiers
@ -30,6 +29,11 @@ typedef enum {
NT_Error, NT_Error,
} NodeType; } NodeType;
// The kind of marker a list item was written with; stored in ListNode.ltype.
typedef enum {
// Item introduced by a dash.
LT_Unordered,
// Item introduced by a number.
LT_NumericOrdered,
// NOTE(review): parseList never assigns this value in the code visible here.
LT_AlphaOrdered, // a) b) c) etc.
} ListType;
typedef struct Node { typedef struct Node {
NodeType type; NodeType type;
@ -54,6 +58,17 @@ typedef struct {
char* error; char* error;
} ErrorNode; } ErrorNode;
// One item of an ordered or unordered list.
//
// The struct carries the tag `ListNode` so that the `struct ListNode*` links
// below refer to this very type rather than to an unrelated incomplete type.
// Existing casts to and from `struct ListNode*` / `ListNode*` remain valid.
typedef struct ListNode {
    // Leading fields mirror Node so a ListNode* can be passed around as a Node*.
    NodeType type;
    struct Node* next;

    ListType ltype;             // marker style of this item
    char* rawText;              // malloc'd text of the item's line; owned by the node

    struct ListNode* nextItem;  // next item at the same nesting level, or NULL
    struct ListNode* children;  // first item nested one level deeper, or NULL
} ListNode;
typedef enum { typedef enum {
PT_Standard, PT_Standard,
PT_Quote, PT_Quote,
@ -70,6 +85,8 @@ typedef struct {
Node* ParseNodes(Token* firstToken); Node* ParseNodes(Token* firstToken);
char* NodeTypeString(NodeType t); char* NodeTypeString(NodeType t);
char* ParagraphTypeString(ParagraphType t); char* ParagraphTypeString(ParagraphType t);
char* ListTypeString(ListType t);
Node* FreeNode(Node* node); Node* FreeNode(Node* node);
void ListDebugPrint(ListNode* lnode, int currentLevel);
#endif #endif

View File

@ -34,6 +34,8 @@ one two
- List item four. - List item four.
1. Ordered list one 1. Ordered list one
- this one
- that one
1. Ordered list two 1. Ordered list two
1. Ordered list three 1. Ordered list three
1. Ordered list four 1. Ordered list four
@ -41,6 +43,8 @@ one two
- Toplevel one - Toplevel one
- Second level one - Second level one
- Second level two - Second level two
- Second level three
- Second level four
- Toplevel two - Toplevel two
- Second level one - Second level one
- Third level - Third level