From 7df25d60dd33d1e26bc99ffc7e188f5799f99efe Mon Sep 17 00:00:00 2001 From: Zorchenhimer Date: Sun, 26 Nov 2023 17:34:35 -0500 Subject: [PATCH] Implement lists; Fix header text Removed the extra hash from the header text by counting correctly. Implemented lists! Both unordered and ordered lists have been implemented, and they can even be mixed. Lists starting with letters or roman numerals are not implemented, and probably won't be. --- lexer.c | 2 +- main.c | 11 ++- node.c | 236 ++++++++++++++++++++++++++++++++++++++++++++++++------ node.h | 21 ++++- sample.md | 4 + 5 files changed, 244 insertions(+), 30 deletions(-) diff --git a/lexer.c b/lexer.c index cdf809d..f6f716b 100644 --- a/lexer.c +++ b/lexer.c @@ -160,7 +160,7 @@ newWhitespaceToken(Lexer* l) tok->literal[i] = ch; } tok->literal[count] = '\0'; - tok->length = count; + tok->length = count; // TODO: normalize tab lengths to spaces tok->next = NULL; return tok; } diff --git a/main.c b/main.c index a2c937c..c7cfbf3 100644 --- a/main.c +++ b/main.c @@ -110,6 +110,14 @@ main(int argc, const char** argv) } break; + case NT_List: + { + ListNode* lnode = (ListNode*)node; + ListDebugPrint(lnode, 0); + printf("\n"); + } + break; + default: printf("%s\n", NodeTypeString(node->type)); } @@ -120,7 +128,8 @@ main(int argc, const char** argv) node = firstNode; firstNode = NULL; - while ((node = FreeNode(node)) != NULL); + if (node != NULL) + while ((node = FreeNode(node)) != NULL); if (node != NULL) printf("last node != NULL\n"); diff --git a/node.c b/node.c index e3bf36d..8996050 100644 --- a/node.c +++ b/node.c @@ -11,6 +11,9 @@ static char stringBuff[STRING_BUFF_SIZE]; Node* parseHeader(Token** firstToken); Node* parseCodeBlock(Token** firstToken); Node* parseParagraph(Token** startToken); +Node* parseList(Token** startToken, int currentLevel, int* returnLevel); + +char* tokenLineToString(Token** startToken); Node* ParseNodes(Token* firstToken) @@ -39,7 +42,25 @@ ParseNodes(Token* firstToken) currentNode = 
parseCodeBlock(&currentToken); break; + case TT_DASH: + { + int r = 0; + currentNode = parseList(&currentToken, 0, &r); + } + break; + + case TT_NUMBER: + if (currentToken->next != NULL && currentToken->next->type == TT_PERIOD) + { + int r = 0; + currentNode = parseList(&currentToken, 0, &r); + } + else + currentNode = parseParagraph(&currentToken); + break; + case TT_EOF: + FreeToken(currentToken); return firstNode; default: // paragraph start? @@ -53,6 +74,13 @@ ParseNodes(Token* firstToken) if (currentNode == NULL) continue; + if (currentNode->type == NT_Error) + { + ErrorNode* enode = (ErrorNode*)currentNode; + printf("hit an error: %s\n", enode->error); + return firstNode; + } + if (prevNode != NULL) prevNode->next = currentNode; @@ -71,8 +99,8 @@ parseHeader(Token** startToken) Token* t = *startToken; // Count the number of TT_HASH tokens - int count = 1; - while (t->next != NULL && t->next->type == TT_HASH) + int count = 0; + while (t->next != NULL && t->type == TT_HASH) { count++; t = FreeToken(t); @@ -96,23 +124,7 @@ parseHeader(Token** startToken) return NULL; } - Token* end = t; - int len = 0; - - // find header text size - while (end->type != TT_NEWLINE && end->type != TT_EOF) { - len += end->length; - end = end->next; - } - - char* strbuff = malloc(len+1); - strbuff[0] = '\0'; - - while(t != end) { - strncat(strbuff, t->literal, t->length); - t = FreeToken(t); - } - + char* strbuff = tokenLineToString(&t); *startToken = t; HeaderNode* retval = malloc(sizeof(HeaderNode)); @@ -320,6 +332,90 @@ paragraphEnd: return (Node*)pnode; } +Node* +parseList(Token** startToken, int currentLevel, int* returnLevel) +{ + Token* t = *startToken; + ListNode* lnode = malloc(sizeof(ListNode)); + lnode->next = NULL; + lnode->nextItem = NULL; + lnode->children = NULL; + lnode->type = NT_List; + + //printf("[parseList] t->type:%s\n", TokenTypeString(t->type)); + printf("\n"); + switch (t->type) + { + case TT_DASH: + lnode->ltype = LT_Unordered; + break; + + case TT_NUMBER: + lnode->ltype = 
LT_NumericOrdered; + t = FreeToken(t); // consume the number + break; + + default: + { + + char* estr = malloc(STRING_BUFF_SIZE); + snprintf(estr, STRING_BUFF_SIZE,"Unknown list type: '%s'", TokenString(t)); + + printf("unknown list type %s\n", TokenString(t)); + ErrorNode* err = malloc(sizeof(ErrorNode)); + err->type = NT_Error; + err->next = NULL; + err->error = estr; + return (Node*)err; + } + } + t = FreeToken(t); // consume the dash or period after number + + //printf("before tokenLineToString\n"); + lnode->rawText = tokenLineToString(&t); + printf("level rawText:%s\n", lnode->rawText); + + int nextLevel = 0; + // count whitespace and recurse at given level + while (t->type == TT_WHITESPACE) + { + nextLevel += t->length; + t = FreeToken(t); + } + printf("currentLevel:%d nextLevel:%d\n", currentLevel, nextLevel); + + if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE)) + goto levelDone; + + if (nextLevel > currentLevel) + { + // parse children + lnode->children = (struct ListNode*)parseList(&t, nextLevel, returnLevel); + nextLevel = *returnLevel; + } + + if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE)) + goto levelDone; + + if (nextLevel == currentLevel) + { + // parse next + lnode->nextItem = (struct ListNode*)parseList(&t, nextLevel, returnLevel); + nextLevel = *returnLevel; + } + + if (nextLevel < currentLevel) + { + } + + // only at end of current list level + // ie, nextLevel < currentLevel +levelDone: + *startToken = t; + *returnLevel = nextLevel; + return (Node*)lnode; +} + char* NodeTypeString(NodeType t) { @@ -334,10 +430,8 @@ NodeTypeString(NodeType t) return "NT_Header4"; case NT_Paragraph: return "NT_Paragraph"; - case NT_UnorderedList: - return "NT_UnorderedList"; - case NT_OrderedList: - return "NT_OrderedList"; + case NT_List: + return "NT_List"; case NT_InlineCode: return "NT_InlineCode"; case NT_BlockCode: @@ -370,10 +464,59 @@ ParagraphTypeString(ParagraphType t) return "UNKNOWN"; } 
+char* +ListTypeString(ListType t) +{ + switch (t) + { + case LT_Unordered: + return "LT_Unordered"; + case LT_NumericOrdered: + return "LT_NumericOrdered"; + case LT_AlphaOrdered: // a) b) c) etc. + return "LT_AlphaOrdered"; + default: + return "UNKNOWN"; + } +} + +void +ListDebugPrint(ListNode* lnode, int currentLevel) +{ + if (lnode == NULL) + return; + + printf("%*s{ListNode ltype:%s level:%d rawText:%s\n", + currentLevel*4, "", + ListTypeString(lnode->ltype), + currentLevel, + lnode->rawText + ); + + if (lnode->children != NULL) + ListDebugPrint((ListNode*)lnode->children, currentLevel+1); + + if (lnode->nextItem != NULL) + ListDebugPrint((ListNode*)lnode->nextItem, currentLevel); +} + +void +freeListNodes(ListNode* lnode) +{ + if (lnode->children != NULL) + freeListNodes((ListNode*)lnode->children); + if (lnode->nextItem != NULL) + freeListNodes((ListNode*)lnode->nextItem); + + free(lnode->rawText); + free(lnode); +} + Node* FreeNode(Node* node) { Node* next = node->next; + switch (node->type) { case NT_Header1: @@ -395,14 +538,55 @@ FreeNode(Node* node) while ((t = FreeToken(t)) != NULL); } break; - case NT_UnorderedList: - case NT_OrderedList: case NT_InlineCode: case NT_Bold: case NT_Underline: assert(0 && "//TODO"); break; + case NT_List: + { + ListNode* lnode = (ListNode*)node; + freeListNodes(lnode); + node = NULL; + } + break; } - free(node); + if (node != NULL) + free(node); return next; } + +char* +tokenLineToString(Token** startToken) +{ + Token* t = *startToken; + + // Trim leading whitespace + while (t->next != NULL && t->type == TT_WHITESPACE) + { + t = FreeToken(t); + } + + Token* end = t; + int len = 0; + + // find text size + while (end->type != TT_NEWLINE && end->type != TT_EOF) { + len += end->length; + end = end->next; + } + + char* strbuff = malloc(len+1); + strbuff[0] = '\0'; + + while(t != end) { + strncat(strbuff, t->literal, t->length); + t = FreeToken(t); + } + + if (t->type == TT_NEWLINE) + t = FreeToken(t); + + *startToken = t; + 
return strbuff; +} diff --git a/node.h b/node.h index f79ba5a..47e5a66 100644 --- a/node.h +++ b/node.h @@ -17,8 +17,7 @@ typedef enum { // Container elements // can contain text modifiers NT_Paragraph, - NT_UnorderedList, - NT_OrderedList, + NT_List, // Contained elements (cannot be bare) // text modifiers @@ -30,6 +29,11 @@ typedef enum { NT_Error, } NodeType; +typedef enum { + LT_Unordered, + LT_NumericOrdered, + LT_AlphaOrdered, // a) b) c) etc. +} ListType; typedef struct Node { NodeType type; @@ -54,6 +58,17 @@ typedef struct { char* error; } ErrorNode; +typedef struct { + NodeType type; + struct Node* next; + + ListType ltype; + char* rawText; + + struct ListNode* nextItem; + struct ListNode* children; +} ListNode; + typedef enum { PT_Standard, PT_Quote, @@ -70,6 +85,8 @@ typedef struct { Node* ParseNodes(Token* firstToken); char* NodeTypeString(NodeType t); char* ParagraphTypeString(ParagraphType t); +char* ListTypeString(ListType t); Node* FreeNode(Node* node); +void ListDebugPrint(ListNode* lnode, int currentLevel); #endif diff --git a/sample.md b/sample.md index 92d8211..74efe4d 100644 --- a/sample.md +++ b/sample.md @@ -34,6 +34,8 @@ one two - List item four. 1. Ordered list one + - this one + - that one 1. Ordered list two 1. Ordered list three 1. Ordered list four @@ -41,6 +43,8 @@ one two - Toplevel one - Second level one - Second level two + - Second level three + - Second level four - Toplevel two - Second level one - Third level