Add nested list parsing (NT_List) to the parser.

What this patch does:
  * lexer.c   - records a TODO about normalizing tab widths in whitespace tokens.
  * main.c    - debug-prints NT_List nodes via ListDebugPrint(); only runs the
                FreeNode() loop when at least one node was parsed.
  * node.c    - adds parseList(), tokenLineToString(), ListTypeString(),
                ListDebugPrint() and freeListNodes(); parseHeader() now counts
                hashes from 0 and builds its text with tokenLineToString();
                ParseNodes() sends TT_DASH and "TT_NUMBER TT_PERIOD" to
                parseList(), frees the TT_EOF token, and stops on NT_Error;
                FreeNode() releases NT_List nodes through freeListNodes().
  * node.h    - replaces NT_UnorderedList/NT_OrderedList with NT_List and
                declares ListType, ListNode, ListTypeString(), ListDebugPrint().
  * sample.md - adds nested list items to the sample input.

Review fixes folded into the added lines (hunk line counts are unchanged):
  * "&currentToken" restored where it had been mangled into an HTML entity.
  * parseList(): NULL-check the token before reading t->type in the whitespace
    loop, and free the unused ListNode on the unknown-list-type error path.
  * tokenLineToString(): stop at the end of the token list instead of
    dereferencing a NULL token.
  * node.h: give ListNode a struct tag so "struct ListNode*" members refer to
    the same type as the typedef.

NOTE(review): line breaks, indentation (4 spaces assumed) and the position of
blank context lines were reconstructed from the hunk line counts; verify them
against the tree (or apply with whitespace tolerance). The "index" hashes are
those of the original patch.

diff --git a/lexer.c b/lexer.c
index cdf809d..f6f716b 100644
--- a/lexer.c
+++ b/lexer.c
@@ -160,7 +160,7 @@ newWhitespaceToken(Lexer* l)
         tok->literal[i] = ch;
     }
     tok->literal[count] = '\0';
-    tok->length = count;
+    tok->length = count; // TODO: normalize tab lengths to spaces
     tok->next = NULL;
     return tok;
 }
diff --git a/main.c b/main.c
index a2c937c..c7cfbf3 100644
--- a/main.c
+++ b/main.c
@@ -110,6 +110,14 @@ main(int argc, const char** argv)
                 }
                 break;
 
+            case NT_List:
+                {
+                    ListNode* lnode = (ListNode*)node;
+                    ListDebugPrint(lnode, 0);
+                    printf("\n");
+                }
+                break;
+
             default:
                 printf("%s\n", NodeTypeString(node->type));
         }
@@ -120,7 +128,8 @@ main(int argc, const char** argv)
     node = firstNode;
     firstNode = NULL;
 
-    while ((node = FreeNode(node)) != NULL);
+    if (node != NULL)
+        while ((node = FreeNode(node)) != NULL);
 
     if (node != NULL)
         printf("last node != NULL\n");
diff --git a/node.c b/node.c
index e3bf36d..8996050 100644
--- a/node.c
+++ b/node.c
@@ -11,6 +11,9 @@ static char stringBuff[STRING_BUFF_SIZE];
 Node* parseHeader(Token** firstToken);
 Node* parseCodeBlock(Token** firstToken);
 Node* parseParagraph(Token** startToken);
+Node* parseList(Token** startToken, int currentLevel, int* returnLevel);
+
+char* tokenLineToString(Token** startToken);
 
 Node*
 ParseNodes(Token* firstToken)
@@ -39,7 +42,25 @@ ParseNodes(Token* firstToken)
                 currentNode = parseCodeBlock(&currentToken);
                 break;
 
+            case TT_DASH:
+                {
+                    int r = 0;
+                    currentNode = parseList(&currentToken, 0, &r);
+                }
+                break;
+
+            case TT_NUMBER:
+                if (currentToken->next != NULL && currentToken->next->type == TT_PERIOD)
+                {
+                    int r = 0;
+                    currentNode = parseList(&currentToken, 0, &r);
+                }
+                else
+                    currentNode = parseParagraph(&currentToken);
+                break;
+
             case TT_EOF:
+                FreeToken(currentToken);
                 return firstNode;
 
             default: // paragraph start?
@@ -53,6 +74,13 @@ ParseNodes(Token* firstToken)
         if (currentNode == NULL)
             continue;
 
+        if (currentNode->type == NT_Error)
+        {
+            ErrorNode* enode = (ErrorNode*)currentNode;
+            printf("hit an error: %s\n", enode->error);
+            return firstNode;
+        }
+
         if (prevNode != NULL)
             prevNode->next = currentNode;
 
@@ -71,8 +99,8 @@ parseHeader(Token** startToken)
     Token* t = *startToken;
 
     // Count the number of TT_HASH tokens
-    int count = 1;
-    while (t->next != NULL && t->next->type == TT_HASH)
+    int count = 0;
+    while (t->next != NULL && t->type == TT_HASH)
     {
         count++;
         t = FreeToken(t);
@@ -96,23 +124,7 @@ parseHeader(Token** startToken)
         return NULL;
     }
 
-    Token* end = t;
-    int len = 0;
-
-    // find header text size
-    while (end->type != TT_NEWLINE && end->type != TT_EOF) {
-        len += end->length;
-        end = end->next;
-    }
-
-    char* strbuff = malloc(len+1);
-    strbuff[0] = '\0';
-
-    while(t != end) {
-        strncat(strbuff, t->literal, t->length);
-        t = FreeToken(t);
-    }
-
+    char* strbuff = tokenLineToString(&t);
     *startToken = t;
 
     HeaderNode* retval = malloc(sizeof(HeaderNode));
@@ -320,6 +332,90 @@ paragraphEnd:
     return (Node*)pnode;
 }
 
+Node*
+parseList(Token** startToken, int currentLevel, int* returnLevel)
+{
+    Token* t = *startToken;
+    ListNode* lnode = malloc(sizeof(ListNode));
+    lnode->next = NULL;
+    lnode->nextItem = NULL;
+    lnode->children = NULL;
+    lnode->type = NT_List;
+
+    //printf("[parseList] t->type:%s\n", TokenTypeString(t->type));
+    printf("\n");
+    switch (t->type)
+    {
+        case TT_DASH:
+            lnode->ltype = LT_Unordered;
+            break;
+
+        case TT_NUMBER:
+            lnode->ltype = LT_NumericOrdered;
+            t = FreeToken(t); // consume the number
+            break;
+
+        default:
+            {
+                free(lnode); // the list node is not returned on this path
+                char* estr = malloc(STRING_BUFF_SIZE);
+                snprintf(estr, STRING_BUFF_SIZE,"Unknown list type: '%s'", TokenString(t));
+
+                printf("unknown list type %s\n", TokenString(t));
+                ErrorNode* err = malloc(sizeof(ErrorNode));
+                err->type = NT_Error;
+                err->next = NULL;
+                err->error = estr;
+                return (Node*)err;
+            }
+    }
+    t = FreeToken(t); // consume the dash or period after number
+
+    //printf("before tokenLineToString\n");
+    lnode->rawText = tokenLineToString(&t);
+    printf("level rawText:%s\n", lnode->rawText);
+
+    int nextLevel = 0;
+    // count whitespace and recurse at given level
+    while (t != NULL && t->type == TT_WHITESPACE)
+    {
+        nextLevel += t->length;
+        t = FreeToken(t);
+    }
+    printf("currentLevel:%d nextLevel:%d\n", currentLevel, nextLevel);
+
+    if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE))
+        goto levelDone;
+
+    if (nextLevel > currentLevel)
+    {
+        // parse children
+        lnode->children = (struct ListNode*)parseList(&t, nextLevel, returnLevel);
+        nextLevel = *returnLevel;
+    }
+
+    if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE))
+        goto levelDone;
+
+    if (nextLevel == currentLevel)
+    {
+        // parse next
+        lnode->nextItem = (struct ListNode*)parseList(&t, nextLevel, returnLevel);
+        nextLevel = *returnLevel;
+    }
+
+    if (nextLevel < currentLevel)
+    {
+    }
+
+    // only at end of current list level
+    // ie, nextLevel < currentLevel
+levelDone:
+    *startToken = t;
+    *returnLevel = nextLevel;
+    return (Node*)lnode;
+}
+
 char*
 NodeTypeString(NodeType t)
 {
@@ -334,10 +430,8 @@ NodeTypeString(NodeType t)
             return "NT_Header4";
         case NT_Paragraph:
             return "NT_Paragraph";
-        case NT_UnorderedList:
-            return "NT_UnorderedList";
-        case NT_OrderedList:
-            return "NT_OrderedList";
+        case NT_List:
+            return "NT_List";
         case NT_InlineCode:
             return "NT_InlineCode";
         case NT_BlockCode:
@@ -370,10 +464,59 @@ ParagraphTypeString(ParagraphType t)
     return "UNKNOWN";
 }
 
+char*
+ListTypeString(ListType t)
+{
+    switch (t)
+    {
+        case LT_Unordered:
+            return "LT_Unordered";
+        case LT_NumericOrdered:
+            return "LT_NumericOrdered";
+        case LT_AlphaOrdered: // a) b) c) etc.
+            return "LT_AlphaOrdered";
+        default:
+            return "UNKNOWN";
+    }
+}
+
+void
+ListDebugPrint(ListNode* lnode, int currentLevel)
+{
+    if (lnode == NULL)
+        return;
+
+    printf("%*s{ListNode ltype:%s level:%d rawText:%s\n",
+            currentLevel*4, "",
+            ListTypeString(lnode->ltype),
+            currentLevel,
+            lnode->rawText
+          );
+
+    if (lnode->children != NULL)
+        ListDebugPrint((ListNode*)lnode->children, currentLevel+1);
+
+    if (lnode->nextItem != NULL)
+        ListDebugPrint((ListNode*)lnode->nextItem, currentLevel);
+}
+
+void
+freeListNodes(ListNode* lnode)
+{
+    if (lnode->children != NULL)
+        freeListNodes((ListNode*)lnode->children);
+    if (lnode->nextItem != NULL)
+        freeListNodes((ListNode*)lnode->nextItem);
+
+    free(lnode->rawText);
+    free(lnode);
+}
+
 Node*
 FreeNode(Node* node)
 {
     Node* next = node->next;
+
     switch (node->type)
     {
         case NT_Header1:
@@ -395,14 +538,55 @@ FreeNode(Node* node)
                 while ((t = FreeToken(t)) != NULL);
             }
             break;
-        case NT_UnorderedList:
-        case NT_OrderedList:
         case NT_InlineCode:
         case NT_Bold:
         case NT_Underline:
             assert(0 && "//TODO");
             break;
+        case NT_List:
+            {
+                ListNode* lnode = (ListNode*)node;
+                freeListNodes(lnode);
+                node = NULL;
+            }
+            break;
     }
-    free(node);
+    if (node != NULL)
+        free(node);
     return next;
 }
+
+char*
+tokenLineToString(Token** startToken)
+{
+    Token* t = *startToken;
+
+    // Trim leading whitespace
+    while (t->next != NULL && t->type == TT_WHITESPACE)
+    {
+        t = FreeToken(t);
+    }
+
+    Token* end = t;
+    int len = 0;
+
+    // find text size
+    while (end != NULL && end->type != TT_NEWLINE && end->type != TT_EOF) {
+        len += end->length;
+        end = end->next;
+    }
+
+    char* strbuff = malloc(len+1);
+    strbuff[0] = '\0';
+
+    while(t != end) {
+        strncat(strbuff, t->literal, t->length);
+        t = FreeToken(t);
+    }
+
+    if (t != NULL && t->type == TT_NEWLINE)
+        t = FreeToken(t);
+
+    *startToken = t;
+    return strbuff;
+}
diff --git a/node.h b/node.h
index f79ba5a..47e5a66 100644
--- a/node.h
+++ b/node.h
@@ -17,8 +17,7 @@ typedef enum {
     // Container elements
     // can contain text modifiers
     NT_Paragraph,
-    NT_UnorderedList,
-    NT_OrderedList,
+    NT_List,
 
     // Contained elements (cannot be bare)
     // text modifiers
@@ -30,6 +29,11 @@ typedef enum {
 
     NT_Error,
 } NodeType;
+typedef enum {
+    LT_Unordered,
+    LT_NumericOrdered,
+    LT_AlphaOrdered, // a) b) c) etc.
+} ListType;
 
 typedef struct Node {
     NodeType type;
@@ -54,6 +58,17 @@ typedef struct {
     char* error;
 } ErrorNode;
 
+typedef struct ListNode {
+    NodeType type;
+    struct Node* next;
+
+    ListType ltype;
+    char* rawText;
+
+    struct ListNode* nextItem;
+    struct ListNode* children;
+} ListNode;
+
 typedef enum {
     PT_Standard,
     PT_Quote,
@@ -70,6 +85,8 @@ typedef struct {
 Node* ParseNodes(Token* firstToken);
 char* NodeTypeString(NodeType t);
 char* ParagraphTypeString(ParagraphType t);
+char* ListTypeString(ListType t);
 Node* FreeNode(Node* node);
+void ListDebugPrint(ListNode* lnode, int currentLevel);
 
 #endif
diff --git a/sample.md b/sample.md
index 92d8211..74efe4d 100644
--- a/sample.md
+++ b/sample.md
@@ -34,6 +34,8 @@ one two
 - List item four.
 
 1. Ordered list one
+    - this one
+    - that one
 1. Ordered list two
 1. Ordered list three
 1. Ordered list four
@@ -41,6 +43,8 @@ one two
 - Toplevel one
     - Second level one
     - Second level two
+    - Second level three
+    - Second level four
 - Toplevel two
     - Second level one
         - Third level