#include #include #include #include "node.h" #define STRING_BUFF_SIZE 1024 static char stringBuff[STRING_BUFF_SIZE]; Node* parseHeader(Token** firstToken); Node* parseCodeBlock(Token** firstToken); Node* parseParagraph(Token** startToken); Node* parseList(Token** startToken, int currentLevel, int* returnLevel); char* tokenLineToString(Token** startToken); Node* ParseNodes(Token* firstToken) { Node* firstNode = NULL; Token* currentToken = firstToken; Node* prevNode = NULL; while (currentToken != NULL) { Node* currentNode = NULL; switch (currentToken->type) { case TT_NEWLINE: case TT_WHITESPACE: break; case TT_HASH: // start of header currentNode = parseHeader(¤tToken); break; case TT_TRIPLEBACKTICK: currentNode = parseCodeBlock(¤tToken); break; case TT_DASH: { int r = 0; currentNode = parseList(¤tToken, 0, &r); } break; case TT_NUMBER: if (currentToken->next != NULL && currentToken->next->type == TT_PERIOD) { int r = 0; currentNode = parseList(¤tToken, 0, &r); } else currentNode = parseParagraph(¤tToken); break; case TT_EOF: FreeToken(currentToken); return firstNode; default: // paragraph start? currentNode = parseParagraph(¤tToken); break; } if (currentToken->type == TT_NEWLINE || currentToken->type == TT_WHITESPACE) currentToken = FreeToken(currentToken); if (currentNode == NULL) continue; if (currentNode->type == NT_Error) { ErrorNode* enode = (ErrorNode*)currentNode; printf("hit an error: %s\n", enode->error); return firstNode; } if (prevNode != NULL) prevNode->next = currentNode; if (firstNode == NULL) firstNode = currentNode; prevNode = currentNode; } return firstNode; } Node* parseHeader(Token** startToken) { Token* t = *startToken; // Count the number of TT_HASH tokens int count = 0; while (t->next != NULL && t->type == TT_HASH) { count++; t = FreeToken(t); } if (t->next == NULL) { printf("Header missing text"); return NULL; } // Trim leading whitespace while (t->next != NULL && t->type == TT_WHITESPACE) { t = FreeToken(t); } if (t->next == NULL) { printf("Header missing text"); return NULL; } char* strbuff = tokenLineToString(&t); *startToken = t; HeaderNode* retval = malloc(sizeof(HeaderNode)); switch(count) { case 1: retval->type = NT_Header1; break; case 2: retval->type = NT_Header2; break; case 3: retval->type = NT_Header3; break; default: retval->type = NT_Header4; break; } retval->next = NULL; retval->rawText = strbuff; return (Node*)retval; } Node* parseCodeBlock(Token** startToken) { // find closing ticks int tlen = 0; // number of tokens int clen = 0; // number of characters // skip past the opening triple backtick *startToken = FreeToken(*startToken); // skip the first newline while ((*startToken)->type == TT_NEWLINE) { *startToken = FreeToken(*startToken); } // assign this after we skip tokens so we don't have to // re-skip them later. Token* t = *startToken; while (t->next != NULL && t->type != TT_TRIPLEBACKTICK) { if (t->next->type == TT_EOF) { printf("premature EOF"); ErrorNode* err = malloc(sizeof(ErrorNode)); err->type = NT_Error; err->next = NULL; err->error = "premature EOF searching for closing triple backtick"; return (Node*)err; } tlen++; clen += t->length; t = t->next; } t = *startToken; char* strbuff = malloc(sizeof(char)*clen+1); strbuff[0] = '\0'; int i; for(i = 0; i < tlen; i++) { strncat(strbuff, t->literal, t->length); t = FreeToken(t); } /* * Skip past closing triple backtick * This is modifying the *parameter* that was passed in, * so we can return the node and advance the token tree. */ *startToken = FreeToken(t); while ((*startToken)->type == TT_NEWLINE || (*startToken)->type == TT_WHITESPACE) { *startToken = FreeToken(*startToken); } CodeBlockNode* ret = malloc(sizeof(CodeBlockNode)); ret->type = NT_BlockCode; ret->rawText = strbuff; ret->next = NULL; return (Node*)ret; } Node* parseParagraph(Token** startToken) { ParagraphNode* pnode = malloc(sizeof(ParagraphNode)); pnode->next = NULL; pnode->type = NT_Paragraph; Token* t = *startToken; pnode->ptype = PT_Standard; if (t->type == TT_GT) { pnode->ptype = PT_Quote; // consume TT_GT t = FreeToken(t); } pnode->content = t; Token* prevToken = NULL; while(t != NULL) { // Look for the end of the paragraph. if (t->type == TT_NEWLINE && t->next != NULL) { if (t->next->type == TT_WHITESPACE) { // Consume the newline if the next one is a space. t = FreeToken(t); prevToken->next = t; } else { // Convert this token into a whitespace character t->literal[0] = ' '; t->type = TT_WHITESPACE; if (prevToken != NULL) prevToken->next = t; prevToken = t; t = t->next; } if (pnode->ptype == PT_Quote) { if (t->type == TT_GT) { // removes TT_GT t = FreeToken(t); prevToken->next = t; if (t->next != NULL && t->next->type == TT_WHITESPACE) { // removes TT_WHITESPACE t = FreeToken(t); prevToken->next = t; } continue; } goto paragraphEnd; } switch (t->type) { case TT_NEWLINE: case TT_EOF: case TT_TRIPLEBACKTICK: case TT_GT: goto paragraphEnd; break; default: break; } } // TT_NEWLINE check if (prevToken != NULL) prevToken->next = t; prevToken = t; t = t->next; } paragraphEnd: *startToken = t; // on double newlines, this is the second newline. prevToken->next = NULL; // terminate the pnode->content list // remove trailing whitespace prevToken = NULL; t = pnode->content; while(t != NULL) { if (t->type == TT_WHITESPACE) { if(t->next == NULL) { FreeToken(t); prevToken->next = NULL; break; } else if (t->next->type == TT_WHITESPACE) { /* concatinate the two. */ int len = t->length + t->next->length; char* newws = malloc(sizeof(char)*len+1); newws[0] = '\0'; strncat(newws, t->literal, t->length); strncat(newws, t->next->literal, t->next->length); t = FreeToken(t); prevToken->next = t; t->length = len; free(t->literal); t->literal = newws; } } prevToken = t; t = t->next; } return (Node*)pnode; } Node* parseList(Token** startToken, int currentLevel, int* returnLevel) { Token* t = *startToken; ListNode* lnode = malloc(sizeof(ListNode)); lnode->next = NULL; lnode->nextItem = NULL; lnode->children = NULL; lnode->type = NT_List; //printf("[parseList] t->type:%s\n", TokenTypeString(t->type)); printf("\n"); switch (t->type) { case TT_DASH: lnode->ltype = LT_Unordered; break; case TT_NUMBER: lnode->ltype = LT_NumericOrdered; t = FreeToken(t); // consume the number break; default: { char* estr = malloc(STRING_BUFF_SIZE); snprintf(estr, STRING_BUFF_SIZE,"Unknown list type: '%s'", TokenString(t)); printf("unknown list type %s\n", TokenString(t)); ErrorNode* err = malloc(sizeof(ErrorNode)); err->type = NT_Error; err->next = NULL; err->error = estr; return (Node*)err; } } t = FreeToken(t); // consume the dash or period after number //printf("before tokenLineToString\n"); lnode->rawText = tokenLineToString(&t); printf("level rawText:%s\n", lnode->rawText); int nextLevel = 0; // count whitespace and recurse at given level while (t->type == TT_WHITESPACE) { nextLevel += t->length; t = FreeToken(t); } printf("currentLevel:%d nextLevel:%d\n", currentLevel, nextLevel); if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE)) goto levelDone; if (nextLevel > currentLevel) { // parse children lnode->children = (struct ListNode*)parseList(&t, nextLevel, returnLevel); nextLevel = *returnLevel; } if (t == NULL || t->type == TT_EOF || (nextLevel == 0 && t->type == TT_NEWLINE)) goto levelDone; if (nextLevel == currentLevel) { // parse next lnode->nextItem = (struct ListNode*)parseList(&t, nextLevel, returnLevel); nextLevel = *returnLevel; } if (nextLevel < currentLevel) { } // only at end of current list level // ie, nextLevel < currentLevel levelDone: *startToken = t; *returnLevel = nextLevel; return (Node*)lnode; } char* NodeTypeString(NodeType t) { switch(t) { case NT_Header1: return "NT_Header1"; case NT_Header2: return "NT_Header2"; case NT_Header3: return "NT_Header3"; case NT_Header4: return "NT_Header4"; case NT_Paragraph: return "NT_Paragraph"; case NT_List: return "NT_List"; case NT_InlineCode: return "NT_InlineCode"; case NT_BlockCode: return "NT_BlockCode"; case NT_Bold: return "NT_Bold"; case NT_Underline: return "NT_Underline"; case NT_Error: return "NT_Error"; default: snprintf(stringBuff, 1000, "unknown NodeType: %d", t); return stringBuff; } } char* ParagraphTypeString(ParagraphType t) { switch (t) { case PT_Standard: return "PT_Standard"; case PT_Quote: return "PT_Quote"; case PT_Code: return "PT_Code"; } return "UNKNOWN"; } char* ListTypeString(ListType t) { switch (t) { case LT_Unordered: return "LT_Unordered"; case LT_NumericOrdered: return "LT_NumericOrdered"; case LT_AlphaOrdered: // a) b) c) etc. return "LT_AlphaOrdered"; default: return "UNKNOWN"; } } void ListDebugPrint(ListNode* lnode, int currentLevel) { if (lnode == NULL) return; printf("%*s{ListNode ltype:%s level:%d rawText:%s\n", currentLevel*4, "", ListTypeString(lnode->ltype), currentLevel, lnode->rawText ); if (lnode->children != NULL) ListDebugPrint((ListNode*)lnode->children, currentLevel+1); if (lnode->nextItem != NULL) ListDebugPrint((ListNode*)lnode->nextItem, currentLevel); } void freeListNodes(ListNode* lnode) { if (lnode->children != NULL) freeListNodes((ListNode*)lnode->children); if (lnode->nextItem != NULL) freeListNodes((ListNode*)lnode->nextItem); free(lnode->rawText); free(lnode); } Node* FreeNode(Node* node) { Node* next = node->next; switch (node->type) { case NT_Header1: case NT_Header2: case NT_Header3: case NT_Header4: free(((HeaderNode*)node)->rawText); break; case NT_BlockCode: free(((CodeBlockNode*)node)->rawText); break; case NT_Error: free(((ErrorNode*)node)->error); break; case NT_Paragraph: { ParagraphNode* pnode = (ParagraphNode*)node; Token* t = pnode->content; while ((t = FreeToken(t)) != NULL); } break; case NT_InlineCode: case NT_Bold: case NT_Underline: assert(0 && "//TODO"); break; case NT_List: { ListNode* lnode = (ListNode*)node; freeListNodes(lnode); node = NULL; } break; } if (node != NULL) free(node); return next; } char* tokenLineToString(Token** startToken) { Token* t = *startToken; // Trim leading whitespace while (t->next != NULL && t->type == TT_WHITESPACE) { t = FreeToken(t); } Token* end = t; int len = 0; // find text size while (end->type != TT_NEWLINE && end->type != TT_EOF) { len += end->length; end = end->next; } char* strbuff = malloc(len+1); strbuff[0] = '\0'; while(t != end) { strncat(strbuff, t->literal, t->length); t = FreeToken(t); } if (t->type == TT_NEWLINE) t = FreeToken(t); *startToken = t; return strbuff; }