#include #include #include #include "node.h" #define STRING_BUFF_SIZE 1024 static char stringBuff[STRING_BUFF_SIZE]; Node* parseHeader(Token** firstToken); Node* parseCodeBlock(Token** firstToken); Node* parseParagraph(Token** startToken); Node* ParseNodes(Token* firstToken) { Node* firstNode = NULL; Token* currentToken = firstToken; Node* prevNode = NULL; while (currentToken != NULL) { Node* currentNode = NULL; switch (currentToken->type) { case TT_NEWLINE: case TT_WHITESPACE: break; case TT_HASH: // start of header currentNode = parseHeader(¤tToken); break; case TT_TRIPLEBACKTICK: currentNode = parseCodeBlock(¤tToken); break; case TT_EOF: return firstNode; default: // paragraph start? currentNode = parseParagraph(¤tToken); break; } if (currentToken->type == TT_NEWLINE || currentToken->type == TT_WHITESPACE) currentToken = FreeToken(currentToken); if (currentNode == NULL) continue; if (prevNode != NULL) prevNode->next = currentNode; if (firstNode == NULL) firstNode = currentNode; prevNode = currentNode; } return firstNode; } Node* parseHeader(Token** startToken) { Token* t = *startToken; // Count the number of TT_HASH tokens int count = 1; while (t->next != NULL && t->next->type == TT_HASH) { count++; t = FreeToken(t); } if (t->next == NULL) { printf("Header missing text"); return NULL; } // Trim leading whitespace while (t->next != NULL && t->type == TT_WHITESPACE) { t = FreeToken(t); } if (t->next == NULL) { printf("Header missing text"); return NULL; } Token* end = t; int len = 0; // find header text size while (end->type != TT_NEWLINE && end->type != TT_EOF) { len += end->length; end = end->next; } char* strbuff = malloc(len+1); strbuff[0] = '\0'; while(t != end) { strncat(strbuff, t->literal, t->length); t = FreeToken(t); } *startToken = t; HeaderNode* retval = malloc(sizeof(HeaderNode)); switch(count) { case 1: retval->type = NT_Header1; break; case 2: retval->type = NT_Header2; break; case 3: retval->type = NT_Header3; break; default: retval->type = NT_Header4; break; } retval->next = NULL; retval->rawText = strbuff; return (Node*)retval; } Node* parseCodeBlock(Token** startToken) { // find closing ticks int tlen = 0; // number of tokens int clen = 0; // number of characters // skip past the opening triple backtick *startToken = FreeToken(*startToken); // skip the first newline while ((*startToken)->type == TT_NEWLINE) { *startToken = FreeToken(*startToken); } // assign this after we skip tokens so we don't have to // re-skip them later. Token* t = *startToken; while (t->next != NULL && t->type != TT_TRIPLEBACKTICK) { if (t->next->type == TT_EOF) { printf("premature EOF"); ErrorNode* err = malloc(sizeof(ErrorNode)); err->type = NT_Error; err->next = NULL; err->error = "premature EOF searching for closing triple backtick"; return (Node*)err; } tlen++; clen += t->length; t = t->next; } t = *startToken; char* strbuff = malloc(sizeof(char)*clen+1); strbuff[0] = '\0'; int i; for(i = 0; i < tlen; i++) { strncat(strbuff, t->literal, t->length); t = FreeToken(t); } /* * Skip past closing triple backtick * This is modifying the *parameter* that was passed in, * so we can return the node and advance the token tree. */ *startToken = FreeToken(t); while ((*startToken)->type == TT_NEWLINE || (*startToken)->type == TT_WHITESPACE) { *startToken = FreeToken(*startToken); } CodeBlockNode* ret = malloc(sizeof(CodeBlockNode)); ret->type = NT_BlockCode; ret->rawText = strbuff; ret->next = NULL; return (Node*)ret; } Node* parseParagraph(Token** startToken) { ParagraphNode* pnode = malloc(sizeof(ParagraphNode)); pnode->next = NULL; pnode->type = NT_Paragraph; Token* t = *startToken; pnode->ptype = PT_Standard; if (t->type == TT_GT) { pnode->ptype = PT_Quote; // consume TT_GT t = FreeToken(t); } pnode->content = t; Token* prevToken = NULL; while(t != NULL) { // Look for the end of the paragraph. if (t->type == TT_NEWLINE && t->next != NULL) { if (t->next->type == TT_WHITESPACE) { // Consume the newline if the next one is a space. t = FreeToken(t); prevToken->next = t; } else { // Convert this token into a whitespace character t->literal[0] = ' '; t->type = TT_WHITESPACE; if (prevToken != NULL) prevToken->next = t; prevToken = t; t = t->next; } if (pnode->ptype == PT_Quote) { if (t->type == TT_GT) { // removes TT_GT t = FreeToken(t); prevToken->next = t; if (t->next != NULL && t->next->type == TT_WHITESPACE) { // removes TT_WHITESPACE t = FreeToken(t); prevToken->next = t; } continue; } goto paragraphEnd; } switch (t->type) { case TT_NEWLINE: case TT_EOF: case TT_TRIPLEBACKTICK: case TT_GT: goto paragraphEnd; break; default: break; } } // TT_NEWLINE check if (prevToken != NULL) prevToken->next = t; prevToken = t; t = t->next; } paragraphEnd: *startToken = t; // on double newlines, this is the second newline. prevToken->next = NULL; // terminate the pnode->content list // remove trailing whitespace prevToken = NULL; t = pnode->content; while(t != NULL) { if (t->type == TT_WHITESPACE) { if(t->next == NULL) { FreeToken(t); prevToken->next = NULL; break; } else if (t->next->type == TT_WHITESPACE) { /* concatinate the two. */ int len = t->length + t->next->length; char* newws = malloc(sizeof(char)*len+1); newws[0] = '\0'; strncat(newws, t->literal, t->length); strncat(newws, t->next->literal, t->next->length); t = FreeToken(t); prevToken->next = t; t->length = len; free(t->literal); t->literal = newws; } } prevToken = t; t = t->next; } return (Node*)pnode; } char* NodeTypeString(NodeType t) { switch(t) { case NT_Header1: return "NT_Header1"; case NT_Header2: return "NT_Header2"; case NT_Header3: return "NT_Header3"; case NT_Header4: return "NT_Header4"; case NT_Paragraph: return "NT_Paragraph"; case NT_UnorderedList: return "NT_UnorderedList"; case NT_OrderedList: return "NT_OrderedList"; case NT_InlineCode: return "NT_InlineCode"; case NT_BlockCode: return "NT_BlockCode"; case NT_Bold: return "NT_Bold"; case NT_Underline: return "NT_Underline"; case NT_Error: return "NT_Error"; default: snprintf(stringBuff, 1000, "unknown NodeType: %d", t); return stringBuff; } } char* ParagraphTypeString(ParagraphType t) { switch (t) { case PT_Standard: return "PT_Standard"; case PT_Quote: return "PT_Quote"; case PT_Code: return "PT_Code"; } return "UNKNOWN"; } Node* FreeNode(Node* node) { Node* next = node->next; switch (node->type) { case NT_Header1: case NT_Header2: case NT_Header3: case NT_Header4: free(((HeaderNode*)node)->rawText); break; case NT_BlockCode: free(((CodeBlockNode*)node)->rawText); break; case NT_Error: free(((ErrorNode*)node)->error); break; case NT_Paragraph: { ParagraphNode* pnode = (ParagraphNode*)node; Token* t = pnode->content; while ((t = FreeToken(t)) != NULL); } break; case NT_UnorderedList: case NT_OrderedList: case NT_InlineCode: case NT_Bold: case NT_Underline: assert(0 && "//TODO"); break; } free(node); return next; }