diff --git a/lexer.c b/lexer.c index 2c303cb..458764b 100644 --- a/lexer.c +++ b/lexer.c @@ -240,13 +240,6 @@ isDigit(char ch) return ('0' <= ch && ch <= '9'); } -void -FreeToken(Token* t) -{ - free(t->literal); - free(t); -} - static Token* newTickToken(Lexer* l) diff --git a/main.c b/main.c index 9909f60..fc71174 100644 --- a/main.c +++ b/main.c @@ -85,11 +85,33 @@ main(int argc, const char** argv) } break; + case NT_Paragraph: + { + ParagraphNode* pnode = (ParagraphNode*)node; + printf("{ParagraphNode ptype:%s}\n", ParagraphTypeString(pnode->ptype)); + Token* content = pnode->content; + while(content != NULL) + { + if (content->type == TT_WHITESPACE) + { + printf(" "); + } + else + { + printf("%s", content->literal); + } + + content = content->next; + } + printf("\n"); + + } + break; + default: printf("%s\n", NodeTypeString(node->type)); } - node = node->next; } diff --git a/node.c b/node.c index f64de6c..d627c36 100644 --- a/node.c +++ b/node.c @@ -9,6 +9,7 @@ static char stringBuff[STRING_BUFF_SIZE]; Node* parseHeader(Token** firstToken); Node* parseCodeBlock(Token** firstToken); +Node* parseParagraph(Token** startToken); Node* ParseNodes(Token* firstToken) @@ -23,7 +24,9 @@ ParseNodes(Token* firstToken) switch (currentToken->type) { case TT_NEWLINE: + case TT_WHITESPACE: break; + case TT_HASH: // start of header currentNode = parseHeader(&currentToken); @@ -37,11 +40,11 @@ ParseNodes(Token* firstToken) return firstNode; default: // paragraph start? 
+ currentNode = parseParagraph(&currentToken); break; } if (currentToken->next == NULL) { - printf("currentToken->next == NULL\n"); break; } @@ -193,6 +196,134 @@ parseCodeBlock(Token** startToken) return (Node*)ret; } +Node* +parseParagraph(Token** startToken) +{ + ParagraphNode* pnode = malloc(sizeof(ParagraphNode)); + pnode->next = NULL; + pnode->type = NT_Paragraph; + Token* t = *startToken; + pnode->ptype = PT_Standard; + + if (t->type == TT_GT) { + pnode->ptype = PT_Quote; + // consume TT_GT + Token* consumed = t; + t = t->next; + FreeToken(consumed); + } + + pnode->content = t; + Token* prevToken = NULL; + Token* consumed = NULL; + + while(t != NULL) + { + + // Look for the end of the paragraph. + if (t->type == TT_NEWLINE && t->next != NULL) + { + if (t->next->type == TT_WHITESPACE) + { + // Consume the newline if the next one is a space. + consumed = t; + t = t->next; + prevToken->next = t; + FreeToken(consumed); + } + else + { + // Convert this token into a whitespace character + t->literal[0] = ' '; + t->type = TT_WHITESPACE; + if (prevToken != NULL) + prevToken->next = t; + prevToken = t; + t = t->next; + } + + if (pnode->ptype == PT_Quote) { + if (t->type == TT_GT) { + // removes TT_GT + consumed = t; + t = t->next; + prevToken->next = t; + FreeToken(consumed); + + if (t->next != NULL && t->next->type == TT_WHITESPACE) + { + // removes TT_WHITESPACE + consumed = t; + t = t->next; + prevToken->next = t; + FreeToken(consumed); + } + continue; + } + goto paragraphEnd; + } + + switch (t->type) + { + case TT_NEWLINE: + case TT_EOF: + case TT_TRIPLEBACKTICK: + case TT_GT: + goto paragraphEnd; + break; + default: + break; + } + } // TT_NEWLINE check + + //printf("t->literal: %s\n", t->literal); + if (prevToken != NULL) + prevToken->next = t; + prevToken = t; + t = t->next; + } + +paragraphEnd: + *startToken = t; // on double newlines, this is the second newline. 
+ prevToken->next = NULL; // terminate the pnode->content list + + // remove trailing whitespace + prevToken = NULL; + t = pnode->content; + while(t != NULL) + { + if (t->type == TT_WHITESPACE) + { + if(t->next == NULL) + { + prevToken->next = NULL; + break; + } + else if (t->next->type == TT_WHITESPACE) + { + // concatinate the two. + int len = t->length + t->next->length; + char* newws = malloc(sizeof(char)*len+1); + newws[0] = '\0'; + strncat(newws, t->literal, t->length); + strncat(newws, t->next->literal, t->next->length); + + consumed = t; + t = t->next; + prevToken->next = t; + FreeToken(consumed); + t->length = len; + free(t->literal); + t->literal = newws; + } + } + prevToken = t; + t = t->next; + } + + return (Node*)pnode; +} + char* NodeTypeString(NodeType t) { @@ -230,3 +361,17 @@ NodeTypeString(NodeType t) } } +char* +ParagraphTypeString(ParagraphType t) +{ + switch (t) + { + case PT_Standard: + return "PT_Standard"; + case PT_Quote: + return "PT_Quote"; + case PT_Code: + return "PT_Code"; + } + return "UNKNOWN"; +} diff --git a/node.h b/node.h index 282c7ac..ec62cab 100644 --- a/node.h +++ b/node.h @@ -55,14 +55,21 @@ typedef struct { char* error; } ErrorNode; -/* +typedef enum { + PT_Standard, + PT_Quote, + PT_Code, +} ParagraphType; + typedef struct { NodeType type; struct Node* next; + ParagraphType ptype; + struct Token* content; } ParagraphNode; -*/ Node* ParseNodes(Token* firstToken); char* NodeTypeString(NodeType t); +char* ParagraphTypeString(ParagraphType t); #endif diff --git a/sample.md b/sample.md index a0e8947..032e62b 100644 --- a/sample.md +++ b/sample.md @@ -10,11 +10,21 @@ _underlined text_ Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas. -Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum -rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos. +this has some `inline + code` in it. 
+> Block Quote thing. +> Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum +> +> rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos. + + tabbed + +``` other code +``` ``` Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui. +one two ``` ### Header 3 @@ -42,5 +52,5 @@ Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui. 1. Ordered second level two 1. Ordered toplevel two 1. Ordered second level one - 1. Ordered third level + 1. Ordered third level 1. Ordered second level two diff --git a/token.c b/token.c index 4d89542..014c3bf 100644 --- a/token.c +++ b/token.c @@ -24,6 +24,14 @@ TokenString(Token* t) return stringBuff; } +void +FreeToken(Token* t) +{ + if (t->type != TT_TRIPLEBACKTICK) + free(t->literal); + free(t); +} + char* TokenTypeString(TokenType tt) { @@ -54,6 +62,8 @@ TokenTypeString(TokenType tt) return "TT_WORD"; case TT_NUMBER: return "TT_NUMBER"; + case TT_GT: + return "TT_GT"; } return "\0"; diff --git a/token.h b/token.h index bb937ea..cee0e44 100644 --- a/token.h +++ b/token.h @@ -16,6 +16,7 @@ typedef enum { TT_NEWLINE, TT_WORD, TT_NUMBER, + TT_GT, // greater than; used for block quotes } TokenType; typedef struct Token { @@ -29,5 +30,6 @@ typedef struct Token { char* TokenString(Token* t); char* TokenTypeString(TokenType tt); +void FreeToken(Token* t); #endif