409 lines
9.5 KiB
C
409 lines
9.5 KiB
C
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <assert.h>
|
|
|
|
#include "node.h"
|
|
|
|
#define STRING_BUFF_SIZE 1024
|
|
|
|
static char stringBuff[STRING_BUFF_SIZE];
|
|
|
|
Node* parseHeader(Token** firstToken);
|
|
Node* parseCodeBlock(Token** firstToken);
|
|
Node* parseParagraph(Token** startToken);
|
|
|
|
Node*
|
|
ParseNodes(Token* firstToken)
|
|
{
|
|
Node* firstNode = NULL;
|
|
|
|
Token* currentToken = firstToken;
|
|
Node* prevNode = NULL;
|
|
|
|
while (currentToken != NULL)
|
|
{
|
|
Node* currentNode = NULL;
|
|
|
|
switch (currentToken->type)
|
|
{
|
|
case TT_NEWLINE:
|
|
case TT_WHITESPACE:
|
|
break;
|
|
|
|
case TT_HASH:
|
|
// start of header
|
|
currentNode = parseHeader(¤tToken);
|
|
break;
|
|
|
|
case TT_TRIPLEBACKTICK:
|
|
currentNode = parseCodeBlock(¤tToken);
|
|
break;
|
|
|
|
case TT_EOF:
|
|
return firstNode;
|
|
|
|
default: // paragraph start?
|
|
currentNode = parseParagraph(¤tToken);
|
|
break;
|
|
}
|
|
|
|
if (currentToken->type == TT_NEWLINE || currentToken->type == TT_WHITESPACE)
|
|
currentToken = FreeToken(currentToken);
|
|
|
|
if (currentNode == NULL)
|
|
continue;
|
|
|
|
if (prevNode != NULL)
|
|
prevNode->next = currentNode;
|
|
|
|
if (firstNode == NULL)
|
|
firstNode = currentNode;
|
|
|
|
prevNode = currentNode;
|
|
}
|
|
|
|
return firstNode;
|
|
}
|
|
|
|
Node*
|
|
parseHeader(Token** startToken)
|
|
{
|
|
Token* t = *startToken;
|
|
|
|
// Count the number of TT_HASH tokens
|
|
int count = 1;
|
|
while (t->next != NULL && t->next->type == TT_HASH)
|
|
{
|
|
count++;
|
|
t = FreeToken(t);
|
|
}
|
|
|
|
if (t->next == NULL)
|
|
{
|
|
printf("Header missing text");
|
|
return NULL;
|
|
}
|
|
|
|
// Trim leading whitespace
|
|
while (t->next != NULL && t->type == TT_WHITESPACE)
|
|
{
|
|
t = FreeToken(t);
|
|
}
|
|
|
|
if (t->next == NULL)
|
|
{
|
|
printf("Header missing text");
|
|
return NULL;
|
|
}
|
|
|
|
Token* end = t;
|
|
int len = 0;
|
|
|
|
// find header text size
|
|
while (end->type != TT_NEWLINE && end->type != TT_EOF) {
|
|
len += end->length;
|
|
end = end->next;
|
|
}
|
|
|
|
char* strbuff = malloc(len+1);
|
|
strbuff[0] = '\0';
|
|
|
|
while(t != end) {
|
|
strncat(strbuff, t->literal, t->length);
|
|
t = FreeToken(t);
|
|
}
|
|
|
|
*startToken = t;
|
|
|
|
HeaderNode* retval = malloc(sizeof(HeaderNode));
|
|
switch(count) {
|
|
case 1:
|
|
retval->type = NT_Header1;
|
|
break;
|
|
case 2:
|
|
retval->type = NT_Header2;
|
|
break;
|
|
case 3:
|
|
retval->type = NT_Header3;
|
|
break;
|
|
default:
|
|
retval->type = NT_Header4;
|
|
break;
|
|
}
|
|
|
|
retval->next = NULL;
|
|
retval->rawText = strbuff;
|
|
|
|
return (Node*)retval;
|
|
}
|
|
|
|
Node*
|
|
parseCodeBlock(Token** startToken)
|
|
{
|
|
// find closing ticks
|
|
int tlen = 0; // number of tokens
|
|
int clen = 0; // number of characters
|
|
|
|
// skip past the opening triple backtick
|
|
*startToken = FreeToken(*startToken);
|
|
|
|
// skip the first newline
|
|
while ((*startToken)->type == TT_NEWLINE) {
|
|
*startToken = FreeToken(*startToken);
|
|
}
|
|
|
|
// assign this after we skip tokens so we don't have to
|
|
// re-skip them later.
|
|
Token* t = *startToken;
|
|
|
|
while (t->next != NULL && t->type != TT_TRIPLEBACKTICK) {
|
|
if (t->next->type == TT_EOF) {
|
|
printf("premature EOF");
|
|
|
|
ErrorNode* err = malloc(sizeof(ErrorNode));
|
|
err->type = NT_Error;
|
|
err->next = NULL;
|
|
err->error = "premature EOF searching for closing triple backtick";
|
|
|
|
return (Node*)err;
|
|
}
|
|
|
|
tlen++;
|
|
clen += t->length;
|
|
t = t->next;
|
|
}
|
|
|
|
t = *startToken;
|
|
|
|
char* strbuff = malloc(sizeof(char)*clen+1);
|
|
strbuff[0] = '\0';
|
|
int i;
|
|
|
|
for(i = 0; i < tlen; i++) {
|
|
strncat(strbuff, t->literal, t->length);
|
|
t = FreeToken(t);
|
|
}
|
|
|
|
/*
|
|
* Skip past closing triple backtick
|
|
* This is modifying the *parameter* that was passed in,
|
|
* so we can return the node and advance the token tree.
|
|
*/
|
|
*startToken = FreeToken(t);
|
|
|
|
while ((*startToken)->type == TT_NEWLINE || (*startToken)->type == TT_WHITESPACE) {
|
|
*startToken = FreeToken(*startToken);
|
|
}
|
|
|
|
CodeBlockNode* ret = malloc(sizeof(CodeBlockNode));
|
|
ret->type = NT_BlockCode;
|
|
ret->rawText = strbuff;
|
|
ret->next = NULL;
|
|
return (Node*)ret;
|
|
}
|
|
|
|
Node*
|
|
parseParagraph(Token** startToken)
|
|
{
|
|
ParagraphNode* pnode = malloc(sizeof(ParagraphNode));
|
|
pnode->next = NULL;
|
|
pnode->type = NT_Paragraph;
|
|
Token* t = *startToken;
|
|
pnode->ptype = PT_Standard;
|
|
|
|
if (t->type == TT_GT) {
|
|
pnode->ptype = PT_Quote;
|
|
// consume TT_GT
|
|
t = FreeToken(t);
|
|
}
|
|
|
|
pnode->content = t;
|
|
Token* prevToken = NULL;
|
|
|
|
while(t != NULL)
|
|
{
|
|
|
|
// Look for the end of the paragraph.
|
|
if (t->type == TT_NEWLINE && t->next != NULL)
|
|
{
|
|
if (t->next->type == TT_WHITESPACE)
|
|
{
|
|
// Consume the newline if the next one is a space.
|
|
t = FreeToken(t);
|
|
prevToken->next = t;
|
|
}
|
|
else
|
|
{
|
|
// Convert this token into a whitespace character
|
|
t->literal[0] = ' ';
|
|
t->type = TT_WHITESPACE;
|
|
if (prevToken != NULL)
|
|
prevToken->next = t;
|
|
prevToken = t;
|
|
t = t->next;
|
|
}
|
|
|
|
if (pnode->ptype == PT_Quote) {
|
|
if (t->type == TT_GT) {
|
|
// removes TT_GT
|
|
t = FreeToken(t);
|
|
prevToken->next = t;
|
|
|
|
if (t->next != NULL && t->next->type == TT_WHITESPACE)
|
|
{
|
|
// removes TT_WHITESPACE
|
|
t = FreeToken(t);
|
|
prevToken->next = t;
|
|
}
|
|
continue;
|
|
}
|
|
goto paragraphEnd;
|
|
}
|
|
|
|
switch (t->type)
|
|
{
|
|
case TT_NEWLINE:
|
|
case TT_EOF:
|
|
case TT_TRIPLEBACKTICK:
|
|
case TT_GT:
|
|
goto paragraphEnd;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
} // TT_NEWLINE check
|
|
|
|
if (prevToken != NULL)
|
|
prevToken->next = t;
|
|
prevToken = t;
|
|
t = t->next;
|
|
}
|
|
|
|
paragraphEnd:
|
|
*startToken = t; // on double newlines, this is the second newline.
|
|
prevToken->next = NULL; // terminate the pnode->content list
|
|
|
|
// remove trailing whitespace
|
|
prevToken = NULL;
|
|
t = pnode->content;
|
|
while(t != NULL)
|
|
{
|
|
if (t->type == TT_WHITESPACE)
|
|
{
|
|
if(t->next == NULL)
|
|
{
|
|
FreeToken(t);
|
|
prevToken->next = NULL;
|
|
break;
|
|
}
|
|
else if (t->next->type == TT_WHITESPACE)
|
|
{
|
|
/* concatinate the two. */
|
|
int len = t->length + t->next->length;
|
|
char* newws = malloc(sizeof(char)*len+1);
|
|
newws[0] = '\0';
|
|
strncat(newws, t->literal, t->length);
|
|
strncat(newws, t->next->literal, t->next->length);
|
|
|
|
t = FreeToken(t);
|
|
prevToken->next = t;
|
|
|
|
t->length = len;
|
|
free(t->literal);
|
|
t->literal = newws;
|
|
}
|
|
}
|
|
prevToken = t;
|
|
t = t->next;
|
|
}
|
|
|
|
return (Node*)pnode;
|
|
}
|
|
|
|
char*
|
|
NodeTypeString(NodeType t)
|
|
{
|
|
switch(t) {
|
|
case NT_Header1:
|
|
return "NT_Header1";
|
|
case NT_Header2:
|
|
return "NT_Header2";
|
|
case NT_Header3:
|
|
return "NT_Header3";
|
|
case NT_Header4:
|
|
return "NT_Header4";
|
|
case NT_Paragraph:
|
|
return "NT_Paragraph";
|
|
case NT_UnorderedList:
|
|
return "NT_UnorderedList";
|
|
case NT_OrderedList:
|
|
return "NT_OrderedList";
|
|
case NT_InlineCode:
|
|
return "NT_InlineCode";
|
|
case NT_BlockCode:
|
|
return "NT_BlockCode";
|
|
case NT_Bold:
|
|
return "NT_Bold";
|
|
case NT_Underline:
|
|
return "NT_Underline";
|
|
case NT_Error:
|
|
return "NT_Error";
|
|
|
|
default:
|
|
snprintf(stringBuff, 1000, "unknown NodeType: %d", t);
|
|
return stringBuff;
|
|
}
|
|
}
|
|
|
|
char*
|
|
ParagraphTypeString(ParagraphType t)
|
|
{
|
|
switch (t)
|
|
{
|
|
case PT_Standard:
|
|
return "PT_Standard";
|
|
case PT_Quote:
|
|
return "PT_Quote";
|
|
case PT_Code:
|
|
return "PT_Code";
|
|
}
|
|
return "UNKNOWN";
|
|
}
|
|
|
|
/*
 * Release one node together with the data it owns and return its successor,
 * so a whole list can be released with
 *     while (node != NULL) node = FreeNode(node);
 *
 * Owned data released per node type:
 *   NT_Header1..NT_Header4 - HeaderNode.rawText
 *   NT_BlockCode           - CodeBlockNode.rawText
 *   NT_Error               - ErrorNode.error (passed to free(), so it must
 *                            be heap-allocated or NULL)
 *   NT_Paragraph           - every token in ParagraphNode.content
 *
 * The list and inline node types are not implemented yet and trip an assert.
 *
 * Returns node->next, or NULL when node is NULL.
 */
Node*
FreeNode(Node* node)
{
    if (node == NULL)
        return NULL;

    Node* next = node->next;

    switch (node->type)
    {
        case NT_Header1:
        case NT_Header2:
        case NT_Header3:
        case NT_Header4:
            free(((HeaderNode*)node)->rawText);
            break;

        case NT_BlockCode:
            free(((CodeBlockNode*)node)->rawText);
            break;

        case NT_Error:
            free(((ErrorNode*)node)->error);
            break;

        case NT_Paragraph:
        {
            // Test before each call so an empty content list never passes
            // NULL to FreeToken().
            Token* t = ((ParagraphNode*)node)->content;
            while (t != NULL)
                t = FreeToken(t);
        }
        break;

        case NT_UnorderedList:
        case NT_OrderedList:
        case NT_InlineCode:
        case NT_Bold:
        case NT_Underline:
            assert(0 && "//TODO");
            break;
    }

    free(node);
    return next;
}
|