readme-thing/node.c

409 lines
9.5 KiB
C

#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "node.h"
// Capacity, in bytes, of the scratch buffer declared below.
#define STRING_BUFF_SIZE 1024
// File-local scratch buffer; NodeTypeString formats its
// "unknown NodeType" message into it and returns a pointer to it.
static char stringBuff[STRING_BUFF_SIZE];
// Block-level parsers defined further down in this file. ParseNodes calls
// each of them with the address of its current-token pointer, and reads that
// pointer back afterwards to continue parsing from wherever the parser left it.
Node* parseHeader(Token** firstToken);
Node* parseCodeBlock(Token** firstToken);
Node* parseParagraph(Token** startToken);
/*
 * ParseNodes - turn a token list into a singly linked list of block nodes.
 *
 * firstToken: head of the token list to parse; may be NULL.
 *
 * Dispatches on the type of the current token:
 *   TT_HASH            -> parseHeader
 *   TT_TRIPLEBACKTICK  -> parseCodeBlock
 *   TT_NEWLINE /
 *   TT_WHITESPACE      -> released with FreeToken (separators between blocks)
 *   TT_EOF             -> parsing stops
 *   anything else      -> parseParagraph
 *
 * Every block parser receives the address of the cursor and moves it past
 * the tokens it used. FreeToken is declared elsewhere; this file relies on
 * it releasing the token and returning the one that followed it.
 *
 * Returns the first node produced, or NULL when no node was produced.
 * Nodes are chained through their `next` member in source order.
 */
Node*
ParseNodes(Token* firstToken)
{
    Node* firstNode = NULL;
    Node* prevNode = NULL;
    Token* currentToken = firstToken;

    while (currentToken != NULL)
    {
        Node* currentNode = NULL;

        switch (currentToken->type)
        {
            case TT_NEWLINE:
            case TT_WHITESPACE:
                // separators; released just below
                break;
            case TT_HASH:
                // start of header
                currentNode = parseHeader(&currentToken);
                break;
            case TT_TRIPLEBACKTICK:
                currentNode = parseCodeBlock(&currentToken);
                break;
            case TT_EOF:
                return firstNode;
            default: // paragraph start?
                currentNode = parseParagraph(&currentToken);
                break;
        }

        /*
         * A block parser can leave the cursor at NULL when the list ends
         * without a TT_EOF token, so check it before looking at the type.
         */
        if (currentToken != NULL
            && (currentToken->type == TT_NEWLINE
                || currentToken->type == TT_WHITESPACE))
        {
            currentToken = FreeToken(currentToken);
        }

        // Separators and failed parses produce no node.
        if (currentNode == NULL)
            continue;

        // Append to the result list.
        if (prevNode != NULL)
            prevNode->next = currentNode;
        if (firstNode == NULL)
            firstNode = currentNode;
        prevNode = currentNode;
    }
    return firstNode;
}
/*
 * parseHeader - parse a header line: a run of TT_HASH tokens followed by text.
 *
 * startToken: in/out cursor. On entry *startToken is the first TT_HASH of the
 *             header. On return it is the first token the header did not
 *             consume (the TT_NEWLINE / TT_EOF that ended the line, or NULL
 *             if the list ran out).
 *
 * The number of hashes selects the node type: 1..3 give NT_Header1..3 and
 * anything larger gives NT_Header4. The hashes and the whitespace right after
 * them are dropped; the literals of the remaining tokens on the line are
 * concatenated into a heap-allocated rawText (released by FreeNode).
 *
 * Returns the new HeaderNode, or NULL (after printing "Header missing text")
 * when the token list ends before any header text, or NULL when an
 * allocation fails. Consumed tokens are released with FreeToken, which this
 * file relies on returning the following token.
 */
Node*
parseHeader(Token** startToken)
{
    Token* t = *startToken;

    // Count the TT_HASH tokens; afterwards t is the last hash of the run.
    int count = 1;
    while (t->next != NULL && t->next->type == TT_HASH)
    {
        count++;
        t = FreeToken(t);
    }
    if (t->next == NULL)
    {
        printf("Header missing text");
        // Release the final hash too, so the caller is never left holding
        // a pointer to a token that was already freed or re-parsing it.
        *startToken = FreeToken(t);
        return NULL;
    }

    // Drop the last hash so it does not end up in the header text.
    t = FreeToken(t);

    // Trim leading whitespace
    while (t->next != NULL && t->type == TT_WHITESPACE)
    {
        t = FreeToken(t);
    }
    if (t->next == NULL)
    {
        printf("Header missing text");
        // t is the still-live last token; hand it back to the caller.
        *startToken = t;
        return NULL;
    }

    // Find the end of the line and the size of the header text.
    Token* end = t;
    int len = 0;
    while (end != NULL && end->type != TT_NEWLINE && end->type != TT_EOF)
    {
        len += end->length;
        end = end->next;
    }

    char* text = malloc((size_t)len + 1);
    HeaderNode* header = malloc(sizeof(HeaderNode));
    if (text == NULL || header == NULL)
    {
        free(text);
        free(header);
        // Still consume the line so the caller keeps making progress.
        while (t != end)
            t = FreeToken(t);
        *startToken = t;
        return NULL;
    }

    // Concatenate the literals; `tail` tracks the current end of the string
    // so each append does not rescan what was already written.
    text[0] = '\0';
    char* tail = text;
    while (t != end)
    {
        strncat(tail, t->literal, t->length);
        tail += strlen(tail);
        t = FreeToken(t);
    }
    *startToken = t;

    switch (count)
    {
        case 1:
            header->type = NT_Header1;
            break;
        case 2:
            header->type = NT_Header2;
            break;
        case 3:
            header->type = NT_Header3;
            break;
        default:
            header->type = NT_Header4;
            break;
    }
    header->next = NULL;
    header->rawText = text;
    return (Node*)header;
}
/*
 * parseCodeBlock - parse a fenced code block delimited by TT_TRIPLEBACKTICK.
 *
 * startToken: in/out cursor. On entry *startToken is the opening fence.
 *             On success it is left on the first token after the closing
 *             fence and any TT_NEWLINE / TT_WHITESPACE that follow it
 *             (NULL if the list ran out). On failure it is left on the first
 *             token after the opening fence and its newlines, so the caller
 *             continues parsing from there.
 *
 * Returns
 *   - a CodeBlockNode whose heap-allocated rawText is the concatenation of
 *     the literals between the fences (leading newlines skipped), or
 *   - an ErrorNode when TT_EOF or the end of the list is reached before a
 *     closing fence. Its `error` string is heap-allocated because FreeNode
 *     passes it to free(), or
 *   - NULL when an allocation fails.
 *
 * Consumed tokens are released with FreeToken, which this file relies on
 * returning the following token.
 */
Node*
parseCodeBlock(Token** startToken)
{
    static const char eofMessage[] =
        "premature EOF searching for closing triple backtick";

    // skip past the opening triple backtick
    Token* t = FreeToken(*startToken);
    // skip the newline(s) that follow the opening fence
    while (t != NULL && t->type == TT_NEWLINE)
    {
        t = FreeToken(t);
    }
    // Publish the cursor now so the skipped tokens never need re-skipping,
    // including on the error paths below.
    *startToken = t;

    // Find the closing fence and the number of characters before it.
    int clen = 0;
    Token* closing = t;
    while (closing != NULL
           && closing->type != TT_TRIPLEBACKTICK
           && closing->type != TT_EOF)
    {
        clen += closing->length;
        closing = closing->next;
    }

    if (closing == NULL || closing->type == TT_EOF)
    {
        printf("premature EOF");
        ErrorNode* err = malloc(sizeof(ErrorNode));
        char* message = malloc(sizeof eofMessage);
        if (err == NULL || message == NULL)
        {
            free(err);
            free(message);
            return NULL;
        }
        memcpy(message, eofMessage, sizeof eofMessage);
        err->type = NT_Error;
        err->next = NULL;
        err->error = message;
        return (Node*)err;
    }

    // Allocate everything before releasing tokens, so a failed allocation
    // leaves the token list intact.
    char* text = malloc((size_t)clen + 1);
    CodeBlockNode* block = malloc(sizeof(CodeBlockNode));
    if (text == NULL || block == NULL)
    {
        free(text);
        free(block);
        return NULL;
    }

    // Concatenate the literals up to (not including) the closing fence.
    text[0] = '\0';
    char* tail = text;
    while (t != closing)
    {
        strncat(tail, t->literal, t->length);
        tail += strlen(tail);
        t = FreeToken(t);
    }

    /*
     * Skip past the closing triple backtick and trailing blank tokens.
     * This updates the *parameter* that was passed in, so the caller's
     * cursor advances past the whole block.
     */
    t = FreeToken(closing);
    while (t != NULL
           && (t->type == TT_NEWLINE || t->type == TT_WHITESPACE))
    {
        t = FreeToken(t);
    }
    *startToken = t;

    block->type = NT_BlockCode;
    block->rawText = text;
    block->next = NULL;
    return (Node*)block;
}
/*
 * parseParagraph - parse a paragraph (or a '>' quote) into a ParagraphNode.
 *
 * startToken: in/out cursor. On entry *startToken is the first token of the
 *             paragraph. On return it is the token that ended the paragraph
 *             (on a blank line this is the second newline), or NULL if the
 *             list ran out.
 *
 * The paragraph does not copy text: it takes ownership of the tokens
 * themselves. pnode->content becomes the head of a NULL-terminated sub-list
 * cut out of the input list (FreeNode releases it).
 *
 * Rules applied while scanning:
 *   - A leading TT_GT makes the paragraph a PT_Quote and is dropped.
 *   - A TT_NEWLINE followed by TT_WHITESPACE is dropped (the whitespace stays).
 *   - Any other TT_NEWLINE that has a successor is rewritten in place into a
 *     one-space TT_WHITESPACE token.
 *   - After such a newline a quote continues only if the next token is TT_GT;
 *     that TT_GT and one TT_WHITESPACE right after it are dropped.
 *   - After such a newline a standard paragraph ends on TT_NEWLINE, TT_EOF,
 *     TT_TRIPLEBACKTICK or TT_GT.
 *   - TT_EOF always ends the paragraph and is never taken into the content.
 * Afterwards adjacent whitespace tokens are merged and trailing whitespace
 * is removed.
 *
 * Returns the node, or NULL when the paragraph has no content left or an
 * allocation fails. Released tokens go through FreeToken, which this file
 * relies on returning the following token.
 */
Node*
parseParagraph(Token** startToken)
{
    ParagraphNode* pnode = malloc(sizeof(ParagraphNode));
    if (pnode == NULL)
    {
        // Drop one token so the caller cannot spin on the same position.
        *startToken = FreeToken(*startToken);
        return NULL;
    }
    pnode->next = NULL;
    pnode->type = NT_Paragraph;
    pnode->ptype = PT_Standard;

    Token* t = *startToken;
    if (t->type == TT_GT)
    {
        pnode->ptype = PT_Quote;
        // consume TT_GT
        t = FreeToken(t);
    }
    pnode->content = t;

    /*
     * `link` is the pointer slot that currently points at t: first
     * pnode->content, later the `next` member of the last kept token.
     * Writing through it relinks the list without a special case for
     * "no previous token yet".
     */
    Token** link = &pnode->content;

    while (t != NULL)
    {
        // Never swallow the end-of-input marker into the paragraph.
        if (t->type == TT_EOF)
            break;

        // Look for the end of the paragraph.
        if (t->type == TT_NEWLINE && t->next != NULL)
        {
            if (t->next->type == TT_WHITESPACE)
            {
                // Consume the newline if the next one is a space.
                t = FreeToken(t);
                *link = t;
            }
            else
            {
                // Convert this token into a whitespace character
                t->literal[0] = ' ';
                t->type = TT_WHITESPACE;
                link = &t->next;
                t = t->next;
            }

            if (pnode->ptype == PT_Quote)
            {
                // A quote only continues on lines that start with '>'.
                if (t->type != TT_GT)
                    break;

                // removes TT_GT
                t = FreeToken(t);
                *link = t;
                if (t != NULL && t->type == TT_WHITESPACE)
                {
                    // removes the TT_WHITESPACE that followed the '>'
                    t = FreeToken(t);
                    *link = t;
                }
                continue;
            }

            if (t->type == TT_NEWLINE
                || t->type == TT_EOF
                || t->type == TT_TRIPLEBACKTICK
                || t->type == TT_GT)
            {
                break;
            }
        } // TT_NEWLINE check

        link = &t->next;
        t = t->next;
    }

    *startToken = t; // on double newlines, this is the second newline.
    *link = NULL;    // terminate the pnode->content list

    // Merge runs of whitespace and remove trailing whitespace.
    link = &pnode->content;
    t = pnode->content;
    while (t != NULL)
    {
        if (t->type == TT_WHITESPACE)
        {
            if (t->next == NULL)
            {
                // trailing whitespace: drop it and stop
                FreeToken(t);
                *link = NULL;
                break;
            }
            if (t->next->type == TT_WHITESPACE)
            {
                /* concatenate the two into the second token */
                int len = t->length + t->next->length;
                char* merged = malloc((size_t)len + 1);
                if (merged != NULL)
                {
                    merged[0] = '\0';
                    strncat(merged, t->literal, t->length);
                    strncat(merged, t->next->literal, t->next->length);
                    t = FreeToken(t);
                    *link = t;
                    t->length = len;
                    free(t->literal);
                    t->literal = merged;
                    // Look at the merged token again: it may now be the
                    // trailing one, or be followed by more whitespace.
                    continue;
                }
                // Out of memory: leave the two tokens unmerged.
            }
        }
        link = &t->next;
        t = t->next;
    }

    if (pnode->content == NULL)
    {
        // Nothing but markers/whitespace: there is no paragraph to return.
        free(pnode);
        return NULL;
    }
    return (Node*)pnode;
}
/*
 * NodeTypeString - name of a NodeType value, for diagnostics.
 *
 * Returns a string literal spelling the enumerator for every known type.
 * For any other value the text "unknown NodeType: <n>" is formatted into the
 * file-local stringBuff and a pointer to that buffer is returned; that
 * buffer is overwritten by the next such call. The result must never be
 * modified or freed by the caller.
 */
char*
NodeTypeString(NodeType t)
{
    switch (t)
    {
        case NT_Header1:
            return "NT_Header1";
        case NT_Header2:
            return "NT_Header2";
        case NT_Header3:
            return "NT_Header3";
        case NT_Header4:
            return "NT_Header4";
        case NT_Paragraph:
            return "NT_Paragraph";
        case NT_UnorderedList:
            return "NT_UnorderedList";
        case NT_OrderedList:
            return "NT_OrderedList";
        case NT_InlineCode:
            return "NT_InlineCode";
        case NT_BlockCode:
            return "NT_BlockCode";
        case NT_Bold:
            return "NT_Bold";
        case NT_Underline:
            return "NT_Underline";
        case NT_Error:
            return "NT_Error";
        default:
            // Bound the write by the buffer's real size, and pass the enum
            // as the int that %d expects.
            snprintf(stringBuff, sizeof stringBuff,
                     "unknown NodeType: %d", (int)t);
            return stringBuff;
    }
}
/*
 * ParagraphTypeString - name of a ParagraphType value, for diagnostics.
 *
 * Returns a string literal spelling the enumerator, or "UNKNOWN" for a value
 * that is none of PT_Standard, PT_Quote, PT_Code. The result must not be
 * modified or freed.
 */
char*
ParagraphTypeString(ParagraphType t)
{
    char* label = "UNKNOWN";

    if (t == PT_Standard)
        label = "PT_Standard";
    else if (t == PT_Quote)
        label = "PT_Quote";
    else if (t == PT_Code)
        label = "PT_Code";

    return label;
}
/*
 * FreeNode - release one node together with the data it owns.
 *
 * node: the node to release; NULL is accepted and ignored.
 *
 * What is released depends on node->type:
 *   NT_Header1..4   rawText
 *   NT_BlockCode    rawText
 *   NT_Error        error (passed to free(), so it must be heap-allocated)
 *   NT_Paragraph    every token in the content list, via FreeToken
 * List and inline node types are not implemented yet and trip an assert.
 *
 * Returns the node that followed `node` (its `next` member), or NULL, so a
 * whole list can be released with `while (n) n = FreeNode(n);`.
 */
Node*
FreeNode(Node* node)
{
    if (node == NULL)
        return NULL;

    // Read the successor before the node itself is freed.
    Node* next = node->next;

    switch (node->type)
    {
        case NT_Header1:
        case NT_Header2:
        case NT_Header3:
        case NT_Header4:
            free(((HeaderNode*)node)->rawText);
            break;
        case NT_BlockCode:
            free(((CodeBlockNode*)node)->rawText);
            break;
        case NT_Error:
            free(((ErrorNode*)node)->error);
            break;
        case NT_Paragraph:
        {
            // Test before calling so an empty content list never hands
            // NULL to FreeToken.
            Token* t = ((ParagraphNode*)node)->content;
            while (t != NULL)
                t = FreeToken(t);
        }
            break;
        case NT_UnorderedList:
        case NT_OrderedList:
        case NT_InlineCode:
        case NT_Bold:
        case NT_Underline:
            assert(0 && "//TODO");
            break;
        default:
            // Unknown type: nothing owned that we know how to release.
            break;
    }

    free(node);
    return next;
}