readme-thing/node.c

378 lines
8.8 KiB
C
Raw Normal View History

2021-07-14 08:15:46 -07:00
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "node.h"
// Capacity, in bytes, of the file-local scratch buffer below.
#define STRING_BUFF_SIZE 1024
// Scratch buffer that NodeTypeString() formats into (and returns a pointer
// to) when it is handed a NodeType value it has no name for. Because it is
// a single static buffer, each such call overwrites the previous result.
static char stringBuff[STRING_BUFF_SIZE];
// Forward declarations of the block parsers that ParseNodes() dispatches to.
// Each one receives the address of the caller's token cursor and may move it.
Node* parseHeader(Token** firstToken);
Node* parseCodeBlock(Token** firstToken);
Node* parseParagraph(Token** startToken);
2021-07-14 08:15:46 -07:00
/*
 * ParseNodes - turn a token list into a singly linked list of nodes.
 *
 * Walks the list starting at firstToken and dispatches on the type of the
 * token under the cursor:
 *   TT_NEWLINE / TT_WHITESPACE  -> skipped
 *   TT_HASH                     -> parseHeader
 *   TT_TRIPLEBACKTICK           -> parseCodeBlock
 *   TT_EOF                      -> stop
 *   anything else               -> parseParagraph
 * Each parser is given the address of the cursor and may move it; after the
 * parser returns, the cursor is advanced by one more token.
 *
 * Returns the first node of the list, or NULL when no node was produced
 * (which includes firstToken == NULL).
 */
Node*
ParseNodes(Token* firstToken)
{
    Node* firstNode = NULL;
    Node* prevNode = NULL;
    Token* currentToken = firstToken;

    while (currentToken != NULL) {
        Node* currentNode = NULL;

        switch (currentToken->type) {
        case TT_NEWLINE:
        case TT_WHITESPACE:
            break;
        case TT_HASH:
            // start of header
            currentNode = parseHeader(&currentToken);
            break;
        case TT_TRIPLEBACKTICK:
            currentNode = parseCodeBlock(&currentToken);
            break;
        case TT_EOF:
            return firstNode;
        default: // paragraph start?
            currentNode = parseParagraph(&currentToken);
            break;
        }

        // Append the node BEFORE testing for the end of the token list;
        // otherwise a node whose parser stopped on the last token would be
        // allocated but never linked into the result.
        if (currentNode != NULL) {
            if (prevNode != NULL) {
                prevNode->next = currentNode;
            }
            if (firstNode == NULL) {
                firstNode = currentNode;
            }
            prevNode = currentNode;
        }

        // A parser may have run off the end of the list and left the
        // cursor NULL.
        if (currentToken == NULL) {
            break;
        }
        currentToken = currentToken->next;
    }

    return firstNode;
}
/*
 * parseHeader - parse a header line that starts at *startToken.
 *
 * *startToken must point at a TT_HASH token. The run of TT_HASH tokens
 * selects the level (1, 2, 3 -> NT_Header1..3, anything more -> NT_Header4),
 * whitespace tokens after the hashes are skipped, and the literals of the
 * remaining tokens up to (not including) the next TT_NEWLINE or TT_EOF are
 * concatenated into a heap-allocated string stored in rawText.
 *
 * On return *startToken points at the last token this function consumed.
 * The caller (ParseNodes) advances one token past it, so the terminating
 * TT_NEWLINE counts as consumed while a terminating TT_EOF does not.
 *
 * Returns the new HeaderNode cast to Node*, or NULL when the hashes are not
 * followed by any text ("Header missing text" is printed) or when an
 * allocation fails.
 */
Node*
parseHeader(Token** startToken)
{
    Token* t = *startToken;

    // Count the number of TT_HASH tokens
    int count = 1;
    while (t->next != NULL && t->next->type == TT_HASH) {
        count++;
        t = t->next;
    }
    if (t->next == NULL) {
        printf("Header missing text");
        // Nothing follows the hashes; move the cursor to the end so the
        // remaining hashes are not re-parsed (and re-reported) one by one.
        *startToken = t;
        return NULL;
    }
    t = t->next;

    // Trim leading whitespace
    while (t->next != NULL && t->type == TT_WHITESPACE) {
        t = t->next;
    }
    if (t->next == NULL) {
        printf("Header missing text");
        *startToken = t;
        return NULL;
    }

    // Find the end of the header text and its total size. The NULL test
    // guards against a list that has no TT_NEWLINE / TT_EOF terminator.
    int len = 0;
    Token* last = NULL; // last token that contributes text
    Token* end = t;
    while (end != NULL && end->type != TT_NEWLINE && end->type != TT_EOF) {
        len += end->length;
        last = end;
        end = end->next;
    }

    // Where the caller resumes: on the newline when there is one, otherwise
    // on the last text token so that the caller's "advance by one" lands on
    // the TT_EOF token instead of stepping past it.
    Token* resume = end;
    if ((end == NULL || end->type == TT_EOF) && last != NULL) {
        resume = last;
    }
    *startToken = resume;

    char* strbuff = malloc((size_t)len + 1);
    HeaderNode* retval = malloc(sizeof *retval);
    if (strbuff == NULL || retval == NULL) {
        free(strbuff);
        free(retval);
        return NULL;
    }

    // Append each literal at the current end of the string instead of
    // rescanning from the start of the buffer every time.
    char* out = strbuff;
    *out = '\0';
    for (; t != end; t = t->next) {
        strncat(out, t->literal, t->length);
        out += strlen(out);
    }

    switch (count) {
    case 1:
        retval->type = NT_Header1;
        break;
    case 2:
        retval->type = NT_Header2;
        break;
    case 3:
        retval->type = NT_Header3;
        break;
    default:
        retval->type = NT_Header4;
        break;
    }
    retval->next = NULL;
    retval->rawText = strbuff;
    return (Node*)retval;
}
/*
 * parseCodeBlock - parse a fenced code block that starts at *startToken.
 *
 * *startToken must point at the opening TT_TRIPLEBACKTICK. One TT_NEWLINE
 * directly after the opening fence is skipped; the literals of every token
 * from there up to the closing TT_TRIPLEBACKTICK are concatenated into a
 * heap-allocated string stored in rawText.
 *
 * On success *startToken is left on the closing fence; the caller
 * (ParseNodes) advances one token past it, so the token that follows the
 * fence is the next one examined.
 *
 * If the token list ends (TT_EOF or end of list) before a closing fence is
 * found, "premature EOF" is printed, *startToken is left unchanged and an
 * ErrorNode is returned. Returns NULL if an allocation fails.
 */
Node*
parseCodeBlock(Token** startToken)
{
    // First token of the block body: skip the opening fence and, if
    // present, the newline that ends the fence line.
    Token* first = (*startToken)->next;
    if (first != NULL && first->type == TT_NEWLINE) {
        first = first->next;
    }

    // Find the closing fence and add up the size of the body.
    int clen = 0; // number of characters
    Token* closing = first;
    while (closing != NULL
           && closing->type != TT_TRIPLEBACKTICK
           && closing->type != TT_EOF) {
        clen += closing->length;
        closing = closing->next;
    }

    if (closing == NULL || closing->type != TT_TRIPLEBACKTICK) {
        printf("premature EOF");
        ErrorNode* err = malloc(sizeof *err);
        if (err == NULL) {
            return NULL;
        }
        err->type = NT_Error;
        err->next = NULL;
        err->error = "premature EOF searching for closing triple backtick";
        return (Node*)err;
    }

    char* strbuff = malloc((size_t)clen + 1);
    CodeBlockNode* ret = malloc(sizeof *ret);
    if (strbuff == NULL || ret == NULL) {
        free(strbuff);
        free(ret);
        return NULL;
    }

    char* out = strbuff;
    *out = '\0';
    for (Token* t = first; t != closing; t = t->next) {
        strncat(out, t->literal, t->length);
        out += strlen(out);
    }

    // Stop ON the closing fence rather than after it: the caller steps one
    // token further by itself, and stepping twice would skip a token (or
    // lose this node when the fence is directly followed by TT_EOF).
    *startToken = closing;

    ret->type = NT_BlockCode;
    ret->rawText = strbuff;
    ret->next = NULL;
    return (Node*)ret;
}
/*
 * parseParagraph - parse a paragraph (or block quote) starting at *startToken.
 *
 * The tokens that make up the paragraph are cut out of the incoming token
 * list and become the NULL-terminated list pnode->content.
 *
 *  - A leading TT_GT makes the paragraph a PT_Quote; the marker is freed.
 *  - A TT_NEWLINE followed by TT_WHITESPACE is dropped. In a quote the
 *    paragraph then ends; otherwise it continues.
 *  - Any other TT_NEWLINE either ends the paragraph or is rewritten in place
 *    into a one-space TT_WHITESPACE token that joins two lines:
 *      standard paragraph: ends when the next token is TT_NEWLINE, TT_EOF,
 *                          TT_TRIPLEBACKTICK or TT_GT;
 *      quote:              ends unless the next token is TT_GT; that marker
 *                          and one TT_WHITESPACE token after it are freed.
 *  - TT_EOF is never made part of the content.
 *  - Finally, adjacent TT_WHITESPACE tokens in the content are merged and a
 *    trailing TT_WHITESPACE token is removed.
 *
 * On return *startToken points at a token the caller may treat as consumed:
 * the caller (ParseNodes) advances one token past it. When the paragraph
 * ends at a newline that is the newline itself, so the token that ended the
 * paragraph (code fence, quote marker, ...) is the next one examined.
 *
 * Returns the new ParagraphNode cast to Node*, or NULL if it cannot be
 * allocated. content is NULL when the paragraph turned out to be empty.
 */
Node*
parseParagraph(Token** startToken)
{
    ParagraphNode* pnode = malloc(sizeof *pnode);
    if (pnode == NULL) {
        return NULL;
    }
    pnode->next = NULL;
    pnode->type = NT_Paragraph;
    pnode->ptype = PT_Standard;
    pnode->content = NULL;

    Token* t = *startToken;
    if (t->type == TT_GT) {
        pnode->ptype = PT_Quote;
        if (t->next == NULL) {
            // Nothing follows the marker: return an empty quote and leave
            // the cursor on the (still valid) marker token.
            return (Node*)pnode;
        }
        // consume TT_GT
        Token* marker = t;
        t = t->next;
        FreeToken(marker);
    }
    pnode->content = t;

    Token* prevToken = NULL; // last token linked into the content list
    Token* resume = NULL;    // where the caller continues from

    while (t != NULL) {
        if (t->type == TT_EOF) {
            resume = t;
            break;
        }

        if (t->type == TT_NEWLINE && t->next != NULL) {
            Token* following = t->next;

            if (following->type == TT_WHITESPACE) {
                // Consume the newline if the next one is a space. The
                // newline may be the head of the content list.
                if (prevToken != NULL) {
                    prevToken->next = following;
                } else {
                    pnode->content = following;
                }
                FreeToken(t);
                t = following;
                if (pnode->ptype == PT_Quote) {
                    // Not a quote marker, so the quote ends here.
                    resume = t;
                    break;
                }
            } else {
                int ends;
                if (pnode->ptype == PT_Quote) {
                    ends = (following->type != TT_GT);
                } else {
                    switch (following->type) {
                    case TT_NEWLINE:
                    case TT_EOF:
                    case TT_TRIPLEBACKTICK:
                    case TT_GT:
                        ends = 1;
                        break;
                    default:
                        ends = 0;
                        break;
                    }
                }
                if (ends) {
                    // Leave the cursor on the newline so that the caller's
                    // "advance by one" lands on `following` instead of
                    // skipping it.
                    resume = t;
                    break;
                }

                // Convert this token into a whitespace character
                t->literal[0] = ' ';
                t->type = TT_WHITESPACE;
                if (prevToken != NULL) {
                    prevToken->next = t;
                }
                prevToken = t;
                t = following;

                if (pnode->ptype == PT_Quote) {
                    // `following` is the TT_GT of a continued quote line:
                    // remove it ...
                    Token* marker = t;
                    t = t->next;
                    prevToken->next = t;
                    FreeToken(marker);
                    // ... and the whitespace token right after it, if any.
                    if (t != NULL && t->type == TT_WHITESPACE) {
                        Token* pad = t;
                        t = t->next;
                        prevToken->next = t;
                        FreeToken(pad);
                    }
                    continue;
                }
            }
        }

        if (prevToken != NULL) {
            prevToken->next = t;
        }
        prevToken = t;
        t = t->next;
    }

    // The list ended without a terminator: resume from the last token taken
    // so the caller never receives a NULL cursor.
    if (resume == NULL) {
        resume = prevToken;
    }
    *startToken = resume;

    // terminate the pnode->content list
    if (prevToken != NULL) {
        prevToken->next = NULL;
    } else {
        pnode->content = NULL;
    }

    // Merge runs of whitespace tokens and remove trailing whitespace.
    Token* prior = NULL;
    t = pnode->content;
    while (t != NULL) {
        if (t->type == TT_WHITESPACE) {
            if (t->next == NULL) {
                if (prior != NULL) {
                    prior->next = NULL;
                } else {
                    pnode->content = NULL;
                }
                // The token is no longer reachable from the content list;
                // release it unless the caller's cursor points at it.
                if (t != resume) {
                    FreeToken(t);
                }
                break;
            }
            if (t->next->type == TT_WHITESPACE) {
                // Concatenate the two into the second token.
                Token* second = t->next;
                int len = t->length + second->length;
                char* merged = malloc((size_t)len + 1);
                if (merged != NULL) {
                    merged[0] = '\0';
                    strncat(merged, t->literal, t->length);
                    strncat(merged, second->literal, second->length);
                    if (prior != NULL) {
                        prior->next = second;
                    } else {
                        pnode->content = second;
                    }
                    FreeToken(t);
                    free(second->literal);
                    second->literal = merged;
                    second->length = len;
                    t = second;
                    // Look at the merged token again: it may be trailing or
                    // be followed by yet another whitespace token.
                    continue;
                }
                // Allocation failed: leave the two tokens unmerged.
            }
        }
        prior = t;
        t = t->next;
    }

    return (Node*)pnode;
}
/*
 * NodeTypeString - name of a NodeType value, for diagnostics.
 *
 * Returns a string literal for every known value. For any other value the
 * text "unknown NodeType: <n>" is formatted into the file-local stringBuff
 * and a pointer to that buffer is returned, so the result of such a call is
 * only valid until the next one. Callers must not modify or free the result.
 */
char*
NodeTypeString(NodeType t)
{
    switch (t) {
    case NT_Header1:
        return "NT_Header1";
    case NT_Header2:
        return "NT_Header2";
    case NT_Header3:
        return "NT_Header3";
    case NT_Header4:
        return "NT_Header4";
    case NT_Paragraph:
        return "NT_Paragraph";
    case NT_UnorderedList:
        return "NT_UnorderedList";
    case NT_OrderedList:
        return "NT_OrderedList";
    case NT_InlineCode:
        return "NT_InlineCode";
    case NT_BlockCode:
        return "NT_BlockCode";
    case NT_BlockQuote:
        return "NT_BlockQuote";
    case NT_Bold:
        return "NT_Bold";
    case NT_Underline:
        return "NT_Underline";
    case NT_Error:
        return "NT_Error";
    default:
        // Bound the write by the buffer's real size instead of a separate
        // magic number, and pass the enum as the int that %d expects.
        snprintf(stringBuff, sizeof stringBuff, "unknown NodeType: %d", (int)t);
        return stringBuff;
    }
}
/*
 * ParagraphTypeString - name of a ParagraphType value, for diagnostics.
 *
 * Returns a string literal: the enumerator's name for PT_Standard, PT_Quote
 * and PT_Code, and "UNKNOWN" for any other value.
 */
char*
ParagraphTypeString(ParagraphType t)
{
    char* name = "UNKNOWN";

    if (t == PT_Standard) {
        name = "PT_Standard";
    } else if (t == PT_Quote) {
        name = "PT_Quote";
    } else if (t == PT_Code) {
        name = "PT_Code";
    }
    return name;
}