2021-07-14 08:15:46 -07:00
|
|
|
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "node.h"
|
|
|
|
|
|
|
|
#define STRING_BUFF_SIZE 1024
|
|
|
|
|
|
|
|
static char stringBuff[STRING_BUFF_SIZE];
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
Node* parseHeader(Token** firstToken);
|
|
|
|
Node* parseCodeBlock(Token** firstToken);
|
2021-07-14 08:15:46 -07:00
|
|
|
|
2023-10-20 17:19:34 -07:00
|
|
|
Node*
|
2023-10-22 13:50:18 -07:00
|
|
|
ParseNodes(Token* firstToken)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
2023-10-22 13:50:18 -07:00
|
|
|
Node* firstNode = NULL;
|
2021-07-14 08:15:46 -07:00
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
Token* currentToken = firstToken;
|
2023-10-15 15:30:59 -07:00
|
|
|
Node* prevNode = NULL;
|
|
|
|
|
2021-07-14 08:15:46 -07:00
|
|
|
while (1) {
|
2023-10-15 15:30:59 -07:00
|
|
|
Node* currentNode = NULL;
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
switch (currentToken->type) {
|
2021-07-14 08:15:46 -07:00
|
|
|
case TT_NEWLINE:
|
|
|
|
break;
|
|
|
|
case TT_HASH:
|
|
|
|
// start of header
|
2023-10-15 17:55:12 -07:00
|
|
|
currentNode = parseHeader(¤tToken);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case TT_TRIPLEBACKTICK:
|
|
|
|
currentNode = parseCodeBlock(¤tToken);
|
2021-07-14 08:15:46 -07:00
|
|
|
break;
|
2023-10-15 15:30:59 -07:00
|
|
|
|
|
|
|
case TT_EOF:
|
2023-10-22 13:50:18 -07:00
|
|
|
return firstNode;
|
2023-10-15 17:55:12 -07:00
|
|
|
|
|
|
|
default: // paragraph start?
|
2021-07-14 08:15:46 -07:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2023-10-15 15:30:59 -07:00
|
|
|
if (currentToken->next == NULL) {
|
|
|
|
printf("currentToken->next == NULL\n");
|
2021-07-14 08:15:46 -07:00
|
|
|
break;
|
|
|
|
}
|
2023-10-15 15:30:59 -07:00
|
|
|
|
|
|
|
currentToken = currentToken->next;
|
|
|
|
if (currentNode == NULL)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
if (prevNode != NULL) {
|
|
|
|
prevNode->next = currentNode;
|
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
if (firstNode == NULL) {
|
|
|
|
firstNode = currentNode;
|
2023-10-15 15:30:59 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
prevNode = currentNode;
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
return firstNode;
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|
2023-10-15 17:55:12 -07:00
|
|
|
Node*
|
2023-10-22 13:50:18 -07:00
|
|
|
parseHeader(Token** startToken)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
2023-10-22 13:50:18 -07:00
|
|
|
Token* t = *startToken;
|
2021-07-14 08:15:46 -07:00
|
|
|
// Count the number of TT_HASH tokens
|
|
|
|
int count = 1;
|
2023-10-22 13:50:18 -07:00
|
|
|
while (t->next != NULL && t->next->type == TT_HASH)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
|
|
|
count++;
|
2023-10-22 13:50:18 -07:00
|
|
|
t = t->next;
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
if (t->next == NULL)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
|
|
|
printf("Header missing text");
|
|
|
|
return NULL;
|
|
|
|
}
|
2023-10-22 13:50:18 -07:00
|
|
|
t = t->next;
|
2021-07-14 08:15:46 -07:00
|
|
|
|
|
|
|
// Trim leading whitespace
|
2023-10-22 13:50:18 -07:00
|
|
|
while (t->next != NULL && t->type == TT_WHITESPACE)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
2023-10-22 13:50:18 -07:00
|
|
|
t = t->next;
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
if (t->next == NULL)
|
2021-07-14 08:15:46 -07:00
|
|
|
{
|
|
|
|
printf("Header missing text");
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
Token* end = t;
|
2023-10-15 17:55:12 -07:00
|
|
|
int len = 0;
|
|
|
|
// find header text size
|
2023-10-22 13:50:18 -07:00
|
|
|
while (end->type != TT_NEWLINE && end->type != TT_EOF) {
|
|
|
|
len += end->length;
|
2023-10-15 17:55:12 -07:00
|
|
|
end = end->next;
|
|
|
|
}
|
2021-07-14 08:15:46 -07:00
|
|
|
|
2023-10-15 17:55:12 -07:00
|
|
|
char* strbuff = malloc(len+1);
|
|
|
|
strbuff[0] = '\0';
|
2021-07-14 08:15:46 -07:00
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
while(t != end) {
|
|
|
|
strncat(strbuff, t->literal, t->length);
|
|
|
|
t = t->next;
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
|
|
|
|
*startToken = t;
|
2023-10-15 15:30:59 -07:00
|
|
|
|
|
|
|
HeaderNode* retval = malloc(sizeof(HeaderNode));
|
|
|
|
switch(count) {
|
|
|
|
case 1:
|
|
|
|
retval->type = NT_Header1;
|
|
|
|
break;
|
|
|
|
case 2:
|
|
|
|
retval->type = NT_Header2;
|
|
|
|
break;
|
|
|
|
case 3:
|
|
|
|
retval->type = NT_Header3;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
retval->type = NT_Header4;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
retval->next = NULL;
|
2023-10-15 17:55:12 -07:00
|
|
|
retval->rawText = strbuff;
|
|
|
|
|
|
|
|
return (Node*)retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
Node*
|
2023-10-22 13:50:18 -07:00
|
|
|
parseCodeBlock(Token** startToken)
|
2023-10-15 17:55:12 -07:00
|
|
|
{
|
|
|
|
// find closing ticks
|
|
|
|
int tlen = 0; // number of tokens
|
|
|
|
int clen = 0; // number of characters
|
2023-10-22 13:50:18 -07:00
|
|
|
Token* t = *startToken;
|
|
|
|
t = t->next; // skip past the opening triple backtick
|
2023-10-15 17:55:12 -07:00
|
|
|
|
|
|
|
// skip the first newline
|
2023-10-22 13:50:18 -07:00
|
|
|
if (t->type == TT_NEWLINE) {
|
|
|
|
t = t->next;
|
2023-10-15 17:55:12 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
while (t->next != NULL && t->type != TT_TRIPLEBACKTICK) {
|
|
|
|
if (t->next->type == TT_EOF) {
|
2023-10-15 17:55:12 -07:00
|
|
|
printf("premature EOF");
|
|
|
|
|
|
|
|
ErrorNode* err = malloc(sizeof(ErrorNode));
|
|
|
|
err->type = NT_Error;
|
|
|
|
err->next = NULL;
|
|
|
|
err->error = "premature EOF searching for closing triple backtick";
|
|
|
|
|
|
|
|
return (Node*)err;
|
|
|
|
}
|
|
|
|
|
|
|
|
tlen++;
|
2023-10-22 13:50:18 -07:00
|
|
|
clen += t->length;
|
|
|
|
t = t->next;
|
2023-10-15 17:55:12 -07:00
|
|
|
}
|
|
|
|
|
2023-10-22 13:50:18 -07:00
|
|
|
t = *startToken;
|
2023-10-15 17:55:12 -07:00
|
|
|
|
|
|
|
char* strbuff = malloc(sizeof(char)*clen+1);
|
|
|
|
strbuff[0] = '\0';
|
|
|
|
int i;
|
2023-10-22 13:50:18 -07:00
|
|
|
t = t->next; // skip past the opening triple backtick
|
2023-10-15 17:55:12 -07:00
|
|
|
|
|
|
|
// skip the first newline
|
2023-10-22 13:50:18 -07:00
|
|
|
if (t->type == TT_NEWLINE) {
|
|
|
|
t = t->next;
|
2023-10-15 17:55:12 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
for(i = 0; i < tlen; i++) {
|
2023-10-22 13:50:18 -07:00
|
|
|
strncat(strbuff, t->literal, t->length);
|
|
|
|
t = t->next;
|
2023-10-15 17:55:12 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
// skip past closing triple backtick
|
2023-10-22 13:50:18 -07:00
|
|
|
*startToken = t->next;
|
2023-10-15 15:30:59 -07:00
|
|
|
|
2023-10-15 17:55:12 -07:00
|
|
|
CodeBlockNode* ret = malloc(sizeof(CodeBlockNode));
|
|
|
|
ret->type = NT_BlockCode;
|
|
|
|
ret->rawText = strbuff;
|
|
|
|
ret->next = NULL;
|
|
|
|
return (Node*)ret;
|
2023-10-15 15:30:59 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
char*
|
|
|
|
NodeTypeString(NodeType t)
|
|
|
|
{
|
|
|
|
switch(t) {
|
|
|
|
case NT_Header1:
|
|
|
|
return "NT_Header1";
|
|
|
|
case NT_Header2:
|
|
|
|
return "NT_Header2";
|
|
|
|
case NT_Header3:
|
|
|
|
return "NT_Header3";
|
|
|
|
case NT_Header4:
|
|
|
|
return "NT_Header4";
|
|
|
|
case NT_Paragraph:
|
|
|
|
return "NT_Paragraph";
|
|
|
|
case NT_UnorderedList:
|
|
|
|
return "NT_UnorderedList";
|
|
|
|
case NT_OrderedList:
|
|
|
|
return "NT_OrderedList";
|
|
|
|
case NT_InlineCode:
|
|
|
|
return "NT_InlineCode";
|
|
|
|
case NT_BlockCode:
|
|
|
|
return "NT_BlockCode";
|
|
|
|
case NT_BlockQuote:
|
|
|
|
return "NT_BlockQuote";
|
|
|
|
case NT_Bold:
|
|
|
|
return "NT_Bold";
|
|
|
|
case NT_Underline:
|
|
|
|
return "NT_Underline";
|
2023-10-15 17:55:12 -07:00
|
|
|
case NT_Error:
|
|
|
|
return "NT_Error";
|
2023-10-15 15:30:59 -07:00
|
|
|
|
|
|
|
default:
|
|
|
|
snprintf(stringBuff, 1000, "unknown NodeType: %d", t);
|
|
|
|
return stringBuff;
|
|
|
|
}
|
2021-07-14 08:15:46 -07:00
|
|
|
}
|
|
|
|
|