Implement the paragraph node

This node handles both plain paragraphs and block quotes. Single
newlines are turned into spaces; double newlines terminate the block.
Care is taken to avoid repeated whitespace tokens in the content
while preserving the total number of spaces.
This commit is contained in:
Zorchenhimer 2023-10-22 22:29:38 -04:00
parent 3d0bba21e1
commit c17eaf1ec4
7 changed files with 203 additions and 14 deletions

View File

@ -240,13 +240,6 @@ isDigit(char ch)
return ('0' <= ch && ch <= '9');
}
/*
 * Release a token together with its literal string.
 *
 * Passing NULL is a no-op, mirroring free().
 */
void
FreeToken(Token* t)
{
    if (t == NULL)
        return;

    free(t->literal);
    free(t);
}
static
Token*
newTickToken(Lexer* l)

24
main.c
View File

@ -85,11 +85,33 @@ main(int argc, const char** argv)
}
break;
case NT_Paragraph:
{
ParagraphNode* pnode = (ParagraphNode*)node;
printf("{ParagraphNode ptype:%s}\n", ParagraphTypeString(pnode->ptype));
Token* content = pnode->content;
while(content != NULL)
{
if (content->type == TT_WHITESPACE)
{
printf(" ");
}
else
{
printf("%s", content->literal);
}
content = content->next;
}
printf("\n");
}
break;
default:
printf("%s\n", NodeTypeString(node->type));
}
node = node->next;
}

147
node.c
View File

@ -9,6 +9,7 @@ static char stringBuff[STRING_BUFF_SIZE];
Node* parseHeader(Token** firstToken);
Node* parseCodeBlock(Token** firstToken);
Node* parseParagraph(Token** startToken);
Node*
ParseNodes(Token* firstToken)
@ -23,7 +24,9 @@ ParseNodes(Token* firstToken)
switch (currentToken->type) {
case TT_NEWLINE:
case TT_WHITESPACE:
break;
case TT_HASH:
// start of header
currentNode = parseHeader(&currentToken);
@ -37,11 +40,11 @@ ParseNodes(Token* firstToken)
return firstNode;
default: // paragraph start?
currentNode = parseParagraph(&currentToken);
break;
}
if (currentToken->next == NULL) {
printf("currentToken->next == NULL\n");
break;
}
@ -193,6 +196,134 @@ parseCodeBlock(Token** startToken)
return (Node*)ret;
}
Node*
parseParagraph(Token** startToken)
{
ParagraphNode* pnode = malloc(sizeof(ParagraphNode));
pnode->next = NULL;
pnode->type = NT_Paragraph;
Token* t = *startToken;
pnode->ptype = PT_Standard;
if (t->type == TT_GT) {
pnode->ptype = PT_Quote;
// consume TT_GT
Token* consumed = t;
t = t->next;
FreeToken(consumed);
}
pnode->content = t;
Token* prevToken = NULL;
Token* consumed = NULL;
while(t != NULL)
{
// Look for the end of the paragraph.
if (t->type == TT_NEWLINE && t->next != NULL)
{
if (t->next->type == TT_WHITESPACE)
{
// Consume the newline if the next one is a space.
consumed = t;
t = t->next;
prevToken->next = t;
FreeToken(consumed);
}
else
{
// Convert this token into a whitespace character
t->literal[0] = ' ';
t->type = TT_WHITESPACE;
if (prevToken != NULL)
prevToken->next = t;
prevToken = t;
t = t->next;
}
if (pnode->ptype == PT_Quote) {
if (t->type == TT_GT) {
// removes TT_GT
consumed = t;
t = t->next;
prevToken->next = t;
FreeToken(consumed);
if (t->next != NULL && t->next->type == TT_WHITESPACE)
{
// removes TT_WHITESPACE
consumed = t;
t = t->next;
prevToken->next = t;
FreeToken(consumed);
}
continue;
}
goto paragraphEnd;
}
switch (t->type)
{
case TT_NEWLINE:
case TT_EOF:
case TT_TRIPLEBACKTICK:
case TT_GT:
goto paragraphEnd;
break;
default:
break;
}
} // TT_NEWLINE check
//printf("t->literal: %s\n", t->literal);
if (prevToken != NULL)
prevToken->next = t;
prevToken = t;
t = t->next;
}
paragraphEnd:
*startToken = t; // on double newlines, this is the second newline.
prevToken->next = NULL; // terminate the pnode->content list
// remove trailing whitespace
prevToken = NULL;
t = pnode->content;
while(t != NULL)
{
if (t->type == TT_WHITESPACE)
{
if(t->next == NULL)
{
prevToken->next = NULL;
break;
}
else if (t->next->type == TT_WHITESPACE)
{
// concatinate the two.
int len = t->length + t->next->length;
char* newws = malloc(sizeof(char)*len+1);
newws[0] = '\0';
strncat(newws, t->literal, t->length);
strncat(newws, t->next->literal, t->next->length);
consumed = t;
t = t->next;
prevToken->next = t;
FreeToken(consumed);
t->length = len;
free(t->literal);
t->literal = newws;
}
}
prevToken = t;
t = t->next;
}
return (Node*)pnode;
}
char*
NodeTypeString(NodeType t)
{
@ -230,3 +361,17 @@ NodeTypeString(NodeType t)
}
}
/*
 * Map a ParagraphType to the spelling of its enumerator.
 *
 * The result is a string literal ("UNKNOWN" for any value that is not one
 * of the enumerators); callers must not modify or free it.
 */
char*
ParagraphTypeString(ParagraphType t)
{
    char* name = "UNKNOWN";

    if (t == PT_Standard)
        name = "PT_Standard";
    else if (t == PT_Quote)
        name = "PT_Quote";
    else if (t == PT_Code)
        name = "PT_Code";

    return name;
}

11
node.h
View File

@ -55,14 +55,21 @@ typedef struct {
char* error;
} ErrorNode;
/*
// Flavor of a ParagraphNode; see ParagraphTypeString() for the printable name.
typedef enum {
// Plain paragraph; the default assigned by parseParagraph().
PT_Standard,
// Block quote; assigned when the paragraph starts with a TT_GT token.
PT_Quote,
// NOTE(review): not assigned anywhere in the code visible here - confirm intended use.
PT_Code,
} ParagraphType;
// Node produced by parseParagraph() for a paragraph or block quote.
typedef struct {
// Node kind tag; parseParagraph() sets it to NT_Paragraph.
NodeType type;
// Next node in the node list; NULL for a freshly parsed paragraph.
struct Node* next;
// Whether this is a standard paragraph or a block quote.
ParagraphType ptype;
// First token of the NULL-terminated token list holding the paragraph text.
struct Token* content;
} ParagraphNode;
*/
// Parse the token list starting at firstToken and return the first node of
// the resulting node list (nodes are chained through their `next` field).
Node* ParseNodes(Token* firstToken);
// Return a printable name for a node type.
char* NodeTypeString(NodeType t);
// Return the enumerator name of a paragraph type, or "UNKNOWN" for a value
// that is not one of the enumerators. The result is a string literal.
char* ParagraphTypeString(ParagraphType t);
#endif

View File

@ -10,11 +10,21 @@ _underlined text_
Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas
pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
this has some `inline
code` in it.
> Block Quote thing.
> Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
>
> rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
tabbed
``` other code
```
```
Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui.
one two
```
### Header 3

10
token.c
View File

@ -24,6 +24,14 @@ TokenString(Token* t)
return stringBuff;
}
/*
 * Release a token and, for most token types, the literal it owns.
 *
 * The literal of a TT_TRIPLEBACKTICK token is deliberately not freed
 * (presumably it is not heap-allocated - confirm against the lexer).
 * Passing NULL is a no-op, mirroring free().
 */
void
FreeToken(Token* t)
{
    if (t == NULL)
        return;

    if (t->type != TT_TRIPLEBACKTICK)
        free(t->literal);
    free(t);
}
char*
TokenTypeString(TokenType tt)
{
@ -54,6 +62,8 @@ TokenTypeString(TokenType tt)
return "TT_WORD";
case TT_NUMBER:
return "TT_NUMBER";
case TT_GT:
return "TT_GT";
}
return "\0";

View File

@ -16,6 +16,7 @@ typedef enum {
TT_NEWLINE,
TT_WORD,
TT_NUMBER,
TT_GT, // greater than; used for block quotes
} TokenType;
typedef struct Token {
@ -29,5 +30,6 @@ typedef struct Token {
// Return a printable description of a token. The result points at a shared
// buffer (stringBuff); presumably it is overwritten by the next call - confirm.
char* TokenString(Token* t);
// Return the enumerator name of a token type (e.g. "TT_WORD"), or an empty
// string for a value that is not handled.
char* TokenTypeString(TokenType tt);
// Free a token; its literal is freed too unless the token is TT_TRIPLEBACKTICK.
void FreeToken(Token* t);
#endif