Add codeblocks; Fix some stuff
- Parse code blocks that are wrapped in sets of three backticks. - Added a TT_TRIPLEBACKTICK token. - Added a length field to the Token struct. - Added an error node. - Cleaned up some file read code. - Fixed the header raw text (don't reuse the same buffer for each node).
This commit is contained in:
parent
ca6bdca7ab
commit
d855df380b
3
Makefile
3
Makefile
|
@ -1,6 +1,7 @@
|
|||
|
||||
CC=gcc
|
||||
CFLAGS=-Wall -pedantic -Werror -std=c99
|
||||
CFLAGS=-Wall -pedantic -Werror -std=c99 -g -O0
|
||||
#-fsanitize=address
|
||||
|
||||
OBJ=main.o lexer.o token.o node.o
|
||||
|
||||
|
|
51
lexer.c
51
lexer.c
|
@ -10,6 +10,7 @@ static char* readNumber(Lexer* l);
|
|||
static int isLetter(char c);
|
||||
static int isDigit(char c);
|
||||
|
||||
static Token* newTickToken(Lexer* l);
|
||||
static Token* newToken(Lexer* l, TokenType tt);
|
||||
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
|
||||
|
||||
|
@ -33,8 +34,11 @@ NewLexer(const char* filename)
|
|||
Lexer* state = malloc(sizeof(Lexer));
|
||||
state->rawFile = malloc((sizeof(char) * fileSize) + 1);
|
||||
state->rawLen = fileSize;
|
||||
state->readPosition = 0;
|
||||
|
||||
size_t read = fread(state->rawFile, sizeof(char), fileSize, fp);
|
||||
fclose(fp);
|
||||
|
||||
if (read != fileSize)
|
||||
{
|
||||
printf("something borked. only read %d bytes of %d\n", (int)read, fileSize);
|
||||
|
@ -44,7 +48,6 @@ NewLexer(const char* filename)
|
|||
|
||||
return NULL;
|
||||
}
|
||||
fclose(fp);
|
||||
|
||||
state->rawFile[fileSize] = '\0';
|
||||
state->line = 1;
|
||||
|
@ -80,8 +83,11 @@ NextToken(Lexer* l)
|
|||
case '.':
|
||||
tok = newToken(l, TT_PERIOD);
|
||||
break;
|
||||
case '>':
|
||||
tok = newToken(l, TT_GT);
|
||||
break;
|
||||
case '`':
|
||||
tok = newToken(l, TT_BACKTICK);
|
||||
tok = newTickToken(l);
|
||||
break;
|
||||
case '\0':
|
||||
tok = newToken(l, TT_EOF);
|
||||
|
@ -161,7 +167,6 @@ readIdentifier(Lexer* l)
|
|||
return out;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
void
|
||||
readChar(Lexer* l)
|
||||
|
@ -180,11 +185,6 @@ readChar(Lexer* l)
|
|||
l->readPosition++;
|
||||
}
|
||||
|
||||
void
|
||||
Parse(Lexer* l)
|
||||
{
|
||||
}
|
||||
|
||||
int
|
||||
isLetter(char ch)
|
||||
{
|
||||
|
@ -204,6 +204,39 @@ FreeToken(Token* t)
|
|||
free(t);
|
||||
}
|
||||
|
||||
static
|
||||
Token*
|
||||
newTickToken(Lexer* l)
|
||||
{
|
||||
|
||||
printf("backticks @ %d:%d\n", l->line, l->column);
|
||||
|
||||
if (l->position+3 > l->rawLen) {
|
||||
printf("premature EOF parsing ticks\n");
|
||||
return newToken(l, TT_BACKTICK);
|
||||
}
|
||||
|
||||
// peek up to two more characters
|
||||
int i;
|
||||
for(i = 0; i < 3; i++) {
|
||||
|
||||
if (l->rawFile[l->position+i] != '`') {
|
||||
printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]);
|
||||
return newToken(l, TT_BACKTICK);
|
||||
}
|
||||
}
|
||||
|
||||
Token* tok = malloc(sizeof(Token));
|
||||
tok->line = l->line;
|
||||
tok->column = l->column;
|
||||
tok->literal = "```";
|
||||
tok->type = TT_TRIPLEBACKTICK;
|
||||
tok->length = 3;
|
||||
readChar(l);
|
||||
readChar(l);
|
||||
return tok;
|
||||
}
|
||||
|
||||
static
|
||||
Token*
|
||||
newToken(Lexer* l,
|
||||
|
@ -217,6 +250,7 @@ newToken(Lexer* l,
|
|||
tok->literal = nc;
|
||||
tok->line = l->line;
|
||||
tok->column = l->column;
|
||||
tok->length = 1;
|
||||
return tok;
|
||||
}
|
||||
|
||||
|
@ -231,5 +265,6 @@ newIdentToken(Lexer* l,
|
|||
tok->literal = literal;
|
||||
tok->line = l->line;
|
||||
tok->column = l->column;
|
||||
tok->length = strlen(literal);
|
||||
return tok;
|
||||
}
|
||||
|
|
43
main.c
43
main.c
|
@ -7,35 +7,6 @@
|
|||
#include "lexer.h"
|
||||
#include "node.h"
|
||||
|
||||
/*
|
||||
* RawText ""
|
||||
* LineNumber 0
|
||||
* NodeType NT_Root
|
||||
* ChildNodes
|
||||
* RawText "# Header1"
|
||||
* LineNumber 1
|
||||
* NodeType NT_Header1
|
||||
* ChildNodes
|
||||
* {"Some text."}
|
||||
*
|
||||
* RawText "## Header2"
|
||||
*/
|
||||
|
||||
/*
|
||||
* NodeType NT_Root
|
||||
* ChildNodes
|
||||
* RawText "## Header2"
|
||||
* ChildNodes
|
||||
* paragraph
|
||||
* ChildNodes
|
||||
* {*bold text*}
|
||||
* {_underlined text_}
|
||||
* paragraph
|
||||
*
|
||||
*
|
||||
*/
|
||||
//Node* ParseLine(char *buffer);
|
||||
|
||||
void writeTokenFile(TokenList* tl);
|
||||
|
||||
int
|
||||
|
@ -93,6 +64,20 @@ main(int argc, const char** argv)
|
|||
}
|
||||
break;
|
||||
|
||||
case NT_BlockCode:
|
||||
{
|
||||
CodeBlockNode* cnode = (CodeBlockNode*)node;
|
||||
printf("{CodeBlockNode text:%s}\n", cnode->rawText);
|
||||
}
|
||||
break;
|
||||
|
||||
case NT_Error:
|
||||
{
|
||||
ErrorNode* enode = (ErrorNode*)node;
|
||||
printf("{ErrorNode error:%s}\n", enode->error);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
printf("%s\n", NodeTypeString(node->type));
|
||||
}
|
||||
|
|
109
node.c
109
node.c
|
@ -7,7 +7,8 @@
|
|||
|
||||
static char stringBuff[STRING_BUFF_SIZE];
|
||||
|
||||
HeaderNode* parseHeader(TokenList** list);
|
||||
Node* parseHeader(TokenList** list);
|
||||
Node* parseCodeBlock(TokenList** list);
|
||||
|
||||
NodeList*
|
||||
ParseNodes(TokenList* list)
|
||||
|
@ -32,13 +33,18 @@ ParseNodes(TokenList* list)
|
|||
break;
|
||||
case TT_HASH:
|
||||
// start of header
|
||||
currentNode = (Node*)parseHeader(¤tToken);
|
||||
currentNode = parseHeader(¤tToken);
|
||||
break;
|
||||
|
||||
case TT_TRIPLEBACKTICK:
|
||||
currentNode = parseCodeBlock(¤tToken);
|
||||
break;
|
||||
|
||||
case TT_EOF:
|
||||
printf("EOF found\n");
|
||||
return nl;
|
||||
default:
|
||||
|
||||
default: // paragraph start?
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -65,7 +71,7 @@ ParseNodes(TokenList* list)
|
|||
return nl;
|
||||
}
|
||||
|
||||
HeaderNode*
|
||||
Node*
|
||||
parseHeader(TokenList** list)
|
||||
{
|
||||
TokenList* l = *list;
|
||||
|
@ -96,23 +102,19 @@ parseHeader(TokenList** list)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
stringBuff[0] = '\0';
|
||||
while (1)
|
||||
{
|
||||
int bufSize = strlen(stringBuff);
|
||||
int litSize = strlen(l->token->literal);
|
||||
if (bufSize + litSize + 1 > STRING_BUFF_SIZE)
|
||||
{
|
||||
printf("Buffer not big enough!");
|
||||
return NULL;
|
||||
}
|
||||
strncat(stringBuff, l->token->literal, strlen(l->token->literal));
|
||||
|
||||
if (l->next == NULL || l->next->token->type == TT_NEWLINE)
|
||||
{
|
||||
break;
|
||||
TokenList* end = l;
|
||||
int len = 0;
|
||||
// find header text size
|
||||
while (end->token->type != TT_NEWLINE && end->token->type != TT_EOF) {
|
||||
len += end->token->length;
|
||||
end = end->next;
|
||||
}
|
||||
|
||||
char* strbuff = malloc(len+1);
|
||||
strbuff[0] = '\0';
|
||||
|
||||
while(l != end) {
|
||||
strncat(strbuff, l->token->literal, l->token->length);
|
||||
l = l->next;
|
||||
}
|
||||
|
||||
|
@ -136,9 +138,72 @@ parseHeader(TokenList** list)
|
|||
}
|
||||
|
||||
retval->next = NULL;
|
||||
retval->rawText = stringBuff;
|
||||
retval->rawText = strbuff;
|
||||
|
||||
return retval;
|
||||
return (Node*)retval;
|
||||
}
|
||||
|
||||
Node*
|
||||
parseCodeBlock(TokenList** list)
|
||||
{
|
||||
TokenList* l = *list;
|
||||
// find closing ticks
|
||||
int tlen = 0; // number of tokens
|
||||
int clen = 0; // number of characters
|
||||
l = l->next; // skip past the opening triple backtick
|
||||
|
||||
// skip the first newline
|
||||
if (l->token->type == TT_NEWLINE) {
|
||||
l = l->next;
|
||||
}
|
||||
|
||||
while (l->next != NULL && l->next->token->type != TT_TRIPLEBACKTICK) {
|
||||
if (l->next->token->type == TT_EOF) {
|
||||
printf("premature EOF");
|
||||
|
||||
ErrorNode* err = malloc(sizeof(ErrorNode));
|
||||
err->type = NT_Error;
|
||||
err->next = NULL;
|
||||
err->error = "premature EOF searching for closing triple backtick";
|
||||
|
||||
return (Node*)err;
|
||||
}
|
||||
|
||||
tlen++;
|
||||
clen += l->token->length;
|
||||
l = l->next;
|
||||
}
|
||||
|
||||
l = *list;
|
||||
|
||||
printf("codeblock token length: %d\n", tlen);
|
||||
printf("codeblock char length: %d\n", clen);
|
||||
|
||||
printf("malloc(%ld)\n", sizeof(char)*clen+1);
|
||||
char* strbuff = malloc(sizeof(char)*clen+1);
|
||||
strbuff[0] = '\0';
|
||||
int i;
|
||||
l = l->next; // skip past the opening triple backtick
|
||||
|
||||
// skip the first newline
|
||||
if (l->token->type == TT_NEWLINE) {
|
||||
l = l->next;
|
||||
}
|
||||
|
||||
for(i = 0; i < tlen; i++) {
|
||||
strncat(strbuff, l->token->literal, l->token->length);
|
||||
l = l->next;
|
||||
}
|
||||
|
||||
// skip past closing triple backtick
|
||||
*list = l->next;
|
||||
|
||||
printf("malloc(%ld)\n", sizeof(CodeBlockNode));
|
||||
CodeBlockNode* ret = malloc(sizeof(CodeBlockNode));
|
||||
ret->type = NT_BlockCode;
|
||||
ret->rawText = strbuff;
|
||||
ret->next = NULL;
|
||||
return (Node*)ret;
|
||||
}
|
||||
|
||||
char*
|
||||
|
@ -169,6 +234,8 @@ NodeTypeString(NodeType t)
|
|||
return "NT_Bold";
|
||||
case NT_Underline:
|
||||
return "NT_Underline";
|
||||
case NT_Error:
|
||||
return "NT_Error";
|
||||
|
||||
default:
|
||||
snprintf(stringBuff, 1000, "unknown NodeType: %d", t);
|
||||
|
|
27
node.h
27
node.h
|
@ -6,18 +6,29 @@
|
|||
#define NODE_H
|
||||
|
||||
typedef enum {
|
||||
// Stand-alone elements
|
||||
// cannot contain text modifiers
|
||||
NT_Header1,
|
||||
NT_Header2,
|
||||
NT_Header3,
|
||||
NT_Header4,
|
||||
NT_BlockCode,
|
||||
|
||||
// Container elements
|
||||
// can contain text modifiers
|
||||
NT_Paragraph,
|
||||
NT_UnorderedList,
|
||||
NT_OrderedList,
|
||||
NT_InlineCode,
|
||||
NT_BlockCode,
|
||||
NT_BlockQuote,
|
||||
|
||||
// Contained elements (cannot be bare)
|
||||
// text modifiers
|
||||
NT_InlineCode,
|
||||
NT_Bold,
|
||||
NT_Underline,
|
||||
|
||||
// something went wrong
|
||||
NT_Error,
|
||||
} NodeType;
|
||||
|
||||
struct NodeList;
|
||||
|
@ -40,6 +51,18 @@ typedef struct {
|
|||
char* rawText;
|
||||
} HeaderNode;
|
||||
|
||||
typedef struct {
|
||||
NodeType type;
|
||||
struct Node* next;
|
||||
char* rawText;
|
||||
} CodeBlockNode;
|
||||
|
||||
typedef struct {
|
||||
NodeType type;
|
||||
struct Node* next;
|
||||
char* error;
|
||||
} ErrorNode;
|
||||
|
||||
/*
|
||||
typedef struct {
|
||||
NodeType type;
|
||||
|
|
2
token.c
2
token.c
|
@ -63,6 +63,8 @@ TokenTypeString(TokenType tt)
|
|||
return "TT_PERIOD";
|
||||
case TT_BACKTICK:
|
||||
return "TT_BACKTICK";
|
||||
case TT_TRIPLEBACKTICK:
|
||||
return "TT_TRIPLEBACKTICK";
|
||||
case TT_WHITESPACE:
|
||||
return "TT_WHITESPACE";
|
||||
case TT_NEWLINE:
|
||||
|
|
Loading…
Reference in New Issue