From ca6bdca7ab50f894f8b703e5893f318971909ff4 Mon Sep 17 00:00:00 2001 From: Zorchenhimer Date: Sun, 15 Oct 2023 18:30:59 -0400 Subject: [PATCH] Fix basic parse issues Fixed parsing the tokens so it actually worked. Only headers are currently implemented, but header nodes are now properly parsed and everything else is properly ignored. - Get the input filename from the command line - Added in a bunch of checks to avoid segfaults - Added some more debug info in places --- lexer.c | 2 +- lexer.h | 2 +- main.c | 61 ++++++++++++++++++++++++++++---- node.c | 106 +++++++++++++++++++++++++++++++++++++++++++++++--------- node.h | 16 +++++++-- token.c | 3 ++ 6 files changed, 162 insertions(+), 28 deletions(-) diff --git a/lexer.c b/lexer.c index e33d038..03757f4 100644 --- a/lexer.c +++ b/lexer.c @@ -14,7 +14,7 @@ static Token* newToken(Lexer* l, TokenType tt); static Token* newIdentToken(Lexer* l, char* literal, TokenType tt); Lexer* -NewLexer(char* filename) +NewLexer(const char* filename) { FILE* fp; fp = fopen(filename, "r"); diff --git a/lexer.h b/lexer.h index a1edc7b..a0f9f79 100644 --- a/lexer.h +++ b/lexer.h @@ -42,7 +42,7 @@ typedef struct Lexer { // int ChildCount; //} Node; -Lexer* NewLexer(char* filename); +Lexer* NewLexer(const char* filename); Token* NextToken(Lexer* l); void ReadChar(Lexer* l); void Parse(Lexer* l); diff --git a/main.c b/main.c index 4ba37ac..9eefe79 100644 --- a/main.c +++ b/main.c @@ -41,10 +41,27 @@ void writeTokenFile(TokenList* tl); int main(int argc, const char** argv) { - Lexer* l = NewLexer("sample.md"); + + /*int i; + for(i = 0; i < argc; i++) { + printf("[%d:%d] %s\n", i, argc, argv[i]); + }*/ + + if (argc <= 1) { + printf("Missing input file\n"); + return 1; + } + + if (argc > 2) { + printf("Too many arguments\n"); + return 2; + } + + Lexer* l = NewLexer(argv[1]); TokenList* current = malloc(sizeof(TokenList)); TokenList* tl = current;//= malloc(sizeof(TokenList)); current->token = NULL; + current->next = NULL; TokenType tt; do @@ 
-56,7 +73,33 @@ main(int argc, const char** argv) while(tt != TT_EOF); writeTokenFile(tl); - ParseNodes(tl); + NodeList* nl = ParseNodes(tl); + + Node* node = nl->first; + + printf("nodes:\n"); + while(node != NULL) + { + /*PrintNodeType(node->type);*/ + + switch (node->type) { + case NT_Header1: + case NT_Header2: + case NT_Header3: + case NT_Header4: + { + HeaderNode* hnode = (HeaderNode*)node; + printf("{HeaderNode type:%s text:%s}\n", NodeTypeString(hnode->type), hnode->rawText); + } + break; + + default: + printf("%s\n", NodeTypeString(node->type)); + } + + + node = node->next; + } printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n", l->rawLen, @@ -72,6 +115,7 @@ main(int argc, const char** argv) void writeTokenFile(TokenList* tl) { + printf("writeTokenFile() start\n"); int count; FILE* fp = fopen("tokens.txt", "w"); if (fp == NULL) @@ -81,19 +125,22 @@ writeTokenFile(TokenList* tl) } TokenList* current = tl; - for(count = 0; current->next != NULL; count++) { + for(count = 0; current != NULL; count++) { if (count == 0 && current->token == NULL) { printf("first token null\n"); + break; } - else if (count == 0) - { - printf("%s\n", TokenString(current->token)); - } + + /*printf("writeTokenFile(): %s\n", TokenString(current->token));*/ fprintf(fp, "%s\n", TokenString(current->token)); current = current->next; } fclose(fp); + if (count == 0) { + printf("nothing written to file!\n"); + } + printf("Token count: %d\n", count); } diff --git a/node.c b/node.c index 5dd5537..295b975 100644 --- a/node.c +++ b/node.c @@ -7,46 +7,65 @@ static char stringBuff[STRING_BUFF_SIZE]; -Node* parseHeader(TokenList** list); +HeaderNode* parseHeader(TokenList** list); NodeList* ParseNodes(TokenList* list) { + printf("ParseNodes() start\n"); NodeList* nl = malloc(sizeof(NodeList)); - NodeList* currentNode = nl; + nl->first = NULL; - currentNode->next = NULL; - currentNode->node = NULL; + /*currentNode->next = NULL;*/ + /*currentNode->node = NULL;*/ - 
TokenList* current = list; + TokenList* currentToken = list; + Node* prevNode = NULL; + + printf("ParseNodes() loop\n"); - //while(current != NULL) { while (1) { - switch (current->token->type) { + Node* currentNode = NULL; + + switch (currentToken->token->type) { case TT_NEWLINE: break; case TT_HASH: // start of header - //Node* nodes; - //nodes = parseHeader(current); - currentNode->node = parseHeader(&current); + currentNode = (Node*)parseHeader(&currentToken); break; + + case TT_EOF: + printf("EOF found\n"); + return nl; default: break; } - if (current->next == NULL) { - //printf("next is null\n"); + if (currentToken->next == NULL) { + printf("currentToken->next == NULL\n"); break; } - //printf("current = current->next;\n"); - current = current->next; + + currentToken = currentToken->next; + if (currentNode == NULL) + continue; + + if (prevNode != NULL) { + prevNode->next = currentNode; + } + + if (nl->first == NULL) { + nl->first = currentNode; + } + + prevNode = currentNode; } return nl; } -Node* +HeaderNode* parseHeader(TokenList** list) { TokenList* l = *list; @@ -99,6 +118,61 @@ parseHeader(TokenList** list) *list = l; printf("header hash count: %d\ntext: '%s'\n", count, stringBuff); - return NULL; + + HeaderNode* retval = malloc(sizeof(HeaderNode)); + switch(count) { + case 1: + retval->type = NT_Header1; + break; + case 2: + retval->type = NT_Header2; + break; + case 3: + retval->type = NT_Header3; + break; + default: + retval->type = NT_Header4; + break; + } + + retval->next = NULL; + retval->rawText = stringBuff; + + return retval; +} + +char* +NodeTypeString(NodeType t) +{ + switch(t) { + case NT_Header1: + return "NT_Header1"; + case NT_Header2: + return "NT_Header2"; + case NT_Header3: + return "NT_Header3"; + case NT_Header4: + return "NT_Header4"; + case NT_Paragraph: + return "NT_Paragraph"; + case NT_UnorderedList: + return "NT_UnorderedList"; + case NT_OrderedList: + return "NT_OrderedList"; + case NT_InlineCode: + return "NT_InlineCode"; + case 
NT_BlockCode: + return "NT_BlockCode"; + case NT_BlockQuote: + return "NT_BlockQuote"; + case NT_Bold: + return "NT_Bold"; + case NT_Underline: + return "NT_Underline"; + + default: + snprintf(stringBuff, 1000, "unknown NodeType: %d", t); + return stringBuff; + } } diff --git a/node.h b/node.h index 12a6210..acf3097 100644 --- a/node.h +++ b/node.h @@ -24,12 +24,14 @@ struct NodeList; typedef struct Node { NodeType type; - struct NodeList* children; + struct Node* next; + /*struct NodeList* children;*/ } Node; typedef struct NodeList { - struct Node* node; - struct Node* next; + struct Node* first; + /*struct Node* node; + struct Node* next;*/ } NodeList; typedef struct { @@ -38,6 +40,14 @@ typedef struct { char* rawText; } HeaderNode; +/* +typedef struct { + NodeType type; + struct Node* next; +} ParagraphNode; +*/ + NodeList* ParseNodes(TokenList* list); +char* NodeTypeString(NodeType t); #endif diff --git a/token.c b/token.c index b559dce..ca753bb 100644 --- a/token.c +++ b/token.c @@ -20,7 +20,10 @@ TokenListAdd(TokenList* current, Token* next) return current; } + //printf("%s\n", TokenString(current->token)); + TokenList* nl = malloc(sizeof(TokenList)); + nl->next = NULL; nl->token = next; current->next = nl; return nl;