Fix basic parse issues

Fixed token parsing so that it actually works. Only headers are currently implemented, but header nodes are now parsed properly and every other token is deliberately ignored.
- Read the input filename from the command line
- Added a number of checks to avoid segfaults
- Added more debug output in places
2023-10-15 18:30:59 -04:00 · 2023-10-15 18:30:59 -04:00 · ca6bdca7ab
parent dd2655db0c
commit ca6bdca7ab
6 changed files with 162 additions and 28 deletions
--- a/lexer.c
+++ b/lexer.c
@ -14,7 +14,7 @@ static Token* newToken(Lexer* l, TokenType tt);
 static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);

 Lexer*
-NewLexer(char* filename)
+NewLexer(const char* filename)
 {
    FILE* fp;
    fp = fopen(filename, "r");
--- a/lexer.h
+++ b/lexer.h
@ -42,7 +42,7 @@ typedef struct Lexer {
 //    int ChildCount;
 //} Node;

-Lexer* NewLexer(char* filename);
+Lexer* NewLexer(const char* filename);
 Token* NextToken(Lexer* l);
 void ReadChar(Lexer* l);
 void Parse(Lexer* l);
--- a/main.c
+++ b/main.c
@ -41,10 +41,27 @@ void writeTokenFile(TokenList* tl);
 int
 main(int argc, const char** argv)
 {
-    Lexer* l = NewLexer("sample.md");
+
+    /*int i;
+    for(i = 0; i < argc; i++) {
+        printf("[%d:%d] %s\n", i, argc, argv[i]);
+    }*/
+
+    if (argc <= 1) {
+        printf("Missing input file\n");
+        return 1;
+    }
+
+    if (argc > 2) {
+        printf("Too many arguments\n");
+        return 2;
+    }
+
+    Lexer* l = NewLexer(argv[1]);
    TokenList* current = malloc(sizeof(TokenList));
    TokenList* tl = current;//= malloc(sizeof(TokenList));
    current->token = NULL;
+    current->next = NULL;

    TokenType tt;
    do
@ -56,7 +73,33 @@ main(int argc, const char** argv)
    while(tt != TT_EOF);

    writeTokenFile(tl);
-    ParseNodes(tl);
+    NodeList* nl = ParseNodes(tl);
+
+    Node* node = nl->first;
+
+    printf("nodes:\n");
+    while(node != NULL)
+    {
+        /*PrintNodeType(node->type);*/
+
+        switch (node->type) {
+            case NT_Header1:
+            case NT_Header2:
+            case NT_Header3:
+            case NT_Header4:
+            {
+                HeaderNode* hnode = (HeaderNode*)node;
+                printf("{HeaderNode type:%s text:%s}\n", NodeTypeString(hnode->type), hnode->rawText);
+            }
+                break;
+
+            default:
+                printf("%s\n", NodeTypeString(node->type));
+        }
+
+
+        node = node->next;
+    }

    printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
            l->rawLen,
@ -72,6 +115,7 @@ main(int argc, const char** argv)
 void
 writeTokenFile(TokenList* tl)
 {
+    printf("writeTokenFile() start\n");
    int count;
    FILE* fp = fopen("tokens.txt", "w");
    if (fp == NULL)
@ -81,19 +125,22 @@ writeTokenFile(TokenList* tl)
    }

    TokenList* current = tl;
-    for(count = 0; current->next != NULL; count++) {
+    for(count = 0; current != NULL; count++) {
        if (count == 0 && current->token == NULL)
        {
            printf("first token null\n");
+            break;
        }
-        else if (count == 0)
-        {
-            printf("%s\n", TokenString(current->token));
-        }
+
+        /*printf("writeTokenFile(): %s\n", TokenString(current->token));*/
        fprintf(fp, "%s\n", TokenString(current->token));
        current = current->next;
    }
    fclose(fp);

+    if (count == 0) {
+        printf("nothing written to file!\n");
+    }
+
    printf("Token count: %d\n", count);
 }
--- a/node.c
+++ b/node.c
@ -7,46 +7,65 @@

 static char stringBuff[STRING_BUFF_SIZE];

-Node* parseHeader(TokenList** list);
+HeaderNode* parseHeader(TokenList** list);

 NodeList*
 ParseNodes(TokenList* list)
 {
+    printf("ParseNodes() start\n");
    NodeList* nl = malloc(sizeof(NodeList));
-    NodeList* currentNode = nl;
+    nl->first = NULL;

-    currentNode->next = NULL;
-    currentNode->node = NULL;
+    /*currentNode->next = NULL;*/
+    /*currentNode->node = NULL;*/

-    TokenList* current = list;
+    TokenList* currentToken = list;
+    Node* prevNode = NULL;
+
+    printf("ParseNodes() loop\n");

-    //while(current != NULL) {
    while (1) {
-        switch (current->token->type) {
+        Node* currentNode = NULL;
+
+        switch (currentToken->token->type) {
            case TT_NEWLINE:
                break;
            case TT_HASH:
                // start of header
-                //Node* nodes;
-                //nodes = parseHeader(current);
-                currentNode->node = parseHeader(&current);
+                currentNode = (Node*)parseHeader(&currentToken);
                break;
+
+            case TT_EOF:
+                printf("EOF found\n");
+                return nl;
            default:
                break;
        }

-        if (current->next == NULL) {
-            //printf("next is null\n");
+        if (currentToken->next == NULL) {
+            printf("currentToken->next == NULL\n");
            break;
        }
-        //printf("current = current->next;\n");
-        current = current->next;
+
+        currentToken = currentToken->next;
+        if (currentNode == NULL)
+            continue;
+
+        if (prevNode != NULL) {
+            prevNode->next = currentNode;
+        }
+
+        if (nl->first == NULL) {
+            nl->first = currentNode;
+        }
+
+        prevNode = currentNode;
    }

    return nl;
 }

-Node*
+HeaderNode*
 parseHeader(TokenList** list)
 {
    TokenList* l = *list;
@ -99,6 +118,61 @@ parseHeader(TokenList** list)

    *list = l;
    printf("header hash count: %d\ntext: '%s'\n", count, stringBuff);
-    return NULL;
+
+    HeaderNode* retval = malloc(sizeof(HeaderNode));
+    switch(count) {
+        case 1:
+            retval->type = NT_Header1;
+            break;
+        case 2:
+            retval->type = NT_Header2;
+            break;
+        case 3:
+            retval->type = NT_Header3;
+            break;
+        default:
+            retval->type = NT_Header4;
+            break;
+    }
+
+    retval->next = NULL;
+    retval->rawText = stringBuff;
+
+    return retval;
+}
+
+char*
+NodeTypeString(NodeType t)
+{
+    switch(t) {
+        case NT_Header1:
+            return "NT_Header1";
+        case NT_Header2:
+            return "NT_Header2";
+        case NT_Header3:
+            return "NT_Header3";
+        case NT_Header4:
+            return "NT_Header4";
+        case NT_Paragraph:
+            return "NT_Paragraph";
+        case NT_UnorderedList:
+            return "NT_UnorderedList";
+        case NT_OrderedList:
+            return "NT_OrderedList";
+        case NT_InlineCode:
+            return "NT_InlineCode";
+        case NT_BlockCode:
+            return "NT_BlockCode";
+        case NT_BlockQuote:
+            return "NT_BlockQuote";
+        case NT_Bold:
+            return "NT_Bold";
+        case NT_Underline:
+            return "NT_Underline";
+
+        default:
+            snprintf(stringBuff, 1000, "unknown NodeType: %d", t);
+            return stringBuff;
+    }
 }

--- a/node.h
+++ b/node.h
@ -24,12 +24,14 @@ struct NodeList;

 typedef struct Node {
    NodeType type;
-    struct NodeList* children;
+    struct Node* next;
+    /*struct NodeList* children;*/
 } Node;

 typedef struct NodeList {
-    struct Node* node;
-    struct Node* next;
+    struct Node* first;
+    /*struct Node* node;
+    struct Node* next;*/
 } NodeList;

 typedef struct {
@ -38,6 +40,14 @@ typedef struct {
    char* rawText;
 } HeaderNode;

+/*
+typedef struct {
+    NodeType type;
+    struct Node* next;
+} ParagraphNode;
+*/
+
 NodeList* ParseNodes(TokenList* list);
+char* NodeTypeString(NodeType t);

 #endif
--- a/token.c
+++ b/token.c
@ -20,7 +20,10 @@ TokenListAdd(TokenList* current, Token* next)
        return current;
    }

+    //printf("%s\n", TokenString(current->token));
+
    TokenList* nl = malloc(sizeof(TokenList));
+    nl->next = NULL;
    nl->token = next;
    current->next = nl;
    return nl;