From b952ac99aca09cd7e58587eae148b13ab2cbae35 Mon Sep 17 00:00:00 2001
From: Zorchenhimer <zorchenhimer@gmail.com>
Date: Wed, 15 Nov 2023 21:43:03 -0500
Subject: [PATCH] Fix a lot of memory leaks

Added a bunch of code to free created objects.  One notable change was
to return the next token from FreeToken() to cut down on boilerplate
code that kept track of a "consumed" token.  Now all that just happens
in FreeToken() and I don't have to worry about that anywhere else.

Also added FreeNode() to free all nodes.  This wasn't done before at
all.  Like FreeToken(), it will return the next node in the list to
avoid having to track that stuff in the calling context.
---
 lexer.c   |  11 ++---
 lexer.h   |  26 +----------
 main.c    |  32 ++++++++------
 node.c    | 127 +++++++++++++++++++++++++++++++++---------------------
 node.h    |   2 +-
 sample.md |   1 -
 token.c   |  11 +++--
 token.h   |   2 +-
 8 files changed, 115 insertions(+), 97 deletions(-)

diff --git a/lexer.c b/lexer.c
index 458764b..cdf809d 100644
--- a/lexer.c
+++ b/lexer.c
@@ -128,8 +128,6 @@ NextToken(Lexer* l)
             {
                 tok = newToken(l, TT_ILLEGAL);
             }
-            //printf("Invalid token: %X\n", l->ch);
-            //return NULL;
     }
 
     readChar(l);
@@ -146,8 +144,11 @@ newWhitespaceToken(Lexer* l)
     tok->type = TT_WHITESPACE;
 
     int position = l->position;
-    // grab the char so we can use this funciton for both
-    // spaces and tabs.
+
+    /*
+     * Grab the char so we can use this function for both
+     * spaces and tabs.
+     */
     char ch = l->ch;
     while (peekChar(l) == ch){
         readChar(l);
@@ -277,7 +278,7 @@ newToken(Lexer* l,
 {
     Token* tok = malloc(sizeof(Token));
     char* nc = malloc(sizeof(char)+1);
-    *nc = l->ch;
+    nc[0] = l->ch;
     nc[1] = '\0';
     tok->type = tt;
     tok->literal = nc;
diff --git a/lexer.h b/lexer.h
index a0f9f79..3d0705b 100644
--- a/lexer.h
+++ b/lexer.h
@@ -4,21 +4,6 @@
 #ifndef LEXER_H
 #define LEXER_H
 
-//typedef enum NodeType {
-//    NT_Root,
-//    NT_Header1,
-//    NT_Header2,
-//    NT_Header3,
-//    NT_ListItem,
-//    NT_OrderedListItem,
-//    NT_Paragraph,
-//    NT_PlainText,
-//    NT_BoldText,
-//    NT_UnderlineText,
-//    NT_InlineCode,
-//    NT_BlockCode,
-//} NodeType;
-
 typedef struct Lexer {
     char* rawFile;
     int rawLen;
@@ -32,19 +17,10 @@ typedef struct Lexer {
 
 } Lexer;
 
-//typedef struct Node {
-//    NodeType type;
-//    char RawText;
-//    int LineNumber;
-//
-//    //struct Node **ChildNodes;
-//    void** ChildNodes;
-//    int ChildCount;
-//} Node;
-
 Lexer* NewLexer(const char* filename);
 Token* NextToken(Lexer* l);
 void ReadChar(Lexer* l);
 void Parse(Lexer* l);
+void FreeLexer(Lexer* l);
 
 #endif
diff --git a/main.c b/main.c
index fc71174..a2c937c 100644
--- a/main.c
+++ b/main.c
@@ -52,15 +52,17 @@ main(int argc, const char** argv)
     }
     while(tt != TT_EOF);
 
+    FreeLexer(l);
+
     writeTokenFile(firstToken);
-    Node* node = ParseNodes(firstToken);
+    Node* firstNode = ParseNodes(firstToken);
+    Node* node = firstNode;
 
     printf("nodes:\n");
     while(node != NULL)
     {
-        /*PrintNodeType(node->type);*/
-
-        switch (node->type) {
+        switch (node->type)
+        {
             case NT_Header1:
             case NT_Header2:
             case NT_Header3:
@@ -115,14 +117,20 @@ main(int argc, const char** argv)
         node = node->next;
     }
 
-    //printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
-    //        l->rawLen,
-    //        l->position,
-    //        l->readPosition,
-    //        l->ch,
-    //        l->line,
-    //        l->column
-    //);
+    node = firstNode;
+    firstNode = NULL;
+
+    while ((node = FreeNode(node)) != NULL);
+
+    if (node != NULL)
+        printf("last node != NULL\n");
+
+    if (firstNode != NULL)
+    {
+        printf("firstNode != NULL\n");
+        printf("%s\n", NodeTypeString(firstNode->type));
+    }
+
     return 0;
 }
 
diff --git a/node.c b/node.c
index d627c36..e3bf36d 100644
--- a/node.c
+++ b/node.c
@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <string.h>
+#include <assert.h>
 
 #include "node.h"
 
@@ -19,10 +20,12 @@ ParseNodes(Token* firstToken)
     Token* currentToken = firstToken;
     Node* prevNode = NULL;
 
-    while (1) {
+    while (currentToken != NULL)
+    {
         Node* currentNode = NULL;
 
-        switch (currentToken->type) {
+        switch (currentToken->type)
+        {
             case TT_NEWLINE:
             case TT_WHITESPACE:
                 break;
@@ -44,21 +47,17 @@ ParseNodes(Token* firstToken)
                 break;
         }
 
-        if (currentToken->next == NULL) {
-            break;
-        }
+        if (currentToken->type == TT_NEWLINE || currentToken->type == TT_WHITESPACE)
+            currentToken = FreeToken(currentToken);
 
-        currentToken = currentToken->next;
         if (currentNode == NULL)
             continue;
 
-        if (prevNode != NULL) {
+        if (prevNode != NULL)
             prevNode->next = currentNode;
-        }
 
-        if (firstNode == NULL) {
+        if (firstNode == NULL)
             firstNode = currentNode;
-        }
 
         prevNode = currentNode;
     }
@@ -70,12 +69,13 @@ Node*
 parseHeader(Token** startToken)
 {
     Token* t = *startToken;
+
     // Count the number of TT_HASH tokens
     int count = 1;
     while (t->next != NULL && t->next->type == TT_HASH)
     {
         count++;
-        t = t->next;
+        t = FreeToken(t);
     }
 
     if (t->next == NULL)
@@ -83,12 +83,11 @@ parseHeader(Token** startToken)
         printf("Header missing text");
         return NULL;
     }
-    t = t->next;
 
     // Trim leading whitespace
     while (t->next != NULL && t->type == TT_WHITESPACE)
     {
-        t = t->next;
+        t = FreeToken(t);
     }
 
     if (t->next == NULL)
@@ -99,6 +98,7 @@ parseHeader(Token** startToken)
 
     Token* end = t;
     int len = 0;
+
     // find header text size
     while (end->type != TT_NEWLINE && end->type != TT_EOF) {
         len += end->length;
@@ -110,10 +110,9 @@ parseHeader(Token** startToken)
 
     while(t != end) {
         strncat(strbuff, t->literal, t->length);
-        t = t->next;
+        t = FreeToken(t);
     }
 
-
     *startToken = t;
 
     HeaderNode* retval = malloc(sizeof(HeaderNode));
@@ -144,14 +143,19 @@ parseCodeBlock(Token** startToken)
     // find closing ticks
     int tlen = 0;   // number of tokens
     int clen = 0;   // number of characters
-    Token* t = *startToken;
-    t = t->next; // skip past the opening triple backtick
+
+    // skip past the opening triple backtick
+    *startToken = FreeToken(*startToken);
 
     // skip the first newline
-    if (t->type == TT_NEWLINE) {
-        t = t->next;
+    while ((*startToken)->type == TT_NEWLINE) {
+        *startToken = FreeToken(*startToken);
     }
 
+    // assign this after we skip tokens so we don't have to
+    // re-skip them later.
+    Token* t = *startToken;
+
     while (t->next != NULL && t->type != TT_TRIPLEBACKTICK) {
         if (t->next->type == TT_EOF) {
             printf("premature EOF");
@@ -174,20 +178,22 @@ parseCodeBlock(Token** startToken)
     char* strbuff = malloc(sizeof(char)*clen+1);
     strbuff[0] = '\0';
     int i;
-    t = t->next; // skip past the opening triple backtick
-
-    // skip the first newline
-    if (t->type == TT_NEWLINE) {
-        t = t->next;
-    }
 
     for(i = 0; i < tlen; i++) {
         strncat(strbuff, t->literal, t->length);
-        t = t->next;
+        t = FreeToken(t);
     }
 
-    // skip past closing triple backtick
-    *startToken = t->next;
+    /*
+     * Skip past closing triple backtick
+     * This is modifying the *parameter* that was passed in,
+     * so we can return the node and advance the token tree.
+     */
+    *startToken = FreeToken(t);
+
+    while ((*startToken)->type == TT_NEWLINE || (*startToken)->type == TT_WHITESPACE) {
+        *startToken = FreeToken(*startToken);
+    }
 
     CodeBlockNode* ret = malloc(sizeof(CodeBlockNode));
     ret->type = NT_BlockCode;
@@ -208,14 +214,11 @@ parseParagraph(Token** startToken)
     if (t->type == TT_GT) {
         pnode->ptype = PT_Quote;
         // consume TT_GT
-        Token* consumed = t;
-        t = t->next;
-        FreeToken(consumed);
+        t = FreeToken(t);
     }
 
     pnode->content = t;
     Token* prevToken = NULL;
-    Token* consumed = NULL;
 
     while(t != NULL)
     {
@@ -226,10 +229,8 @@ parseParagraph(Token** startToken)
             if (t->next->type == TT_WHITESPACE)
             {
                 // Consume the newline if the next one is a space.
-                consumed = t;
-                t = t->next;
+                t = FreeToken(t);
                 prevToken->next = t;
-                FreeToken(consumed);
             }
             else
             {
@@ -245,18 +246,14 @@ parseParagraph(Token** startToken)
             if (pnode->ptype == PT_Quote) {
                 if (t->type == TT_GT) {
                     // removes TT_GT
-                    consumed = t;
-                    t = t->next;
+                    t = FreeToken(t);
                     prevToken->next = t;
-                    FreeToken(consumed);
 
                     if (t->next != NULL && t->next->type == TT_WHITESPACE)
                     {
                         // removes TT_WHITESPACE
-                        consumed = t;
-                        t = t->next;
+                        t = FreeToken(t);
                         prevToken->next = t;
-                        FreeToken(consumed);
                     }
                     continue;
                 }
@@ -276,7 +273,6 @@ parseParagraph(Token** startToken)
             }
         } // TT_NEWLINE check
 
-        //printf("t->literal: %s\n", t->literal);
         if (prevToken != NULL)
             prevToken->next = t;
         prevToken = t;
@@ -296,22 +292,22 @@ paragraphEnd:
         {
             if(t->next == NULL)
             {
+                FreeToken(t);
                 prevToken->next = NULL;
                 break;
             }
             else if (t->next->type == TT_WHITESPACE)
             {
-                // concatinate the two.
+                /* concatenate the two. */
                 int len = t->length + t->next->length;
                 char* newws = malloc(sizeof(char)*len+1);
                 newws[0] = '\0';
                 strncat(newws, t->literal, t->length);
                 strncat(newws, t->next->literal, t->next->length);
 
-                consumed = t;
-                t = t->next;
+                t = FreeToken(t);
                 prevToken->next = t;
-                FreeToken(consumed);
+
                 t->length = len;
                 free(t->literal);
                 t->literal = newws;
@@ -346,8 +342,6 @@ NodeTypeString(NodeType t)
             return "NT_InlineCode";
         case NT_BlockCode:
             return "NT_BlockCode";
-        case NT_BlockQuote:
-            return "NT_BlockQuote";
         case NT_Bold:
             return "NT_Bold";
         case NT_Underline:
@@ -375,3 +369,40 @@ ParagraphTypeString(ParagraphType t)
     }
     return "UNKNOWN";
 }
+
+Node*
+FreeNode(Node* node)
+{
+    Node* next = node->next;
+    switch (node->type)
+    {
+    case NT_Header1:
+    case NT_Header2:
+    case NT_Header3:
+    case NT_Header4:
+        free(((HeaderNode*)node)->rawText);
+        break;
+    case NT_BlockCode:
+        free(((CodeBlockNode*)node)->rawText);
+        break;
+    case NT_Error:
+        free(((ErrorNode*)node)->error);
+        break;
+    case NT_Paragraph:
+        {
+            ParagraphNode* pnode = (ParagraphNode*)node;
+            Token* t = pnode->content;
+            while ((t = FreeToken(t)) != NULL);
+        }
+        break;
+    case NT_UnorderedList:
+    case NT_OrderedList:
+    case NT_InlineCode:
+    case NT_Bold:
+    case NT_Underline:
+        assert(0 && "//TODO");
+        break;
+    }
+    free(node);
+    return next;
+}
diff --git a/node.h b/node.h
index ec62cab..f79ba5a 100644
--- a/node.h
+++ b/node.h
@@ -19,7 +19,6 @@ typedef enum {
     NT_Paragraph,
     NT_UnorderedList,
     NT_OrderedList,
-    NT_BlockQuote,
 
     // Contained elements (cannot be bare)
     // text modifiers
@@ -71,5 +70,6 @@ typedef struct {
 Node* ParseNodes(Token* firstToken);
 char* NodeTypeString(NodeType t);
 char* ParagraphTypeString(ParagraphType t);
+Node* FreeNode(Node* node);
 
 #endif
diff --git a/sample.md b/sample.md
index 032e62b..92d8211 100644
--- a/sample.md
+++ b/sample.md
@@ -12,7 +12,6 @@ pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
 
 this has some `inline 
  code` in it.
-
 > Block Quote thing.
 > Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
 >
diff --git a/token.c b/token.c
index 014c3bf..c569954 100644
--- a/token.c
+++ b/token.c
@@ -13,23 +13,26 @@ char* printableOnly(char* input);
 char*
 TokenString(Token* t)
 {
-    //char* str = malloc(sizeof(char) * 1000);
-    snprintf(stringBuff, 1000, "[%d:%d] Type: %s Literal: '%s'",
+    char* printable = printableOnly(t->literal);
+    snprintf(stringBuff, STRING_BUFF_SIZE, "[%d:%d] Type: %s Literal: '%s'",
             t->line,
             t->column,
             TokenTypeString(t->type),
-            printableOnly(t->literal)
+            printable
             );
 
+    free(printable);
     return stringBuff;
 }
 
-void
+Token*
 FreeToken(Token* t)
 {
+    Token* next = t->next;
     if (t->type != TT_TRIPLEBACKTICK)
         free(t->literal);
     free(t);
+    return next;
 }
 
 char*
diff --git a/token.h b/token.h
index cee0e44..a98d70e 100644
--- a/token.h
+++ b/token.h
@@ -30,6 +30,6 @@ typedef struct Token {
 
 char* TokenString(Token* t);
 char* TokenTypeString(TokenType tt);
-void FreeToken(Token* t);
+Token* FreeToken(Token* t);
 
 #endif