Combine repeated whitespace tokens

Each whitespace token now covers a whole run of contiguous, identical whitespace characters. For example, the token list will no longer contain three TT_WHITESPACE tokens for three consecutive spaces; it will instead contain a single TT_WHITESPACE token with a length of three.
2023-10-20 20:22:11 -04:00 · 2023-10-20 20:22:11 -04:00 · 9c172c5216
parent da1ad03661
commit 9c172c5216
1 changed files with 42 additions and 2 deletions
--- a/lexer.c
+++ b/lexer.c
@ -4,6 +4,7 @@

 #include "lexer.h"

+static char peekChar(Lexer* l);
 static void readChar(Lexer* l);
 static char* readIdentifier(Lexer* l);
 static char* readNumber(Lexer* l);
@ -13,6 +14,7 @@ static int isDigit(char c);
 static Token* newTickToken(Lexer* l);
 static Token* newToken(Lexer* l, TokenType tt);
 static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
+static Token* newWhitespaceToken(Lexer* l);

 Lexer*
 NewLexer(const char* filename)
@ -99,7 +101,7 @@ NextToken(Lexer* l)
            break;
        case ' ':
        case '\t':
-            tok = newToken(l, TT_WHITESPACE);
+            tok = newWhitespaceToken(l);
            break;
        case '\r':
            readChar(l);
@ -133,6 +135,33 @@ NextToken(Lexer* l)
    return tok;
 }

+static
+Token*
+newWhitespaceToken(Lexer* l) // builds one TT_WHITESPACE token covering a run of identical whitespace chars starting at l->ch
+{
+    Token* tok = malloc(sizeof(Token)); // NOTE(review): result is not checked for NULL before use
+    tok->line = l->line;
+    tok->column = l->column; // captured before any readChar call, so this is the lexer state at the start of the run
+    tok->type = TT_WHITESPACE;
+
+    int position = l->position; // remember where the run begins so its length can be computed below
+    // grab the char so we can use this function for both
+    // spaces and tabs.
+    char ch = l->ch;
+    while (peekChar(l) == ch){ // only the same character extends the run; a different char (e.g. tab after space) ends it
+        readChar(l);
+    }
+
+    int count = l->position - position+1; // inclusive length; presumably readChar advances l->position by one per call -- confirm
+    tok->literal = malloc(sizeof(char)*count+1); // count chars plus the trailing NUL; NOTE(review): also unchecked
+    for (int i = 0; i < count; i++) {
+        tok->literal[i] = ch; // the run is homogeneous, so the literal is rebuilt rather than copied from the input
+    }
+    tok->literal[count] = '\0';
+    tok->length = count;
+    return tok; // the lexer is left on the last char of the run, not one past it
+}
+
 static
 char*
 readNumber(Lexer* l)
@ -167,6 +196,18 @@ readIdentifier(Lexer* l)
    return out;
 }

+static
+char
+peekChar(Lexer* l) // returns the char at l->readPosition without modifying the lexer
+{
+
+    if (l->readPosition >= l->rawLen) { // readPosition is at or past the end of the raw input
+        return '\0'; // NUL stands in for "nothing left to peek"
+    }
+
+    return l->rawFile[l->readPosition]; // presumably the next unread char, i.e. the one after l->ch -- confirm against readChar
+}
+
 static
 void
 readChar(Lexer* l)
@ -219,7 +260,6 @@ newTickToken(Lexer* l)
    // peek up to two more characters
    int i;
    for(i = 0; i < 3; i++) {
-
        if (l->rawFile[l->position+i] != '`') {
            printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]);
            return newToken(l, TT_BACKTICK);