From 9c172c52165c8f17d6d847d5a66aee263a4dd442 Mon Sep 17 00:00:00 2001 From: Zorchenhimer Date: Fri, 20 Oct 2023 20:22:11 -0400 Subject: [PATCH] Combine repeated whitespace tokens Whitespace tokens now contain all equal contiguous whitespace characters. The token list will no longer contain, e.g., three TT_WHITESPACE tokens for three spaces and will instead have one TT_WHITESPACE token that has a length of three. --- lexer.c | 44 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/lexer.c b/lexer.c index 86344d0..ae92849 100644 --- a/lexer.c +++ b/lexer.c @@ -4,6 +4,7 @@ #include "lexer.h" +static char peekChar(Lexer* l); static void readChar(Lexer* l); static char* readIdentifier(Lexer* l); static char* readNumber(Lexer* l); @@ -13,6 +14,7 @@ static int isDigit(char c); static Token* newTickToken(Lexer* l); static Token* newToken(Lexer* l, TokenType tt); static Token* newIdentToken(Lexer* l, char* literal, TokenType tt); +static Token* newWhitespaceToken(Lexer* l); Lexer* NewLexer(const char* filename) @@ -99,7 +101,7 @@ NextToken(Lexer* l) break; case ' ': case '\t': - tok = newToken(l, TT_WHITESPACE); + tok = newWhitespaceToken(l); break; case '\r': readChar(l); @@ -133,6 +135,33 @@ NextToken(Lexer* l) return tok; } +static +Token* +newWhitespaceToken(Lexer* l) +{ + Token* tok = malloc(sizeof(Token)); + tok->line = l->line; + tok->column = l->column; + tok->type = TT_WHITESPACE; + + int position = l->position; + // grab the char so we can use this function for both + // spaces and tabs. 
+ char ch = l->ch; + while (peekChar(l) == ch){ + readChar(l); + } + + int count = l->position - position+1; + tok->literal = malloc(sizeof(char)*count+1); + for (int i = 0; i < count; i++) { + tok->literal[i] = ch; + } + tok->literal[count] = '\0'; + tok->length = count; + return tok; +} + static char* readNumber(Lexer* l) @@ -167,6 +196,18 @@ readIdentifier(Lexer* l) return out; } +static +char +peekChar(Lexer* l) +{ + + if (l->readPosition >= l->rawLen) { + return '\0'; + } + + return l->rawFile[l->readPosition]; +} + static void readChar(Lexer* l) @@ -219,7 +260,6 @@ newTickToken(Lexer* l) // peek up to two more characters int i; for(i = 0; i < 3; i++) { - if (l->rawFile[l->position+i] != '`') { printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]); return newToken(l, TT_BACKTICK);