Combine repeated whitespace tokens

Each whitespace token now covers an entire run of identical, contiguous
whitespace characters.  The token list will no longer contain, e.g., three
TT_WHITESPACE tokens for three spaces; it will instead contain a single
TT_WHITESPACE token with a length of three.
This commit is contained in:
Zorchenhimer 2023-10-20 20:22:11 -04:00
parent da1ad03661
commit 9c172c5216
1 changed files with 42 additions and 2 deletions

44
lexer.c
View File

@ -4,6 +4,7 @@
#include "lexer.h" #include "lexer.h"
static char peekChar(Lexer* l);
static void readChar(Lexer* l); static void readChar(Lexer* l);
static char* readIdentifier(Lexer* l); static char* readIdentifier(Lexer* l);
static char* readNumber(Lexer* l); static char* readNumber(Lexer* l);
@ -13,6 +14,7 @@ static int isDigit(char c);
static Token* newTickToken(Lexer* l); static Token* newTickToken(Lexer* l);
static Token* newToken(Lexer* l, TokenType tt); static Token* newToken(Lexer* l, TokenType tt);
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt); static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
static Token* newWhitespaceToken(Lexer* l);
Lexer* Lexer*
NewLexer(const char* filename) NewLexer(const char* filename)
@ -99,7 +101,7 @@ NextToken(Lexer* l)
break; break;
case ' ': case ' ':
case '\t': case '\t':
tok = newToken(l, TT_WHITESPACE); tok = newWhitespaceToken(l);
break; break;
case '\r': case '\r':
readChar(l); readChar(l);
@ -133,6 +135,33 @@ NextToken(Lexer* l)
return tok; return tok;
} }
// Build a single TT_WHITESPACE token covering a run of identical
// whitespace characters, starting at the lexer's current character.
//
// The run character is taken from l->ch, so the same routine serves
// both spaces and tabs; a run ends as soon as the next character
// differs (a space followed by a tab yields two separate tokens).
//
// The returned token and its literal are heap-allocated here.
// NOTE(review): allocation results are not checked -- confirm this
// matches the policy used by the other token constructors.
static
Token*
newWhitespaceToken(Lexer* l)
{
    // Remember what we are matching and where the run began.
    const char runChar = l->ch;
    const int start = l->position;

    // Line/column are recorded before any input is consumed so they
    // describe the first character of the run.
    Token* tok = malloc(sizeof(Token));
    tok->type = TT_WHITESPACE;
    tok->line = l->line;
    tok->column = l->column;

    // Consume for as long as the upcoming character matches.
    while (peekChar(l) == runChar) {
        readChar(l);
    }

    // Distance the position moved, plus one for the starting character.
    const int runLen = l->position - start + 1;

    // The literal is runLen copies of the run character, NUL-terminated.
    tok->literal = malloc(sizeof(char)*runLen+1);
    int filled = 0;
    while (filled < runLen) {
        tok->literal[filled] = runChar;
        filled++;
    }
    tok->literal[runLen] = '\0';
    tok->length = runLen;

    return tok;
}
static static
char* char*
readNumber(Lexer* l) readNumber(Lexer* l)
@ -167,6 +196,18 @@ readIdentifier(Lexer* l)
return out; return out;
} }
// Return the character at the lexer's read position without consuming
// it, or '\0' when the read position is at or past the end of the raw
// input (rawLen).
static
char
peekChar(Lexer* l)
{
    return (l->readPosition < l->rawLen)
        ? l->rawFile[l->readPosition]
        : '\0';
}
static static
void void
readChar(Lexer* l) readChar(Lexer* l)
@ -219,7 +260,6 @@ newTickToken(Lexer* l)
// peek up to two more characters // peek up to two more characters
int i; int i;
for(i = 0; i < 3; i++) { for(i = 0; i < 3; i++) {
if (l->rawFile[l->position+i] != '`') { if (l->rawFile[l->position+i] != '`') {
printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]); printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]);
return newToken(l, TT_BACKTICK); return newToken(l, TT_BACKTICK);