Combine repeated whitespace tokens
Whitespace tokens now contain an entire run of identical, contiguous whitespace characters. The token list will no longer contain, e.g., three TT_WHITESPACE tokens for three spaces; instead it will have a single TT_WHITESPACE token with a length of three.
This commit is contained in:
parent
da1ad03661
commit
9c172c5216
44
lexer.c
44
lexer.c
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include "lexer.h"
|
#include "lexer.h"
|
||||||
|
|
||||||
|
static char peekChar(Lexer* l);
|
||||||
static void readChar(Lexer* l);
|
static void readChar(Lexer* l);
|
||||||
static char* readIdentifier(Lexer* l);
|
static char* readIdentifier(Lexer* l);
|
||||||
static char* readNumber(Lexer* l);
|
static char* readNumber(Lexer* l);
|
||||||
|
@ -13,6 +14,7 @@ static int isDigit(char c);
|
||||||
static Token* newTickToken(Lexer* l);
|
static Token* newTickToken(Lexer* l);
|
||||||
static Token* newToken(Lexer* l, TokenType tt);
|
static Token* newToken(Lexer* l, TokenType tt);
|
||||||
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
|
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
|
||||||
|
static Token* newWhitespaceToken(Lexer* l);
|
||||||
|
|
||||||
Lexer*
|
Lexer*
|
||||||
NewLexer(const char* filename)
|
NewLexer(const char* filename)
|
||||||
|
@ -99,7 +101,7 @@ NextToken(Lexer* l)
|
||||||
break;
|
break;
|
||||||
case ' ':
|
case ' ':
|
||||||
case '\t':
|
case '\t':
|
||||||
tok = newToken(l, TT_WHITESPACE);
|
tok = newWhitespaceToken(l);
|
||||||
break;
|
break;
|
||||||
case '\r':
|
case '\r':
|
||||||
readChar(l);
|
readChar(l);
|
||||||
|
@ -133,6 +135,33 @@ NextToken(Lexer* l)
|
||||||
return tok;
|
return tok;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
Token*
|
||||||
|
newWhitespaceToken(Lexer* l)
|
||||||
|
{
|
||||||
|
Token* tok = malloc(sizeof(Token));
|
||||||
|
tok->line = l->line;
|
||||||
|
tok->column = l->column;
|
||||||
|
tok->type = TT_WHITESPACE;
|
||||||
|
|
||||||
|
int position = l->position;
|
||||||
|
// grab the char so we can use this funciton for both
|
||||||
|
// spaces and tabs.
|
||||||
|
char ch = l->ch;
|
||||||
|
while (peekChar(l) == ch){
|
||||||
|
readChar(l);
|
||||||
|
}
|
||||||
|
|
||||||
|
int count = l->position - position+1;
|
||||||
|
tok->literal = malloc(sizeof(char)*count+1);
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
tok->literal[i] = ch;
|
||||||
|
}
|
||||||
|
tok->literal[count] = '\0';
|
||||||
|
tok->length = count;
|
||||||
|
return tok;
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
char*
|
char*
|
||||||
readNumber(Lexer* l)
|
readNumber(Lexer* l)
|
||||||
|
@ -167,6 +196,18 @@ readIdentifier(Lexer* l)
|
||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static
|
||||||
|
char
|
||||||
|
peekChar(Lexer* l)
|
||||||
|
{
|
||||||
|
|
||||||
|
if (l->readPosition >= l->rawLen) {
|
||||||
|
return '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
return l->rawFile[l->readPosition];
|
||||||
|
}
|
||||||
|
|
||||||
static
|
static
|
||||||
void
|
void
|
||||||
readChar(Lexer* l)
|
readChar(Lexer* l)
|
||||||
|
@ -219,7 +260,6 @@ newTickToken(Lexer* l)
|
||||||
// peek up to two more characters
|
// peek up to two more characters
|
||||||
int i;
|
int i;
|
||||||
for(i = 0; i < 3; i++) {
|
for(i = 0; i < 3; i++) {
|
||||||
|
|
||||||
if (l->rawFile[l->position+i] != '`') {
|
if (l->rawFile[l->position+i] != '`') {
|
||||||
printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]);
|
printf("next char isn't a backtick @ %d: 0x%02X '%c'\n", l->readPosition+i, l->rawFile[l->position+i], l->rawFile[l->position+i]);
|
||||||
return newToken(l, TT_BACKTICK);
|
return newToken(l, TT_BACKTICK);
|
||||||
|
|
Loading…
Reference in New Issue