236 lines
4.5 KiB
C
236 lines
4.5 KiB
C
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
#include "lexer.h"
|
||
|
|
||
|
static void readChar(Lexer* l);
|
||
|
static char* readIdentifier(Lexer* l);
|
||
|
static char* readNumber(Lexer* l);
|
||
|
static int isLetter(char c);
|
||
|
static int isDigit(char c);
|
||
|
|
||
|
static Token* newToken(Lexer* l, TokenType tt);
|
||
|
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
|
||
|
|
||
|
Lexer*
|
||
|
NewLexer(char* filename)
|
||
|
{
|
||
|
FILE* fp;
|
||
|
fp = fopen(filename, "r");
|
||
|
|
||
|
if (fp == NULL)
|
||
|
{
|
||
|
printf("Can't open the file for some reason\n");
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
fseek(fp, 0, SEEK_END);
|
||
|
int fileSize = ftell(fp);
|
||
|
fseek(fp, 0, SEEK_SET);
|
||
|
printf("fileSize: %d\n", fileSize);
|
||
|
|
||
|
Lexer* state = malloc(sizeof(Lexer));
|
||
|
state->rawFile = malloc((sizeof(char) * fileSize) + 1);
|
||
|
state->rawLen = fileSize;
|
||
|
|
||
|
size_t read = fread(state->rawFile, sizeof(char), fileSize, fp);
|
||
|
if (read != fileSize)
|
||
|
{
|
||
|
printf("something borked. only read %d bytes of %d\n", (int)read, fileSize);
|
||
|
|
||
|
free(state->rawFile);
|
||
|
free(state);
|
||
|
|
||
|
return NULL;
|
||
|
}
|
||
|
fclose(fp);
|
||
|
|
||
|
state->rawFile[fileSize] = '\0';
|
||
|
state->line = 1;
|
||
|
|
||
|
readChar(state);
|
||
|
return state;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
FreeLexer(Lexer* l)
|
||
|
{
|
||
|
free(l->rawFile);
|
||
|
free(l);
|
||
|
}
|
||
|
|
||
|
Token*
|
||
|
NextToken(Lexer* l)
|
||
|
{
|
||
|
Token* tok;
|
||
|
switch (l->ch) {
|
||
|
case '#':
|
||
|
tok = newToken(l, TT_HASH);
|
||
|
break;
|
||
|
case '*':
|
||
|
tok = newToken(l, TT_ASTERISK);
|
||
|
break;
|
||
|
case '_':
|
||
|
tok = newToken(l, TT_UNDERSCORE);
|
||
|
break;
|
||
|
case '-':
|
||
|
tok = newToken(l, TT_DASH);
|
||
|
break;
|
||
|
case '.':
|
||
|
tok = newToken(l, TT_PERIOD);
|
||
|
break;
|
||
|
case '`':
|
||
|
tok = newToken(l, TT_BACKTICK);
|
||
|
break;
|
||
|
case '\0':
|
||
|
tok = newToken(l, TT_EOF);
|
||
|
break;
|
||
|
case '\n':
|
||
|
tok = newToken(l, TT_NEWLINE);
|
||
|
l->line++;
|
||
|
l->column = 0;
|
||
|
break;
|
||
|
case ' ':
|
||
|
case '\t':
|
||
|
tok = newToken(l, TT_WHITESPACE);
|
||
|
break;
|
||
|
case '\r':
|
||
|
readChar(l);
|
||
|
return NextToken(l); // lets GOOOOO
|
||
|
default:
|
||
|
if (isLetter(l->ch))
|
||
|
{
|
||
|
int start = l->column;
|
||
|
char* literal = readIdentifier(l);
|
||
|
tok = newIdentToken(l, literal, TT_WORD);
|
||
|
tok->column = start;
|
||
|
return tok;
|
||
|
}
|
||
|
else if (isDigit(l->ch))
|
||
|
{
|
||
|
int start = l->column;
|
||
|
char* literal = readNumber(l);
|
||
|
tok = newIdentToken(l, literal, TT_NUMBER);
|
||
|
tok->column = start;
|
||
|
return tok;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
tok = newToken(l, TT_ILLEGAL);
|
||
|
}
|
||
|
//printf("Invalid token: %X\n", l->ch);
|
||
|
//return NULL;
|
||
|
}
|
||
|
|
||
|
readChar(l);
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
static
|
||
|
char*
|
||
|
readNumber(Lexer* l)
|
||
|
{
|
||
|
int position = l->position;
|
||
|
while (isDigit(l->ch))
|
||
|
{
|
||
|
readChar(l);
|
||
|
}
|
||
|
|
||
|
int len = (l->position - position);
|
||
|
char* out = malloc(sizeof(char) * len + 1);
|
||
|
memcpy(out, &l->rawFile[position], len);
|
||
|
out[len] = '\0';
|
||
|
return out;
|
||
|
}
|
||
|
|
||
|
static
|
||
|
char*
|
||
|
readIdentifier(Lexer* l)
|
||
|
{
|
||
|
int position = l->position;
|
||
|
while (isLetter(l->ch))
|
||
|
{
|
||
|
readChar(l);
|
||
|
}
|
||
|
|
||
|
int len = (l->position - position);
|
||
|
char* out = malloc(sizeof(char) * len + 1);
|
||
|
memcpy(out, &l->rawFile[position], len);
|
||
|
out[len] = '\0';
|
||
|
return out;
|
||
|
}
|
||
|
|
||
|
|
||
|
static
|
||
|
void
|
||
|
readChar(Lexer* l)
|
||
|
{
|
||
|
l->column++;
|
||
|
if (l->readPosition >= l->rawLen)
|
||
|
{
|
||
|
l->ch = 0;
|
||
|
}
|
||
|
else
|
||
|
{
|
||
|
l->ch = l->rawFile[l->readPosition];
|
||
|
}
|
||
|
|
||
|
l->position = l->readPosition;
|
||
|
l->readPosition++;
|
||
|
}
|
||
|
|
||
|
void
|
||
|
Parse(Lexer* l)
|
||
|
{
|
||
|
}
|
||
|
|
||
|
// isLetter returns 1 when `ch` is an ASCII letter (a-z or A-Z) or an
// underscore, and 0 otherwise.
int
isLetter(char ch)
{
    if (ch == '_')
    {
        return 1;
    }
    if (ch >= 'a' && ch <= 'z')
    {
        return 1;
    }
    return (ch >= 'A' && ch <= 'Z');
}
|
||
|
|
||
|
// isDigit returns 1 when `ch` is an ASCII decimal digit ('0'-'9') and 0
// otherwise.
int
isDigit(char ch)
{
    // Characters below '0' wrap to a large unsigned value, so one comparison
    // covers both ends of the range.
    unsigned offset = (unsigned)(ch - '0');
    return offset < 10u;
}
|
||
|
|
||
|
void
|
||
|
FreeToken(Token* t)
|
||
|
{
|
||
|
free(t->literal);
|
||
|
free(t);
|
||
|
}
|
||
|
|
||
|
static
|
||
|
Token*
|
||
|
newToken(Lexer* l,
|
||
|
TokenType tt)
|
||
|
{
|
||
|
Token* tok = malloc(sizeof(Token));
|
||
|
char* nc = malloc(sizeof(char)+1);
|
||
|
*nc = l->ch;
|
||
|
nc[1] = '\0';
|
||
|
tok->type = tt;
|
||
|
tok->literal = nc;
|
||
|
tok->line = l->line;
|
||
|
tok->column = l->column;
|
||
|
return tok;
|
||
|
}
|
||
|
|
||
|
static
|
||
|
Token*
|
||
|
newIdentToken(Lexer* l,
|
||
|
char* literal,
|
||
|
TokenType tt)
|
||
|
{
|
||
|
Token* tok = malloc(sizeof(Token));
|
||
|
tok->type = tt;
|
||
|
tok->literal = literal;
|
||
|
tok->line = l->line;
|
||
|
tok->column = l->column;
|
||
|
return tok;
|
||
|
}
|