Initial commit

This commit is contained in:
Zorchenhimer 2021-07-14 11:15:46 -04:00
commit 5e1d6ae207
12 changed files with 775 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
*.o
*.txt
readme

23
Makefile Normal file
View File

@ -0,0 +1,23 @@
# Builds the `readme` binary from the C sources in this directory.
#   make / make all  - build ./readme
#   make run         - build, then execute ./readme
#   make clean       - remove objects and the binary

# Only targets that actually exist (and are not files) are listed here.
.PHONY: all run clean

CC=gcc
CFLAGS=-Wall -Wpedantic -Werror -std=c99
OBJ=main.o lexer.o token.o node.o

all: readme

run: readme
	./readme

readme: ${OBJ}
	${CC} ${CFLAGS} -o $@ $^

# Header prerequisites: without these, editing a header does not rebuild the
# objects that include it.  The recipe still comes from the .c.o rule below.
main.o: main.h token.h lexer.h node.h
lexer.o: lexer.h token.h
token.o: token.h
node.o: node.h token.h

#token.o: token.h token.c
#	${CC} ${CFLAGS} -o $@ $<

.c.o:
	${CC} ${CFLAGS} -c -o $@ $<

# -f keeps `make clean` quiet and successful when nothing has been built yet.
clean:
	rm -f *.o readme

235
lexer.c Normal file
View File

@ -0,0 +1,235 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lexer.h"
static void readChar(Lexer* l);
static char* readIdentifier(Lexer* l);
static char* readNumber(Lexer* l);
static int isLetter(char c);
static int isDigit(char c);
static Token* newToken(Lexer* l, TokenType tt);
static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
/*
 * NewLexer - read a whole file into memory and prime a lexer over it.
 *
 * filename: path of the file to tokenize.
 *
 * Returns a heap-allocated Lexer whose current character is the first byte
 * of the file (line 1, column 1).  Returns NULL when the file cannot be
 * opened, measured or read completely, or when memory runs out; the file
 * handle and any partial allocations are released on every path.
 * The caller owns the result and releases it with FreeLexer().
 */
Lexer*
NewLexer(char* filename)
{
    Lexer* state = NULL;
    long size = 0;
    int fileSize = 0;
    size_t got = 0;

    /* Binary mode: the byte count reported by ftell() must match what
     * fread() returns, which text mode does not guarantee on platforms that
     * translate line endings.  '\r' is skipped by NextToken() anyway. */
    FILE* fp = fopen(filename, "rb");
    if (fp == NULL)
    {
        printf("Can't open the file for some reason\n");
        return NULL;
    }

    if (fseek(fp, 0, SEEK_END) != 0)
    {
        printf("Can't determine the file size\n");
        goto fail;
    }
    size = ftell(fp);
    fileSize = (int)size;
    /* Reject ftell() errors and sizes that do not survive the trip to the
     * int used by Lexer.rawLen. */
    if (size < 0 || (long)fileSize != size || fseek(fp, 0, SEEK_SET) != 0)
    {
        printf("Can't determine the file size\n");
        goto fail;
    }
    printf("fileSize: %d\n", fileSize);

    /* calloc zeroes position, readPosition, column and ch; readChar() below
     * reads readPosition and increments column, so they must start at 0. */
    state = calloc(1, sizeof *state);
    if (state == NULL)
    {
        printf("Out of memory\n");
        goto fail;
    }
    state->rawFile = malloc((size_t)fileSize + 1);
    if (state->rawFile == NULL)
    {
        printf("Out of memory\n");
        goto fail;
    }
    state->rawLen = fileSize;

    got = fread(state->rawFile, sizeof(char), (size_t)fileSize, fp);
    if (got != (size_t)fileSize)
    {
        printf("something borked. only read %d bytes of %d\n", (int)got, fileSize);
        goto fail;
    }
    fclose(fp);

    state->rawFile[fileSize] = '\0';
    state->line = 1;
    readChar(state); /* load the first character into state->ch */
    return state;

fail:
    if (state != NULL)
    {
        free(state->rawFile);
        free(state);
    }
    fclose(fp);
    return NULL;
}
/*
 * FreeLexer - release a lexer created by NewLexer(), including its file
 * buffer.  Passing NULL (which NewLexer() returns on failure) is a no-op.
 */
void
FreeLexer(Lexer* l)
{
    if (l == NULL)
    {
        return;
    }
    free(l->rawFile);
    free(l);
}
/*
 * NextToken - produce the token that starts at the lexer's current
 * character and advance past it.
 *
 * Carriage returns are skipped without producing a token.  Single-character
 * tokens carry that character as their literal; runs of letters become
 * TT_WORD and runs of digits become TT_NUMBER, with the column of their
 * first character.  Anything unrecognised yields TT_ILLEGAL, and the NUL
 * that readChar() reports at end of input yields TT_EOF.
 *
 * The returned token is heap-allocated and owned by the caller.
 */
Token*
NextToken(Lexer* l)
{
    TokenType kind;

    /* '\r' never forms a token: step over any run of them first. */
    while (l->ch == '\r')
    {
        readChar(l);
    }

    switch (l->ch) {
    case '#':  kind = TT_HASH;       break;
    case '*':  kind = TT_ASTERISK;   break;
    case '_':  kind = TT_UNDERSCORE; break;
    case '-':  kind = TT_DASH;       break;
    case '.':  kind = TT_PERIOD;     break;
    case '`':  kind = TT_BACKTICK;   break;
    case '\0': kind = TT_EOF;        break;
    case '\n': kind = TT_NEWLINE;    break;
    case ' ':
    case '\t': kind = TT_WHITESPACE; break;
    default:
        if (isLetter(l->ch) || isDigit(l->ch))
        {
            /* Multi-character token: the reader helpers already leave the
             * lexer on the first character after the run, so return
             * without the trailing readChar() used for single characters. */
            const int startColumn = l->column;
            const int isWord = isLetter(l->ch);
            char* text = isWord ? readIdentifier(l) : readNumber(l);
            Token* run = newIdentToken(l, text, isWord ? TT_WORD : TT_NUMBER);
            run->column = startColumn;
            return run;
        }
        kind = TT_ILLEGAL;
        break;
    }

    /* Build the token before touching line/column so a newline token
     * reports the line it ends, not the one it starts. */
    Token* single = newToken(l, kind);
    if (kind == TT_NEWLINE)
    {
        l->line++;
        l->column = 0;
    }
    readChar(l);
    return single;
}
/*
 * readNumber - consume the run of decimal digits starting at the current
 * character and return it as a freshly allocated, NUL-terminated string.
 * On return the lexer sits on the first non-digit.  The caller owns the
 * returned buffer.
 */
static
char*
readNumber(Lexer* l)
{
    const int first = l->position;

    for (; isDigit(l->ch); readChar(l))
    {
        /* advancing is the whole job */
    }

    const int count = l->position - first;
    char* text = malloc((size_t)count + 1);
    memcpy(text, l->rawFile + first, count);
    text[count] = '\0';
    return text;
}
/*
 * readIdentifier - consume the run of isLetter() characters starting at the
 * current character and return a heap copy of it, NUL-terminated.
 * On return the lexer sits on the first character that is not a letter.
 * The caller owns the returned buffer.
 */
static
char*
readIdentifier(Lexer* l)
{
    const int begin = l->position;

    for (;;)
    {
        if (!isLetter(l->ch))
        {
            break;
        }
        readChar(l);
    }

    const int span = l->position - begin;
    char* word = malloc((size_t)span + 1);
    memcpy(word, l->rawFile + begin, span);
    word[span] = '\0';
    return word;
}
/*
 * readChar - advance the lexer by one character.
 *
 * Loads the byte at readPosition into l->ch (or 0 once readPosition reaches
 * rawLen), records that index as the current position, moves readPosition
 * one further, and bumps the column counter.
 */
static
void
readChar(Lexer* l)
{
    const int next = l->readPosition;

    l->column += 1;
    l->ch = (next < l->rawLen) ? l->rawFile[next] : 0;
    l->position = next;
    l->readPosition = next + 1;
}
/*
 * Parse - placeholder entry point; it currently does nothing with the
 * lexer it is given.
 */
void
Parse(Lexer* l)
{
    (void)l; /* intentionally unused */
}
/*
 * isLetter - return 1 when ch is an ASCII letter (a-z, A-Z) or an
 * underscore, 0 otherwise.
 */
int
isLetter(char ch)
{
    if (ch == '_')
    {
        return 1;
    }
    if (ch >= 'a' && ch <= 'z')
    {
        return 1;
    }
    return (ch >= 'A' && ch <= 'Z');
}
/*
 * isDigit - return 1 when ch is an ASCII decimal digit ('0'..'9'),
 * 0 otherwise.
 */
int
isDigit(char ch)
{
    return !(ch < '0' || ch > '9');
}
/*
 * FreeToken - release a token together with the literal string it owns.
 * Passing NULL is a no-op.
 */
void
FreeToken(Token* t)
{
    if (t == NULL)
    {
        return;
    }
    free(t->literal);
    free(t);
}
/*
 * newToken - build a single-character token from the lexer's current
 * character.
 *
 * l:  lexer supplying the character, line and column.
 * tt: type to give the token.
 *
 * The literal is a heap-allocated one-character string owned by the token.
 * Every field of the token is initialised (printBuff starts out NULL).
 * Returns NULL if memory cannot be allocated.
 */
static
Token*
newToken(Lexer* l,
         TokenType tt)
{
    Token* tok = malloc(sizeof *tok);
    char* text = malloc(2); /* one character plus the terminator */

    if (tok == NULL || text == NULL)
    {
        free(tok);
        free(text);
        return NULL;
    }

    text[0] = l->ch;
    text[1] = '\0';

    tok->type = tt;
    tok->literal = text;
    tok->line = l->line;
    tok->column = l->column;
    tok->printBuff = NULL;
    return tok;
}
/*
 * newIdentToken - wrap an already-extracted literal in a token.
 *
 * l:       lexer supplying the line and (current) column.
 * literal: heap-allocated text; ownership passes to the token.
 * tt:      type to give the token.
 *
 * Every field of the token is initialised (printBuff starts out NULL).
 * Returns NULL if the token cannot be allocated; in that case the literal
 * is freed here, since the caller has already handed it over.
 */
static
Token*
newIdentToken(Lexer* l,
              char* literal,
              TokenType tt)
{
    Token* tok = malloc(sizeof *tok);
    if (tok == NULL)
    {
        free(literal);
        return NULL;
    }

    tok->type = tt;
    tok->literal = literal;
    tok->line = l->line;
    tok->column = l->column;
    tok->printBuff = NULL;
    return tok;
}

50
lexer.h Normal file
View File

@ -0,0 +1,50 @@
#include "token.h"
#ifndef LEXER_H
#define LEXER_H
//typedef enum NodeType {
// NT_Root,
// NT_Header1,
// NT_Header2,
// NT_Header3,
// NT_ListItem,
// NT_OrderedListItem,
// NT_Paragraph,
// NT_PlainText,
// NT_BoldText,
// NT_UnderlineText,
// NT_InlineCode,
// NT_BlockCode,
//} NodeType;
/*
 * Lexer state over one in-memory file.  Created by NewLexer(), advanced one
 * character at a time by the reader in lexer.c, consumed by NextToken().
 */
typedef struct Lexer {
    char* rawFile;      // heap buffer with the file contents plus a trailing '\0'
    int rawLen;         // number of bytes read from the file (terminator excluded)

    int position;       // current index (index of `ch` within rawFile)
    int readPosition;   // next index (the one the next read will load)
    char ch;            // character under examination; 0 once input is exhausted

    // values for current index
    int line;           // starts at 1, incremented when a newline token is produced
    int column;         // incremented on every read, reset to 0 at a newline
} Lexer;
//typedef struct Node {
// NodeType type;
// char RawText;
// int LineNumber;
//
// //struct Node **ChildNodes;
// void** ChildNodes;
// int ChildCount;
//} Node;
/*
 * Public lexer API.
 *
 * NewLexer() returns NULL on failure; release a lexer with FreeLexer().
 * Tokens returned by NextToken() are heap-allocated and are released with
 * FreeToken().
 */
Lexer* NewLexer(char* filename);
void FreeLexer(Lexer* l);
Token* NextToken(Lexer* l);
void FreeToken(Token* t);
/* NOTE(review): lexer.c only defines a file-local `readChar`; no definition
 * of ReadChar is visible, so calling it would fail at link time. */
void ReadChar(Lexer* l);
void Parse(Lexer* l);
#endif

99
main.c Normal file
View File

@ -0,0 +1,99 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "main.h"
#include "token.h"
#include "lexer.h"
#include "node.h"
/*
* RawText ""
* LineNumber 0
* NodeType NT_Root
* ChildNodes
* RawText "# Header1"
* LineNumber 1
* NodeType NT_Header1
* ChildNodes
* {"Some text."}
*
* RawText "## Header2"
*/
/*
* NodeType NT_Root
* ChildNodes
* RawText "## Header2"
* ChildNodes
* paragraph
* ChildNodes
* {*bold text*}
* {_underlined text_}
* paragraph
*
*
*/
//Node* ParseLine(char *buffer);
void writeTokenFile(TokenList* tl);
int
main(int argc, const char** argv)
{
Lexer* l = NewLexer("sample.md");
TokenList* current = malloc(sizeof(TokenList));
TokenList* tl = current;//= malloc(sizeof(TokenList));
current->token = NULL;
TokenType tt;
do
{
Token* t = NextToken(l);
tt = t->type;
current = TokenListAdd(current, t);
}
while(tt != TT_EOF);
writeTokenFile(tl);
ParseNodes(tl);
printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
l->rawLen,
l->position,
l->readPosition,
l->ch,
l->line,
l->column
);
return 0;
}
/*
 * writeTokenFile - dump every token in the list to "tokens.txt", one
 * TokenString() line per token, and print the number written.
 *
 * tl: head of a NULL-terminated token list.
 *
 * The first token is also echoed to stdout.  Nodes without a token are
 * skipped (with a note when it is the head node).  The whole list is
 * written, including its final node.  Failure to open or flush the file is
 * reported on stdout.
 */
void
writeTokenFile(TokenList* tl)
{
    FILE* fp = fopen("tokens.txt", "w");
    if (fp == NULL)
    {
        printf("unable to open tokens.txt\n");
        return;
    }

    int count = 0;
    for (TokenList* current = tl; current != NULL; current = current->next)
    {
        if (current->token == NULL)
        {
            if (current == tl)
            {
                printf("first token null\n");
            }
            continue; /* nothing to format for an empty node */
        }

        /* TokenString() returns a shared static buffer: use it before the
         * next call. */
        char* text = TokenString(current->token);
        if (current == tl)
        {
            printf("%s\n", text);
        }
        fprintf(fp, "%s\n", text);
        count++;
    }

    /* fclose() flushes; a failure here means the file is incomplete. */
    if (fclose(fp) != 0)
    {
        printf("error while writing tokens.txt\n");
    }
    printf("Token count: %d\n", count);
}

7
main.h Normal file
View File

@ -0,0 +1,7 @@
#ifndef MAIN_H
#define MAIN_H
/* Buffer size constant.  NOTE(review): none of the sources shown alongside
 * this header reference it - confirm whether it is still needed. */
#define MAXBUFFER 1024
#endif

104
node.c Normal file
View File

@ -0,0 +1,104 @@
#include <stdio.h>
#include <string.h>
#include "node.h"
#define STRING_BUFF_SIZE 1024
static char stringBuff[STRING_BUFF_SIZE];
Node* parseHeader(TokenList** list);
/*
 * ParseNodes - walk a token list and hand every header line to
 * parseHeader().
 *
 * list: head of a NULL-terminated token list; may be NULL or empty.
 *
 * Returns a freshly allocated single-element NodeList owned by the caller,
 * or NULL if it cannot be allocated.  Its `node` holds the result of the
 * most recent parseHeader() call (NULL when no header was seen); results
 * are not chained into a longer list.  Tokens other than TT_HASH are
 * skipped.
 */
NodeList*
ParseNodes(TokenList* list)
{
    NodeList* nl = malloc(sizeof *nl);
    if (nl == NULL)
    {
        return NULL;
    }
    nl->next = NULL;
    nl->node = NULL;

    for (TokenList* current = list; current != NULL; current = current->next)
    {
        if (current->token == NULL)
        {
            continue; /* empty list node: nothing to classify */
        }

        switch (current->token->type) {
        case TT_HASH:
            /* Start of a header.  parseHeader() moves `current` to the last
             * token it consumed, so the loop resumes after the header. */
            nl->node = parseHeader(&current);
            break;
        case TT_NEWLINE:
        default:
            break;
        }
    }

    return nl;
}
/*
 * parseHeader - parse one header line starting at a TT_HASH token.
 *
 * list: in/out cursor.  On entry *list is the first '#' token.  On return
 *       it is the last token this call consumed, so the caller continues
 *       from the token after it.
 *
 * Counts the leading '#' tokens, skips the whitespace after them, then
 * concatenates the literals of the remaining tokens on the line into the
 * file-level stringBuff and prints the hash count and text.
 *
 * A header with no text (hashes followed directly by a newline or by the
 * end of the list) is reported as "Header missing text" and stops at that
 * newline, leaving the following line untouched.
 *
 * Returns NULL in every case: no Node is constructed yet.
 */
Node*
parseHeader(TokenList** list)
{
    TokenList* l = *list;

    // Count the number of TT_HASH tokens
    int count = 1;
    while (l->next != NULL && l->next->token->type == TT_HASH)
    {
        count++;
        l = l->next;
    }

    if (l->next == NULL)
    {
        *list = l;
        printf("Header missing text\n");
        return NULL;
    }
    l = l->next;

    // Trim leading whitespace
    while (l->next != NULL && l->token->type == TT_WHITESPACE)
    {
        l = l->next;
    }

    // Nothing but hashes (and whitespace) on this line: stop here rather
    // than appending the newline and running on into the next line.
    if (l->next == NULL || l->token->type == TT_NEWLINE)
    {
        *list = l;
        printf("Header missing text\n");
        return NULL;
    }

    // Append literals up to, but not including, the newline.  `used` tracks
    // the text length so the buffer is not re-measured for every token.
    size_t used = 0;
    stringBuff[0] = '\0';
    for (;;)
    {
        size_t litSize = strlen(l->token->literal);
        if (used + litSize + 1 > sizeof stringBuff)
        {
            *list = l;
            printf("Buffer not big enough!\n");
            return NULL;
        }

        memcpy(stringBuff + used, l->token->literal, litSize + 1);
        used += litSize;

        if (l->next == NULL || l->next->token->type == TT_NEWLINE)
        {
            break;
        }
        l = l->next;
    }

    *list = l;
    printf("header hash count: %d\ntext: '%s'\n", count, stringBuff);
    return NULL;
}

43
node.h Normal file
View File

@ -0,0 +1,43 @@
#include <stdlib.h>
#include "token.h"
#ifndef NODE_H
#define NODE_H
/*
 * Kinds of document node.  The names follow the markdown constructs listed
 * in the project readme.  NOTE(review): node.c as shown does not assign any
 * of these values yet.
 */
typedef enum {
    NT_Header1,
    NT_Header2,
    NT_Header3,
    NT_Header4,
    NT_Paragraph,
    NT_UnorderedList,
    NT_OrderedList,
    NT_InlineCode,
    NT_BlockCode,
    NT_BlockQuote,
    NT_Bold,
    NT_Underline,
} NodeType;
/* Forward declaration so Node can point at a list of child nodes before
 * NodeList itself is defined. */
struct NodeList;

/* A document node: its kind plus a pointer to a list of child nodes. */
typedef struct Node {
    NodeType type;
    struct NodeList* children;
} Node;
/*
 * Singly linked list of nodes.
 *
 * node: the node held by this list element (may be NULL).
 * next: the following list element, or NULL at the end of the list.  It is
 *       typed as a list element, not a Node, so a list can be walked with
 *       `elem = elem->next`.
 */
typedef struct NodeList {
    struct Node* node;
    struct NodeList* next;
} NodeList;
/*
 * Header-specific node record: a node kind, a pointer to another Node and
 * the header's raw text.
 * NOTE(review): node.c as shown never creates one of these, and how `next`
 * relates to NodeList is not established by the visible code - confirm.
 */
typedef struct {
    NodeType type;
    struct Node* next;
    char* rawText;
} HeaderNode;
NodeList* ParseNodes(TokenList* list);
#endif

28
readme.md Normal file
View File

@ -0,0 +1,28 @@
# Terminal Markdown Viewer
## Goals
To render markdown in the terminal and use colors, font weights, etc, to
display the document.
## Implemented syntax
- Headers
- Unordered lists
- Ordered lists
- Inline code
- Block code
- Block quote?
- Bold
- Underline
### Maybes
- Task list
- Explicit colors
- Inter-document links
### nopes
- Tables
- Syntax highlighting code

46
sample.md Normal file
View File

@ -0,0 +1,46 @@
# Header 1
Some text.
## Header 2
*bold text*
_underlined text_
Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas
pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
```
Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui.
```
### Header 3
- List item one.
- List item two.
- List item three.
- List item four.
1. Ordered list one
1. Ordered list two
1. Ordered list three
1. Ordered list four
- Toplevel one
- Second level one
- Second level two
- Toplevel two
- Second level one
- Third level
- Second level two
1. Ordered toplevel one
1. Ordered second level one
1. Ordered second level two
1. Ordered toplevel two
1. Ordered second level one
1. Ordered third level
1. Ordered second level two

99
token.c Normal file
View File

@ -0,0 +1,99 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "token.h"
#define STRING_BUFF_SIZE 1024
static char stringBuff[STRING_BUFF_SIZE];
char* printableOnly(char* input);
/*
 * TokenListAdd - append a token to a token list.
 *
 * current: the list's tail node.  A node whose token is NULL is treated as
 *          an empty placeholder and is filled in place instead of being
 *          followed by a new node.
 * next:    the token to store; the list keeps the pointer, it is not copied.
 *
 * Returns the node now holding the token (the new tail), whose `next` is
 * always NULL, or NULL if a new node could not be allocated (the list is
 * left unchanged in that case).
 */
TokenList*
TokenListAdd(TokenList* current, Token* next)
{
    if (current->token == NULL)
    {
        printf("current->token == null\n");
        current->token = next;
        current->next = NULL; /* an empty placeholder has no successors */
        return current;
    }

    TokenList* nl = malloc(sizeof *nl);
    if (nl == NULL)
    {
        return NULL;
    }
    nl->token = next;
    nl->next = NULL; /* terminate the list: walkers stop on NULL */
    current->next = nl;

    return nl;
}
/*
 * TokenString - format a token as "[line:column] Type: NAME Literal: 'text'".
 *
 * The literal is passed through printableOnly() so control and non-ASCII
 * bytes appear as \xNN escapes.
 *
 * Returns a pointer to a static buffer shared by all calls: the text is
 * overwritten by the next call and must not be freed.  Output longer than
 * the buffer is truncated.
 */
char*
TokenString(Token* t)
{
    /* printableOnly() allocates; release its result once it is formatted. */
    char* literal = printableOnly(t->literal);

    snprintf(stringBuff, sizeof stringBuff, "[%d:%d] Type: %s Literal: '%s'",
        t->line,
        t->column,
        TokenTypeString(t->type),
        literal != NULL ? literal : ""
    );

    free(literal);
    return stringBuff;
}
/*
 * TokenTypeString - name of a token type as a string constant, e.g.
 * "TT_HASH".  A value outside the enumeration yields an empty string.
 * The result points at static storage and must not be modified or freed.
 */
char*
TokenTypeString(TokenType tt)
{
    char* name = "\0"; /* fallback for values not listed below */

    switch (tt) {
    case TT_ILLEGAL:    name = "TT_ILLEGAL";    break;
    case TT_EOF:        name = "TT_EOF";        break;
    case TT_HASH:       name = "TT_HASH";       break;
    case TT_ASTERISK:   name = "TT_ASTERISK";   break;
    case TT_UNDERSCORE: name = "TT_UNDERSCORE"; break;
    case TT_DASH:       name = "TT_DASH";       break;
    case TT_PERIOD:     name = "TT_PERIOD";     break;
    case TT_BACKTICK:   name = "TT_BACKTICK";   break;
    case TT_WHITESPACE: name = "TT_WHITESPACE"; break;
    case TT_NEWLINE:    name = "TT_NEWLINE";    break;
    case TT_WORD:       name = "TT_WORD";       break;
    case TT_NUMBER:     name = "TT_NUMBER";     break;
    }

    return name;
}
/*
 * printableOnly - copy a string, replacing every byte outside printable
 * ASCII (0x20..0x7E) with a four-character "\xNN" escape (uppercase hex).
 *
 * input: NUL-terminated string to convert; not modified.
 *
 * Returns a newly allocated NUL-terminated string that the caller must
 * free(), or NULL if memory cannot be allocated.
 */
char*
printableOnly(char* input)
{
    size_t len = strlen(input);

    /* Worst case every byte expands to four characters. */
    char* str = malloc(len * 4 + 1);
    if (str == NULL)
    {
        return NULL;
    }

    size_t j = 0;
    for (size_t i = 0; i < len; i++)
    {
        /* Work on the unsigned value: where plain char is signed, bytes
         * >= 0x80 are negative and would be sign-extended when formatted. */
        unsigned char byte = (unsigned char)input[i];

        if (byte < 0x20 || byte > 0x7E)
        {
            // hex notation
            snprintf(&str[j], 5, "\\x%02X", (unsigned)byte);
            j += 4;
        }
        else
        {
            str[j++] = (char)byte;
        }
    }
    str[j] = '\0';

    return str;
}

38
token.h Normal file
View File

@ -0,0 +1,38 @@
#ifndef TOKEN_H
#define TOKEN_H
/* Token kinds produced by the lexer's NextToken(). */
typedef enum {
    TT_ILLEGAL,     // any character the lexer does not recognise
    TT_EOF,         // end of input (NUL character)
    TT_HASH,        // #
    TT_ASTERISK,    // *
    TT_UNDERSCORE,  // _
    TT_DASH,        // -
    TT_PERIOD,      // .
    TT_BACKTICK,    // `
    TT_WHITESPACE,  // a single space or tab
    TT_NEWLINE,     // \n
    TT_WORD,        // run of letters/underscores
    TT_NUMBER,      // run of decimal digits
} TokenType;
/* One lexical token together with where it was found. */
typedef struct Token {
    TokenType type;     // kind of token
    char* literal;      // heap-allocated, NUL-terminated source text of the token
    int line;           // lexer line when the token was created
    int column;         // lexer column; for words/numbers, the column of the first character
    char* printBuff;    // NOTE(review): not assigned or read by the code shown - confirm purpose
} Token;
/* Singly linked list of tokens, extended with TokenListAdd(). */
typedef struct TokenList {
    Token* token;               // token held by this node; NULL marks an empty head node
    struct TokenList* next;     // following node; list walkers stop when this is NULL
} TokenList;
TokenList* TokenListAdd(TokenList* current, Token* next);
char* TokenString(Token* t);
char* TokenTypeString(TokenType tt);
#endif