commit 5e1d6ae207241def1a53495bcd635a2d8769db12
Author: Zorchenhimer
Date:   Wed Jul 14 11:15:46 2021 -0400

    Initial commit

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8b0c2b6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+*.o
+*.txt
+readme
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..45b72f7
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,23 @@
+.PHONY: all run test clean
+
+CC=gcc
+CFLAGS=-Wall -Wpedantic -Werror -std=c99
+
+OBJ=main.o lexer.o token.o node.o
+
+all: readme
+
+run: readme
+	./readme
+
+readme: ${OBJ}
+	${CC} ${CFLAGS} -o $@ $^
+
+#token.o: token.h token.c
+#	${CC} ${CFLAGS} -o $@ $<
+
+.c.o:
+	${CC} ${CFLAGS} -c -o $@ $<
+
+clean:
+	-rm *.o readme
diff --git a/lexer.c b/lexer.c
new file mode 100644
index 0000000..e33d038
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,340 @@
+/*
+ * lexer.c - turns the raw bytes of a Markdown file into a stream of Tokens.
+ */
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lexer.h"
+
+static void readChar(Lexer* l);
+static char* readIdentifier(Lexer* l);
+static char* readNumber(Lexer* l);
+static char* copySpan(Lexer* l, int start);
+static int isLetter(char c);
+static int isDigit(char c);
+static void* mustAlloc(size_t size);
+
+static Token* newToken(Lexer* l, TokenType tt);
+static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
+
+/*
+ * Loads all of `filename` into memory and returns a Lexer positioned on its
+ * first character.
+ *
+ * Returns NULL, after printing a message, when the file cannot be opened,
+ * measured or read in full, or when memory runs out.  Free the result with
+ * FreeLexer().
+ */
+Lexer*
+NewLexer(char* filename)
+{
+    // Binary mode keeps the ftell() size and the fread() count in step on
+    // platforms that translate line endings; NextToken() skips '\r' itself.
+    FILE* fp = fopen(filename, "rb");
+    if (fp == NULL)
+    {
+        printf("Can't open the file for some reason\n");
+        return NULL;
+    }
+
+    long fileSize = -1;
+    if (fseek(fp, 0, SEEK_END) == 0)
+    {
+        fileSize = ftell(fp);
+    }
+
+    // rawLen is an int, so the size has to fit in one.
+    if (fileSize < 0 || fileSize >= INT_MAX || fseek(fp, 0, SEEK_SET) != 0)
+    {
+        printf("Can't work out the size of the file\n");
+        fclose(fp);
+        return NULL;
+    }
+    printf("fileSize: %d\n", (int)fileSize);
+
+    // calloc zeroes position, readPosition and column; readChar() reads all
+    // three before anything else has assigned them.
+    Lexer* state = calloc(1, sizeof(Lexer));
+    if (state != NULL)
+    {
+        state->rawFile = malloc((size_t)fileSize + 1);
+    }
+
+    if (state == NULL || state->rawFile == NULL)
+    {
+        printf("Out of memory while loading the file\n");
+        free(state);
+        fclose(fp);
+        return NULL;
+    }
+    state->rawLen = (int)fileSize;
+
+    size_t read = fread(state->rawFile, sizeof(char), (size_t)fileSize, fp);
+    fclose(fp); // done with the stream whether or not the read was complete
+
+    if (read != (size_t)fileSize)
+    {
+        printf("something borked. only read %d bytes of %d\n", (int)read, (int)fileSize);
+
+        free(state->rawFile);
+        free(state);
+
+        return NULL;
+    }
+
+    state->rawFile[fileSize] = '\0';
+    state->line = 1;
+
+    readChar(state);
+    return state;
+}
+
+/* Frees a Lexer made by NewLexer().  NULL is ignored. */
+void
+FreeLexer(Lexer* l)
+{
+    if (l == NULL)
+    {
+        return;
+    }
+
+    free(l->rawFile);
+    free(l);
+}
+
+/*
+ * Returns the token under the cursor and moves the cursor past it.
+ *
+ * Punctuation, whitespace and newlines are one-character tokens; a run of
+ * letters is a TT_WORD, a run of digits a TT_NUMBER, anything else TT_ILLEGAL.
+ * '\r' is skipped, and once the input is used up every call returns TT_EOF.
+ * The caller owns the token and frees it with FreeToken().
+ */
+Token*
+NextToken(Lexer* l)
+{
+    Token* tok;
+    switch (l->ch) {
+        case '#':
+            tok = newToken(l, TT_HASH);
+            break;
+        case '*':
+            tok = newToken(l, TT_ASTERISK);
+            break;
+        case '_':
+            tok = newToken(l, TT_UNDERSCORE);
+            break;
+        case '-':
+            tok = newToken(l, TT_DASH);
+            break;
+        case '.':
+            tok = newToken(l, TT_PERIOD);
+            break;
+        case '`':
+            tok = newToken(l, TT_BACKTICK);
+            break;
+        case '\0':
+            tok = newToken(l, TT_EOF);
+            break;
+        case '\n':
+            tok = newToken(l, TT_NEWLINE);
+            l->line++;
+            l->column = 0; // the readChar() below moves it to column 1
+            break;
+        case ' ':
+        case '\t':
+            tok = newToken(l, TT_WHITESPACE);
+            break;
+        case '\r':
+            readChar(l);
+            return NextToken(l); // carriage returns never become tokens
+        default:
+            if (isLetter(l->ch))
+            {
+                // newIdentToken() runs after the cursor has moved, so the
+                // starting column is saved first and patched in afterwards.
+                int start = l->column;
+                char* literal = readIdentifier(l);
+                tok = newIdentToken(l, literal, TT_WORD);
+                tok->column = start;
+                return tok;
+            }
+            else if (isDigit(l->ch))
+            {
+                int start = l->column;
+                char* literal = readNumber(l);
+                tok = newIdentToken(l, literal, TT_NUMBER);
+                tok->column = start;
+                return tok;
+            }
+            else
+            {
+                tok = newToken(l, TT_ILLEGAL);
+            }
+    }
+
+    readChar(l);
+    return tok;
+}
+
+/* Consumes a run of digits and returns it as a new string (caller frees). */
+static
+char*
+readNumber(Lexer* l)
+{
+    int start = l->position;
+    while (isDigit(l->ch))
+    {
+        readChar(l);
+    }
+
+    return copySpan(l, start);
+}
+
+/* Consumes a run of letters and returns it as a new string (caller frees). */
+static
+char*
+readIdentifier(Lexer* l)
+{
+    int start = l->position;
+    while (isLetter(l->ch))
+    {
+        readChar(l);
+    }
+
+    return copySpan(l, start);
+}
+
+/*
+ * Copies rawFile[start .. l->position) into a fresh NUL-terminated string.
+ * The caller owns the result.
+ */
+static
+char*
+copySpan(Lexer* l, int start)
+{
+    int len = l->position - start;
+    char* out = mustAlloc((size_t)len + 1);
+    memcpy(out, &l->rawFile[start], (size_t)len);
+    out[len] = '\0';
+    return out;
+}
+
+/*
+ * Moves the cursor on by one character.  Past the end of the input `ch` is
+ * 0, which NextToken() reports as TT_EOF.
+ */
+static
+void
+readChar(Lexer* l)
+{
+    l->column++;
+    if (l->readPosition >= l->rawLen)
+    {
+        l->ch = 0;
+    }
+    else
+    {
+        l->ch = l->rawFile[l->readPosition];
+    }
+
+    l->position = l->readPosition;
+    l->readPosition++;
+}
+
+/* Not implemented yet; does nothing. */
+void
+Parse(Lexer* l)
+{
+}
+
+/*
+ * True for ASCII letters and '_'.
+ * NOTE(review): NextToken() turns a leading '_' into TT_UNDERSCORE, but an
+ * underscore inside or at the end of a word ("text_") is kept in the TT_WORD.
+ * Confirm that is wanted before relying on a closing TT_UNDERSCORE.
+ */
+static
+int
+isLetter(char ch)
+{
+    return (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ch == '_');
+}
+
+/* True for the ASCII digits '0' to '9'. */
+static
+int
+isDigit(char ch)
+{
+    return ('0' <= ch && ch <= '9');
+}
+
+/* Frees a token and the literal it owns.  NULL is ignored. */
+void
+FreeToken(Token* t)
+{
+    if (t == NULL)
+    {
+        return;
+    }
+
+    free(t->literal);
+    free(t);
+}
+
+/* malloc() that reports the failure and exits instead of returning NULL. */
+static
+void*
+mustAlloc(size_t size)
+{
+    void* mem = malloc(size);
+    if (mem == NULL)
+    {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    return mem;
+}
+
+/*
+ * Builds a token of type `tt` whose literal is the single character under
+ * the cursor, stamped with the cursor's line and column.
+ */
+static
+Token*
+newToken(Lexer* l,
+        TokenType tt)
+{
+    Token* tok = mustAlloc(sizeof(Token));
+    char* nc = mustAlloc(2); // the character plus its terminator
+    nc[0] = l->ch;
+    nc[1] = '\0';
+    tok->type = tt;
+    tok->literal = nc;
+    tok->line = l->line;
+    tok->column = l->column;
+    tok->printBuff = NULL; // unused so far, but never left indeterminate
+    return tok;
+}
+
+/*
+ * Builds a token of type `tt` that takes ownership of `literal`.  The column
+ * stored is the cursor's current one; NextToken() replaces it with the
+ * column the literal started at.
+ */
+static
+Token*
+newIdentToken(Lexer* l,
+        char* literal,
+        TokenType tt)
+{
+    Token* tok = mustAlloc(sizeof(Token));
+    tok->type = tt;
+    tok->literal = literal;
+    tok->line = l->line;
+    tok->column = l->column;
+    tok->printBuff = NULL;
+    return tok;
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
index 0000000..a1edc7b
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,59 @@
+
+#include "token.h"
+
+#ifndef LEXER_H
+#define LEXER_H
+
+//typedef enum NodeType {
+//    NT_Root,
+//    NT_Header1,
+//    NT_Header2,
+//    NT_Header3,
+//    NT_ListItem,
+//    NT_OrderedListItem,
+//    NT_Paragraph,
+//    NT_PlainText,
+//    NT_BoldText,
+//    NT_UnderlineText,
+//    NT_InlineCode,
+//    NT_BlockCode,
+//} NodeType;
+
+/* Cursor over a file that NewLexer() has loaded into memory. */
+typedef struct Lexer {
+    char* rawFile;      // whole file, NUL-terminated
+    int rawLen;         // bytes in rawFile, terminator not counted
+    int position;       // current index
+    int readPosition;   // next index
+    char ch;            // character under examination
+
+    // values for current index
+    int line;
+    int column;
+
+} Lexer;
+
+//typedef struct Node {
+//    NodeType type;
+//    char RawText;
+//    int LineNumber;
+//
+//    //struct Node **ChildNodes;
+//    void** ChildNodes;
+//    int ChildCount;
+//} Node;
+
+/* Loads `filename`; NULL on failure.  Free the result with FreeLexer(). */
+Lexer* NewLexer(char* filename);
+void FreeLexer(Lexer* l);
+
+/* Next token of the input; the caller frees it with FreeToken(). */
+Token* NextToken(Lexer* l);
+void FreeToken(Token* t);
+
+// NOTE(review): lexer.c defines only a static readChar(); nothing provides
+// this symbol, so calling it will not link.
+void ReadChar(Lexer* l);
+void Parse(Lexer* l);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..4ba37ac
--- /dev/null
+++ b/main.c
@@ -0,0 +1,133 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "main.h"
+#include "token.h"
+#include "lexer.h"
+#include "node.h"
+
+/*
+ * RawText ""
+ * LineNumber 0
+ * NodeType NT_Root
+ * ChildNodes
+ *   RawText "# Header1"
+ *   LineNumber 1
+ *   NodeType NT_Header1
+ *   ChildNodes
+ *     {"Some text."}
+ *
+ *   RawText "## Header2"
+ */
+
+/*
+ * NodeType NT_Root
+ * ChildNodes
+ *   RawText "## Header2"
+ *   ChildNodes
+ *     paragraph
+ *       ChildNodes
+ *         {*bold text*}
+ *         {_underlined text_}
+ *     paragraph
+ *
+ *
+ */
+//Node* ParseLine(char *buffer);
+
+void writeTokenFile(TokenList* tl);
+
+/*
+ * Lexes sample.md into a token list, dumps the list to tokens.txt, runs the
+ * node parser over it and prints the lexer's final state.  Returns 0 on
+ * success and 1 if the input cannot be loaded or memory runs out.
+ */
+int
+main(int argc, const char** argv)
+{
+    Lexer* l = NewLexer("sample.md");
+    if (l == NULL)
+    {
+        // NewLexer() has already printed the reason.
+        return 1;
+    }
+
+    TokenList* tl = malloc(sizeof(TokenList));
+    if (tl == NULL)
+    {
+        printf("out of memory\n");
+        return 1;
+    }
+    // An empty head node: TokenListAdd() fills it before it appends.
+    tl->token = NULL;
+    tl->next = NULL;
+
+    TokenList* current = tl;
+    TokenType tt;
+    do
+    {
+        Token* t = NextToken(l);
+        tt = t->type;
+        current = TokenListAdd(current, t);
+        // The tail has to end in NULL: writeTokenFile() and ParseNodes()
+        // both walk the list until `next` is NULL.
+        current->next = NULL;
+    }
+    while(tt != TT_EOF);
+
+    writeTokenFile(tl);
+    ParseNodes(tl);
+
+    printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
+            l->rawLen,
+            l->position,
+            l->readPosition,
+            l->ch,
+            l->line,
+            l->column
+          );
+    return 0;
+}
+
+/*
+ * Writes one line per token in `tl` to tokens.txt and prints the first token
+ * and the number of tokens to stdout.  Prints a message and returns if the
+ * file cannot be opened.
+ */
+void
+writeTokenFile(TokenList* tl)
+{
+    FILE* fp = fopen("tokens.txt", "w");
+    if (fp == NULL)
+    {
+        printf("unable to open tokens.txt\n");
+        return;
+    }
+
+    int count = 0;
+    // Walk to the last node inclusive so the TT_EOF token is written too.
+    for (TokenList* current = tl; current != NULL; current = current->next)
+    {
+        if (current->token == NULL)
+        {
+            // Only an unfilled head node has no token; nothing to print.
+            if (count == 0)
+            {
+                printf("first token null\n");
+            }
+            continue;
+        }
+
+        char* text = TokenString(current->token);
+        if (count == 0)
+        {
+            printf("%s\n", text);
+        }
+        fprintf(fp, "%s\n", text);
+        count++;
+    }
+    fclose(fp);
+
+    printf("Token count: %d\n", count);
+}
diff --git a/main.h b/main.h
new file mode 100644
index 0000000..6d49149
--- /dev/null
+++ b/main.h
@@ -0,0 +1,7 @@
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#define MAXBUFFER 1024
+
+#endif
diff --git a/node.c b/node.c
new file mode 100644
index 0000000..5dd5537
--- /dev/null
+++ b/node.c
@@ -0,0 +1,125 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "node.h"
+
+#define STRING_BUFF_SIZE 1024
+
+// Scratch space for the header text being assembled by parseHeader().
+static char stringBuff[STRING_BUFF_SIZE];
+
+Node* parseHeader(TokenList** list);
+
+/*
+ * Walks the token list and hands every run of '#' tokens to parseHeader().
+ *
+ * Work in progress: parseHeader() only prints what it finds and returns
+ * NULL, so the NodeList returned here is a single node that holds no Node.
+ * Returns NULL if that node cannot be allocated.  The caller owns the result.
+ */
+NodeList*
+ParseNodes(TokenList* list)
+{
+    NodeList* nl = malloc(sizeof(NodeList));
+    if (nl == NULL)
+    {
+        return NULL;
+    }
+    NodeList* currentNode = nl;
+
+    currentNode->next = NULL;
+    currentNode->node = NULL;
+
+    // `current != NULL` covers a NULL list; the token check covers a head
+    // node that TokenListAdd() has not filled yet.
+    for (TokenList* current = list; current != NULL; current = current->next) {
+        if (current->token == NULL) {
+            continue;
+        }
+
+        switch (current->token->type) {
+            case TT_NEWLINE:
+                break;
+            case TT_HASH:
+                // start of header; parseHeader() moves `current` on to the
+                // last token it used
+                currentNode->node = parseHeader(&current);
+                break;
+            default:
+                break;
+        }
+    }
+
+    return nl;
+}
+
+/*
+ * Reads one header starting at the TT_HASH token `*list`: counts the hashes,
+ * skips the whitespace after them and joins the literals up to the end of
+ * the line into stringBuff, then prints the count and the text.
+ *
+ * On success `*list` is moved to the last token of the header.  If the token
+ * list ends before any header text, or the text does not fit in stringBuff,
+ * a message is printed and `*list` is left alone.  No Node is built yet, so
+ * the return value is always NULL.
+ */
+Node*
+parseHeader(TokenList** list)
+{
+    TokenList* l = *list;
+    // Count the number of TT_HASH tokens
+    int count = 1;
+    while (l->next != NULL && l->next->token->type == TT_HASH)
+    {
+        count++;
+        l = l->next;
+    }
+
+    if (l->next == NULL)
+    {
+        printf("Header missing text\n");
+        return NULL;
+    }
+    l = l->next;
+
+    // Trim leading whitespace
+    while (l->next != NULL && l->token->type == TT_WHITESPACE)
+    {
+        l = l->next;
+    }
+
+    if (l->next == NULL)
+    {
+        printf("Header missing text\n");
+        return NULL;
+    }
+
+    size_t used = 0;
+    stringBuff[0] = '\0';
+    while (1)
+    {
+        size_t litSize = strlen(l->token->literal);
+        if (used + litSize + 1 > STRING_BUFF_SIZE)
+        {
+            printf("Buffer not big enough!\n");
+            return NULL;
+        }
+        // The size was checked above, so a plain copy (terminator included)
+        // is safe, and tracking `used` avoids rescanning the buffer.
+        memcpy(&stringBuff[used], l->token->literal, litSize + 1);
+        used += litSize;
+
+        if (l->next == NULL || l->next->token->type == TT_NEWLINE)
+        {
+            break;
+        }
+
+        l = l->next;
+    }
+
+    *list = l;
+    printf("header hash count: %d\ntext: '%s'\n", count, stringBuff);
+    return NULL;
+}
+
diff --git a/node.h b/node.h
new file mode 100644
index 0000000..12a6210
--- /dev/null
+++ b/node.h
@@ -0,0 +1,45 @@
+#include <stdlib.h>
+
+#include "token.h"
+
+#ifndef NODE_H
+#define NODE_H
+
+typedef enum {
+    NT_Header1,
+    NT_Header2,
+    NT_Header3,
+    NT_Header4,
+    NT_Paragraph,
+    NT_UnorderedList,
+    NT_OrderedList,
+    NT_InlineCode,
+    NT_BlockCode,
+    NT_BlockQuote,
+    NT_Bold,
+    NT_Underline,
+} NodeType;
+
+struct NodeList;
+
+typedef struct Node {
+    NodeType type;
+    struct NodeList* children;
+} Node;
+
+// Singly linked list of nodes; `next` is NULL on the last element.
+typedef struct NodeList {
+    struct Node* node;
+    struct NodeList* next;  // the next list element, not a Node
+} NodeList;
+
+typedef struct {
+    NodeType type;
+    struct Node* next;
+    char* rawText;
+} HeaderNode;
+
+/* Parses a token list into nodes; the caller owns the returned list. */
+NodeList* ParseNodes(TokenList* list);
+
+#endif
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..9e245ad
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,28 @@
+# Terminal Markdown Viewer
+
+## Goals
+
+To render markdown in the terminal and use colors, font weights, etc, to
+display the document.
+
+## Implemented syntax
+
+- Headers
+- Unordered lists
+- Ordered lists
+- Inline code
+- Block code
+- Block quote?
+- Bold
+- Underline
+
+### maybes
+
+- Task list
+- Explicit colors
+- Inter-document links
+
+### nopes
+
+- Tables
+- Syntax highlighting code
diff --git a/sample.md b/sample.md
new file mode 100644
index 0000000..a0e8947
--- /dev/null
+++ b/sample.md
@@ -0,0 +1,46 @@
+# Header 1
+
+Some text.
+
+## Header 2
+
+*bold text*
+_underlined text_
+
+Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas
+pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
+
+Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
+rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
+
+```
+Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui.
+```
+
+### Header 3
+
+- List item one.
+- List item two.
+- List item three.
+- List item four.
+
+1. Ordered list one
+1. Ordered list two
+1. Ordered list three
+1. Ordered list four
+
+- Toplevel one
+    - Second level one
+    - Second level two
+- Toplevel two
+    - Second level one
+        - Third level
+    - Second level two
+
+1. Ordered toplevel one
+    1. Ordered second level one
+    1. Ordered second level two
+1. Ordered toplevel two
+    1. Ordered second level one
+        1. Ordered third level
+    1. Ordered second level two
diff --git a/token.c b/token.c
new file mode 100644
index 0000000..b559dce
--- /dev/null
+++ b/token.c
@@ -0,0 +1,137 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "token.h"
+
+#define STRING_BUFF_SIZE 1024
+
+// Shared output buffer for TokenString(); overwritten on every call.
+static char stringBuff[STRING_BUFF_SIZE];
+
+char* printableOnly(char* input);
+
+/*
+ * Adds `next` to the list whose last node is `current` and returns the new
+ * last node.  A node whose token is still NULL (the empty head that main()
+ * starts with) is filled in place instead of getting a successor.
+ *
+ * The returned node's `next` is always NULL, so the list can be walked until
+ * NULL.  Exits the program if a new node cannot be allocated.
+ */
+TokenList*
+TokenListAdd(TokenList* current, Token* next)
+{
+    if (current->token == NULL)
+    {
+        printf("current->token == null\n");
+        current->token = next;
+        current->next = NULL;
+        return current;
+    }
+
+    TokenList* nl = malloc(sizeof(TokenList));
+    if (nl == NULL)
+    {
+        fprintf(stderr, "out of memory\n");
+        exit(EXIT_FAILURE);
+    }
+    nl->token = next;
+    nl->next = NULL;
+    current->next = nl;
+    return nl;
+}
+
+/*
+ * Formats `t` as "[line:column] Type: ... Literal: '...'" with the literal's
+ * non-printable bytes escaped.  The result lives in a static buffer that the
+ * next call overwrites; it must not be freed.
+ */
+char*
+TokenString(Token* t)
+{
+    char* literal = printableOnly(t->literal);
+    snprintf(stringBuff, sizeof(stringBuff), "[%d:%d] Type: %s Literal: '%s'",
+            t->line,
+            t->column,
+            TokenTypeString(t->type),
+            literal != NULL ? literal : ""
+            );
+    free(literal); // printableOnly() allocates; the copy is in stringBuff now
+
+    return stringBuff;
+}
+
+/* Returns the enumerator's name, or an empty string for an unknown value. */
+char*
+TokenTypeString(TokenType tt)
+{
+    switch (tt) {
+        case TT_ILLEGAL:
+            return "TT_ILLEGAL";
+        case TT_EOF:
+            return "TT_EOF";
+        case TT_HASH:
+            return "TT_HASH";
+        case TT_ASTERISK:
+            return "TT_ASTERISK";
+        case TT_UNDERSCORE:
+            return "TT_UNDERSCORE";
+        case TT_DASH:
+            return "TT_DASH";
+        case TT_PERIOD:
+            return "TT_PERIOD";
+        case TT_BACKTICK:
+            return "TT_BACKTICK";
+        case TT_WHITESPACE:
+            return "TT_WHITESPACE";
+        case TT_NEWLINE:
+            return "TT_NEWLINE";
+        case TT_WORD:
+            return "TT_WORD";
+        case TT_NUMBER:
+            return "TT_NUMBER";
+    }
+
+    return "\0";
+}
+
+/*
+ * Returns a newly allocated copy of `input` in which every byte outside
+ * printable ASCII (below 0x20, or 0x7F and above) is written as \xNN.
+ * Returns NULL if the copy cannot be allocated.  The caller frees the result.
+ */
+char*
+printableOnly(char* input)
+{
+    size_t len = strlen(input);
+    // Worst case: every byte turns into the four characters "\xNN".
+    char* str = malloc((len * 4) + 1);
+    if (str == NULL)
+    {
+        return NULL;
+    }
+
+    size_t j = 0;
+    for (size_t i = 0; i < len; i++)
+    {
+        // Test the byte as unsigned: plain `char` may be signed, and %X
+        // would print a negative value as FFFFFFxx.
+        unsigned char c = (unsigned char)input[i];
+        if (c < 0x20 || c >= 0x7F)
+        {
+            // hex notation
+            snprintf(&str[j], 5, "\\x%02X", (unsigned int)c);
+            j += 4;
+        }
+        else
+        {
+            str[j] = (char)c;
+            j++;
+        }
+    }
+
+    str[j] = '\0';
+    return str;
+}
+
diff --git a/token.h b/token.h
new file mode 100644
index 0000000..fef4d0e
--- /dev/null
+++ b/token.h
@@ -0,0 +1,41 @@
+
+#ifndef TOKEN_H
+#define TOKEN_H
+
+typedef enum {
+    TT_ILLEGAL,
+    TT_EOF,
+    TT_HASH, // #
+    TT_ASTERISK,
+    TT_UNDERSCORE,
+    TT_DASH,
+    TT_PERIOD,
+    TT_BACKTICK,
+    TT_WHITESPACE,
+    TT_NEWLINE,
+    TT_WORD,
+    TT_NUMBER,
+} TokenType;
+
+typedef struct Token {
+    TokenType type;
+    char* literal;      // heap-allocated text of the token; owned by the token
+    int line;
+    int column;
+    char* printBuff;
+} Token;
+
+// Singly linked list of tokens; `next` is NULL on the last node.
+typedef struct TokenList {
+    Token* token;
+    struct TokenList* next;
+} TokenList;
+
+/* Stores `next` in the list ending at `current`; returns the new last node. */
+TokenList* TokenListAdd(TokenList* current, Token* next);
+
+/* Text form of a token, in a static buffer reused by every call. */
+char* TokenString(Token* t);
+char* TokenTypeString(TokenType tt);
+
+#endif