Initial commit

2021-07-14 11:15:46 -04:00 · 2021-07-14 11:15:46 -04:00 · 5e1d6ae207
commit 5e1d6ae207
12 changed files with 775 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
+*.o
+*.txt
+readme
--- a/23
+++ b/23
@ -0,0 +1,23 @@
+# Build, run and clean rules for the `readme` binary.
+# `all`, `run` and `clean` name actions rather than files, so they are phony.
+.PHONY: all run clean
+
+CC=gcc
+CFLAGS=-Wall -Wpedantic -Werror -std=c99
+
+OBJ=main.o lexer.o token.o node.o
+
+all: readme
+
+run: readme
+	./readme
+
+readme: ${OBJ}
+	${CC} ${CFLAGS} -o $@ $^
+
+#token.o: token.h token.c
+#	${CC} ${CFLAGS} -o $@ $<
+
+.c.o:
+	${CC} ${CFLAGS} -c -o $@ $<
+
+# -f keeps `clean` quiet and successful when there is nothing to remove.
+clean:
+	rm -f *.o readme
--- a/lexer.c
+++ b/lexer.c
@ -0,0 +1,235 @@
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "lexer.h"
+
+// Forward declarations of the file-local helpers defined further down.
+
+// Character-level scanning.
+static void readChar(Lexer* l);
+static char* readIdentifier(Lexer* l);
+static char* readNumber(Lexer* l);
+static int isLetter(char c);
+static int isDigit(char c);
+
+// Token construction.
+static Token* newToken(Lexer* l, TokenType tt);
+static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
+
+// NewLexer reads the whole of `filename` into memory and returns a
+// heap-allocated Lexer positioned on the first character.
+//
+// Returns NULL (after printing a diagnostic) when the file cannot be opened,
+// sized or read completely, or when memory runs out. The file handle is
+// closed on every path.
+Lexer*
+NewLexer(char* filename)
+{
+    Lexer* state = NULL;
+    long rawSize = -1;
+    int fileSize = 0;
+    size_t got = 0;
+
+    // Binary mode: in text mode some platforms translate line endings, which
+    // makes fread() return fewer bytes than ftell() reported. NextToken
+    // already skips '\r', so the raw bytes are fine to lex.
+    FILE* fp = fopen(filename, "rb");
+    if (fp == NULL)
+    {
+        printf("Can't open the file for some reason\n");
+        return NULL;
+    }
+
+    if (fseek(fp, 0, SEEK_END) == 0)
+    {
+        rawSize = ftell(fp);
+    }
+    // rawLen is an int and one extra byte is needed for the terminator.
+    if (rawSize < 0 || rawSize >= INT_MAX || fseek(fp, 0, SEEK_SET) != 0)
+    {
+        printf("Can't determine the size of the file\n");
+        goto fail;
+    }
+    fileSize = (int)rawSize;
+    printf("fileSize: %d\n", fileSize);
+
+    // calloc zeroes position, readPosition, column and ch, all of which
+    // readChar() reads before writing.
+    state = calloc(1, sizeof *state);
+    if (state == NULL)
+    {
+        printf("out of memory\n");
+        goto fail;
+    }
+
+    state->rawFile = malloc((size_t)fileSize + 1);
+    if (state->rawFile == NULL)
+    {
+        printf("out of memory\n");
+        goto fail;
+    }
+    state->rawLen = fileSize;
+
+    got = fread(state->rawFile, sizeof(char), (size_t)fileSize, fp);
+    if (got != (size_t)fileSize)
+    {
+        printf("something borked.  only read %d bytes of %d\n", (int)got, fileSize);
+        goto fail;
+    }
+    fclose(fp);
+
+    state->rawFile[fileSize] = '\0';
+    state->line = 1;
+
+    // Load the first character so NextToken can inspect l->ch immediately.
+    readChar(state);
+    return state;
+
+fail:
+    fclose(fp);
+    if (state != NULL)
+    {
+        free(state->rawFile);
+        free(state);
+    }
+    return NULL;
+}
+
+// FreeLexer releases the file buffer and the Lexer itself.
+// Passing NULL is a no-op, mirroring free(); NewLexer returns NULL on failure.
+void
+FreeLexer(Lexer* l)
+{
+    if (l == NULL)
+    {
+        return;
+    }
+
+    free(l->rawFile);
+    free(l);
+}
+
+// NextToken builds a Token for the character(s) at the lexer's current
+// position and advances past them.
+//
+// Most characters map directly to a one-character token. A run of letters
+// becomes a single TT_WORD and a run of digits a single TT_NUMBER; both keep
+// the column where the run started. Carriage returns never produce a token.
+// Anything unrecognised is returned as TT_ILLEGAL.
+Token*
+NextToken(Lexer* l)
+{
+    // Step over any carriage returns before classifying.
+    while (l->ch == '\r')
+    {
+        readChar(l);
+    }
+
+    TokenType type;
+    switch (l->ch) {
+        case '#':  type = TT_HASH;       break;
+        case '*':  type = TT_ASTERISK;   break;
+        case '_':  type = TT_UNDERSCORE; break;
+        case '-':  type = TT_DASH;       break;
+        case '.':  type = TT_PERIOD;     break;
+        case '`':  type = TT_BACKTICK;   break;
+        case '\0': type = TT_EOF;        break;
+        case '\n': type = TT_NEWLINE;    break;
+        case ' ':
+        case '\t': type = TT_WHITESPACE; break;
+        default:
+            if (isLetter(l->ch) || isDigit(l->ch))
+            {
+                // Multi-character token: the read helpers advance the lexer
+                // themselves, so return without the trailing readChar().
+                const int startColumn = l->column;
+                const int isWord = isLetter(l->ch);
+                char* text = isWord ? readIdentifier(l) : readNumber(l);
+                Token* run = newIdentToken(l, text, isWord ? TT_WORD : TT_NUMBER);
+                run->column = startColumn;
+                return run;
+            }
+            type = TT_ILLEGAL;
+            break;
+    }
+
+    Token* single = newToken(l, type);
+    if (type == TT_NEWLINE)
+    {
+        // The token keeps the old line/column; the counters restart after it.
+        l->line++;
+        l->column = 0;
+    }
+
+    readChar(l);
+    return single;
+}
+
+// readNumber consumes the run of digits starting at the current position and
+// returns a newly malloc'd, NUL-terminated copy of it.
+static
+char*
+readNumber(Lexer* l)
+{
+    const int start = l->position;
+
+    for (;;)
+    {
+        if (!isDigit(l->ch))
+        {
+            break;
+        }
+        readChar(l);
+    }
+
+    const int length = l->position - start;
+    char* digits = malloc(sizeof(char) * length + 1);
+    memcpy(digits, l->rawFile + start, length);
+    digits[length] = '\0';
+    return digits;
+}
+
+// readIdentifier consumes the run of letters (as defined by isLetter)
+// starting at the current position and returns a newly malloc'd,
+// NUL-terminated copy of it.
+static
+char*
+readIdentifier(Lexer* l)
+{
+    const int start = l->position;
+
+    for (;;)
+    {
+        if (!isLetter(l->ch))
+        {
+            break;
+        }
+        readChar(l);
+    }
+
+    const int length = l->position - start;
+    char* word = malloc(sizeof(char) * length + 1);
+    memcpy(word, l->rawFile + start, length);
+    word[length] = '\0';
+    return word;
+}
+
+
+// readChar advances the lexer by one character: it bumps the column, loads
+// the byte at readPosition into l->ch (0 once past the end of the buffer),
+// and moves position/readPosition forward.
+static
+void
+readChar(Lexer* l)
+{
+    const int next = l->readPosition;
+
+    l->column += 1;
+    l->ch = (next < l->rawLen) ? l->rawFile[next] : 0;
+
+    l->position = next;
+    l->readPosition = next + 1;
+}
+
+// Parse is an empty placeholder: it accepts a lexer and currently does
+// nothing with it.
+void
+Parse(Lexer* l)
+{
+}
+
+// isLetter returns 1 for ASCII letters and for '_', 0 for everything else.
+// NOTE(review): because '_' counts as a letter, an underscore that follows a
+// word is absorbed into that word rather than lexed as TT_UNDERSCORE —
+// confirm this is intended.
+int
+isLetter(char ch)
+{
+    if (ch == '_')
+    {
+        return 1;
+    }
+    if (ch >= 'a' && ch <= 'z')
+    {
+        return 1;
+    }
+    return (ch >= 'A' && ch <= 'Z');
+}
+
+// isDigit returns 1 for the ASCII digits '0'..'9' and 0 otherwise.
+int
+isDigit(char ch)
+{
+    if (ch < '0')
+    {
+        return 0;
+    }
+    return (ch <= '9');
+}
+
+// FreeToken releases a token together with its literal string.
+// Passing NULL is a no-op, mirroring free().
+void
+FreeToken(Token* t)
+{
+    if (t == NULL)
+    {
+        return;
+    }
+
+    free(t->literal);
+    free(t);
+}
+
+// newToken allocates a token of type `tt` whose literal is the single
+// character currently under the lexer, stamped with the lexer's line and
+// column.
+//
+// Returns NULL if either allocation fails. Every field is initialised,
+// including printBuff, so no later reader sees indeterminate memory.
+static
+Token*
+newToken(Lexer* l,
+         TokenType tt)
+{
+    Token* tok = malloc(sizeof *tok);
+    char* text = malloc(2); // one character plus the terminator
+    if (tok == NULL || text == NULL)
+    {
+        free(text);
+        free(tok);
+        return NULL;
+    }
+
+    text[0] = l->ch;
+    text[1] = '\0';
+
+    tok->type = tt;
+    tok->literal = text;
+    tok->line = l->line;
+    tok->column = l->column;
+    tok->printBuff = NULL;
+    return tok;
+}
+
+// newIdentToken allocates a token of type `tt` that takes ownership of the
+// heap-allocated `literal`, stamped with the lexer's line and column.
+//
+// Returns NULL if the allocation fails; `literal` is freed in that case
+// because the token that would have owned it does not exist. Every field is
+// initialised, including printBuff.
+static
+Token*
+newIdentToken(Lexer* l,
+              char* literal,
+              TokenType tt)
+{
+    Token* tok = malloc(sizeof *tok);
+    if (tok == NULL)
+    {
+        free(literal);
+        return NULL;
+    }
+
+    tok->type = tt;
+    tok->literal = literal;
+    tok->line = l->line;
+    tok->column = l->column;
+    tok->printBuff = NULL;
+    return tok;
+}
--- a/lexer.h
+++ b/lexer.h
@ -0,0 +1,50 @@
+
+#include "token.h"
+
+#ifndef LEXER_H
+#define LEXER_H
+
+//typedef enum NodeType {
+//    NT_Root,
+//    NT_Header1,
+//    NT_Header2,
+//    NT_Header3,
+//    NT_ListItem,
+//    NT_OrderedListItem,
+//    NT_Paragraph,
+//    NT_PlainText,
+//    NT_BoldText,
+//    NT_UnderlineText,
+//    NT_InlineCode,
+//    NT_BlockCode,
+//} NodeType;
+
+// Scanner state over one file that has been read fully into memory.
+typedef struct Lexer {
+    char* rawFile; // file contents, NUL-terminated by NewLexer
+    int rawLen; // number of bytes read from the file (terminator excluded)
+    int position; // current index
+    int readPosition; // next index
+    char ch; // character under examination
+
+    // values for current index
+    int line; // starts at 1; incremented when a newline token is produced
+    int column; // incremented on every character read; reset to 0 on newline
+
+} Lexer;
+
+//typedef struct Node {
+//    NodeType type;
+//    char RawText;
+//    int LineNumber;
+//
+//    //struct Node **ChildNodes;
+//    void** ChildNodes;
+//    int ChildCount;
+//} Node;
+
+// Public lexer API; all of these are defined in lexer.c.
+Lexer* NewLexer(char* filename); // NULL on failure
+void FreeLexer(Lexer* l);
+Token* NextToken(Lexer* l); // caller owns the returned token
+void FreeToken(Token* t);
+void Parse(Lexer* l);
+
+#endif
--- a/main.c
+++ b/main.c
@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "main.h"
+#include "token.h"
+#include "lexer.h"
+#include "node.h"
+
+/*
+ *  RawText ""
+ *  LineNumber 0
+ *  NodeType NT_Root
+ *  ChildNodes
+ *      RawText "# Header1"
+ *      LineNumber 1
+ *      NodeType NT_Header1
+ *      ChildNodes
+ *          {"Some text."}
+ *
+ *      RawText "## Header2"
+ */
+
+/*
+ * NodeType NT_Root
+ * ChildNodes
+ *      RawText "## Header2"
+ *      ChildNodes
+ *          paragraph
+ *          ChildNodes
+ *              {*bold text*}
+ *              {_underlined text_}
+ *          paragraph
+ *
+ *
+ */
+//Node* ParseLine(char *buffer);
+
+void writeTokenFile(TokenList* tl);
+
+// Entry point: lexes sample.md into a token list, dumps the tokens to
+// tokens.txt, runs the node parser over them and prints the final lexer
+// state. Returns 0 on success and 1 if the lexer cannot be created or an
+// allocation fails.
+int
+main(int argc, const char** argv)
+{
+    Lexer* l = NewLexer("sample.md");
+    if (l == NULL)
+    {
+        // NewLexer has already printed the reason.
+        return 1;
+    }
+
+    // The list starts as one empty node; TokenListAdd fills it first.
+    TokenList* tl = malloc(sizeof(TokenList));
+    if (tl == NULL)
+    {
+        printf("out of memory\n");
+        return 1;
+    }
+    tl->token = NULL;
+    tl->next = NULL; // traversals stop on NULL, so the head must be terminated
+
+    TokenList* current = tl;
+    TokenType tt;
+    do
+    {
+        Token* t = NextToken(l);
+        if (t == NULL)
+        {
+            printf("out of memory\n");
+            return 1;
+        }
+        tt = t->type;
+
+        current = TokenListAdd(current, t);
+        if (current == NULL)
+        {
+            printf("out of memory\n");
+            return 1;
+        }
+    }
+    while(tt != TT_EOF);
+
+    writeTokenFile(tl);
+    ParseNodes(tl);
+
+    printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
+            l->rawLen,
+            l->position,
+            l->readPosition,
+            l->ch,
+            l->line,
+            l->column
+    );
+    return 0;
+}
+
+// writeTokenFile writes one line per token to tokens.txt, echoes the first
+// token to stdout, and prints how many tokens were written.
+//
+// Every node is visited, including the last one (normally the EOF token).
+// Nodes without a token are skipped rather than dereferenced. The list must
+// be NULL-terminated.
+void
+writeTokenFile(TokenList* tl)
+{
+    FILE* fp = fopen("tokens.txt", "w");
+    if (fp == NULL)
+    {
+        printf("unable to open tokens.txt\n");
+        return;
+    }
+
+    int count = 0;
+    for (TokenList* current = tl; current != NULL; current = current->next)
+    {
+        if (current->token == NULL)
+        {
+            if (current == tl)
+            {
+                printf("first token null\n");
+            }
+            continue;
+        }
+
+        // TokenString returns a shared static buffer: use it before the
+        // next call overwrites it.
+        char* text = TokenString(current->token);
+        if (count == 0)
+        {
+            printf("%s\n", text);
+        }
+        fprintf(fp, "%s\n", text);
+        count++;
+    }
+    fclose(fp);
+
+    printf("Token count: %d\n", count);
+}
--- a/main.h
+++ b/main.h
@ -0,0 +1,7 @@
+
+#ifndef MAIN_H
+#define MAIN_H
+
+// NOTE(review): no source file in this commit references MAXBUFFER.
+#define MAXBUFFER 1024
+
+#endif
--- a/node.c
+++ b/node.c
@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <string.h>
+
+#include "node.h"
+
+// Capacity in bytes of stringBuff, terminating NUL included.
+#define STRING_BUFF_SIZE 1024
+
+// File-local scratch buffer in which parseHeader assembles header text.
+static char stringBuff[STRING_BUFF_SIZE];
+
+// Defined further down in this file.
+Node* parseHeader(TokenList** list);
+
+// ParseNodes walks the token list and hands every run that starts with a
+// TT_HASH to parseHeader, storing the result in the returned list's single
+// element. All other token types are currently ignored.
+//
+// Returns a newly allocated NodeList (empty when `list` is NULL), or NULL if
+// that allocation fails. List entries without a token are skipped.
+NodeList*
+ParseNodes(TokenList* list)
+{
+    NodeList* nl = malloc(sizeof *nl);
+    if (nl == NULL)
+    {
+        return NULL;
+    }
+    nl->next = NULL;
+    nl->node = NULL;
+
+    for (TokenList* current = list; current != NULL; current = current->next)
+    {
+        if (current->token == NULL)
+        {
+            continue;
+        }
+
+        if (current->token->type == TT_HASH)
+        {
+            // parseHeader moves `current` to the last token it consumed, so
+            // the loop increment resumes right after the header.
+            nl->node = parseHeader(&current);
+        }
+    }
+
+    return nl;
+}
+
+// parseHeader parses one header line. `*list` must point at its first
+// TT_HASH token.
+//
+// It counts the leading hashes, skips the whitespace after them, and joins
+// the literals of the remaining tokens up to (not including) the next
+// TT_NEWLINE or TT_EOF into stringBuff. The hash count and text are printed.
+//
+// On success `*list` is moved to the last token of the header text. If the
+// header has no text, `*list` is moved to the last hash so the caller does
+// not re-report the same header once per hash. If the text does not fit in
+// stringBuff, `*list` is left unchanged.
+//
+// No Node is built yet: the function always returns NULL.
+Node*
+parseHeader(TokenList** list)
+{
+    TokenList* l = *list;
+
+    // Count the number of TT_HASH tokens
+    int count = 1;
+    while (l->next != NULL && l->next->token->type == TT_HASH)
+    {
+        count++;
+        l = l->next;
+    }
+    TokenList* lastHash = l;
+
+    if (l->next == NULL)
+    {
+        printf("Header missing text\n");
+        *list = lastHash;
+        return NULL;
+    }
+    l = l->next;
+
+    // Trim leading whitespace
+    while (l->next != NULL && l->token->type == TT_WHITESPACE)
+    {
+        l = l->next;
+    }
+
+    // A header whose line ends straight after the hashes has no text. Without
+    // this check the newline would be appended and the following line
+    // swallowed into the header.
+    TokenType first = l->token->type;
+    if (first == TT_WHITESPACE || first == TT_NEWLINE || first == TT_EOF)
+    {
+        printf("Header missing text\n");
+        *list = lastHash;
+        return NULL;
+    }
+
+    size_t used = 0; // bytes of text in stringBuff, terminator excluded
+    stringBuff[0] = '\0';
+    for (;;)
+    {
+        const char* literal = l->token->literal;
+        size_t litSize = strlen(literal);
+        if (used + litSize + 1 > STRING_BUFF_SIZE)
+        {
+            printf("Buffer not big enough!\n");
+            return NULL;
+        }
+        // Copies the terminator too, so stringBuff is always a valid string.
+        memcpy(stringBuff + used, literal, litSize + 1);
+        used += litSize;
+
+        if (l->next == NULL
+            || l->next->token->type == TT_NEWLINE
+            || l->next->token->type == TT_EOF)
+        {
+            break;
+        }
+
+        l = l->next;
+    }
+
+    *list = l;
+    printf("header hash count: %d\ntext: '%s'\n", count, stringBuff);
+    return NULL;
+}
+
--- a/node.h
+++ b/node.h
@ -0,0 +1,43 @@
+#include <stdlib.h>
+
+#include "token.h"
+
+#ifndef NODE_H
+#define NODE_H
+
+// Kinds of document node.
+// NOTE(review): none of these constants is referenced by node.c in this
+// commit; they appear to mirror the syntax list in readme.md — confirm.
+typedef enum {
+    NT_Header1,
+    NT_Header2,
+    NT_Header3,
+    NT_Header4,
+    NT_Paragraph,
+    NT_UnorderedList,
+    NT_OrderedList,
+    NT_InlineCode,
+    NT_BlockCode,
+    NT_BlockQuote,
+    NT_Bold,
+    NT_Underline,
+} NodeType;
+
+// Forward declaration: Node and NodeList refer to each other.
+struct NodeList;
+
+// One document node.
+// NOTE(review): `children` is presumably the list of nested nodes; nothing
+// in this commit assigns it yet.
+typedef struct Node {
+    NodeType type;
+    struct NodeList* children;
+} Node;
+
+// Singly linked list of nodes. `next` points at the following list element
+// (NULL at the tail), so it has list-element type rather than Node type.
+typedef struct NodeList {
+    struct Node* node;
+    struct NodeList* next;
+} NodeList;
+
+// Header-specific node record carrying the header's raw text.
+// NOTE(review): not referenced by any source file in this commit; parseHeader
+// currently returns NULL instead of building one.
+typedef struct {
+    NodeType type;
+    struct Node* next;
+    char* rawText;
+} HeaderNode;
+
+NodeList* ParseNodes(TokenList* list);
+
+#endif
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,28 @@
+# Terminal Markdown Viewer
+
+## Goals
+
+To render markdown in the terminal, using colors, font weights, etc., to
+display the document.
+
+## Implemented syntax
+
+- Headers
+- Unordered lists
+- Ordered lists
+- Inline code
+- Block code
+- Block quote?
+- Bold
+- Underline
+
+### Maybes
+
+- Task list
+- Explicit colors
+- Inter-document links
+
+### nopes
+
+- Tables
+- Syntax highlighting code
--- a/sample.md
+++ b/sample.md
@ -0,0 +1,46 @@
+# Header 1
+
+Some text.
+
+## Header 2
+
+*bold text*
+_underlined text_
+
+Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas
+pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
+
+Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
+rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
+
+```
+Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui.
+```
+
+### Header 3
+
+- List item one.
+- List item two.
+- List item three.
+- List item four.
+
+1. Ordered list one
+1. Ordered list two
+1. Ordered list three
+1. Ordered list four
+
+- Toplevel one
+    - Second level one
+    - Second level two
+- Toplevel two
+    - Second level one
+        - Third level
+    - Second level two
+
+1. Ordered toplevel one
+    1. Ordered second level one
+    1. Ordered second level two
+1. Ordered toplevel two
+    1. Ordered second level one
+        1. Ordered third level
+    1. Ordered second level two
--- a/token.c
+++ b/token.c
@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "token.h"
+
+// Capacity in bytes of stringBuff, terminating NUL included.
+#define STRING_BUFF_SIZE 1024
+
+// File-local buffer that TokenString formats into and returns.
+static char stringBuff[STRING_BUFF_SIZE];
+
+// Defined further down in this file.
+char* printableOnly(char* input);
+
+// TokenListAdd appends `next` after the list element `current` and returns
+// the element that now holds it.
+//
+// An element whose token is NULL is treated as the empty head of a new list:
+// the token is stored in place and `current` itself is returned. Otherwise a
+// new element is allocated and linked after `current`.
+//
+// The returned element is always NULL-terminated, so traversals that stop on
+// `next == NULL` are well defined. Returns NULL if allocation fails.
+TokenList*
+TokenListAdd(TokenList* current, Token* next)
+{
+    if (current->token == NULL)
+    {
+        printf("current->token == null\n");
+        current->token = next;
+        current->next = NULL;
+        return current;
+    }
+
+    TokenList* nl = malloc(sizeof *nl);
+    if (nl == NULL)
+    {
+        return NULL;
+    }
+    nl->token = next;
+    nl->next = NULL;
+    current->next = nl;
+    return nl;
+}
+
+// TokenString formats a token as "[line:column] Type: <type> Literal: '<text>'"
+// with non-printable bytes of the literal shown as \xHH escapes.
+//
+// The result lives in a buffer shared by all calls: it is overwritten by the
+// next call and must not be freed. Output longer than the buffer is
+// truncated.
+char*
+TokenString(Token* t)
+{
+    // printableOnly returns a fresh allocation that must be released here.
+    char* escaped = printableOnly(t->literal);
+
+    snprintf(stringBuff, sizeof stringBuff, "[%d:%d] Type: %s Literal: '%s'",
+            t->line,
+            t->column,
+            TokenTypeString(t->type),
+            escaped != NULL ? escaped : ""
+            );
+
+    free(escaped);
+    return stringBuff;
+}
+
+// TokenTypeString returns the enumerator name of `tt` as a string literal.
+// A value that is not one of the TokenType enumerators yields the fallback
+// literal, which reads as an empty string.
+char*
+TokenTypeString(TokenType tt)
+{
+    static char* const names[] = {
+        [TT_ILLEGAL]    = "TT_ILLEGAL",
+        [TT_EOF]        = "TT_EOF",
+        [TT_HASH]       = "TT_HASH",
+        [TT_ASTERISK]   = "TT_ASTERISK",
+        [TT_UNDERSCORE] = "TT_UNDERSCORE",
+        [TT_DASH]       = "TT_DASH",
+        [TT_PERIOD]     = "TT_PERIOD",
+        [TT_BACKTICK]   = "TT_BACKTICK",
+        [TT_WHITESPACE] = "TT_WHITESPACE",
+        [TT_NEWLINE]    = "TT_NEWLINE",
+        [TT_WORD]       = "TT_WORD",
+        [TT_NUMBER]     = "TT_NUMBER",
+    };
+
+    // Going through unsigned also rejects negative values.
+    const unsigned index = (unsigned)tt;
+    if (index < sizeof names / sizeof names[0] && names[index] != NULL)
+    {
+        return names[index];
+    }
+
+    return "\0";
+}
+
+// printableOnly returns a newly malloc'd copy of `input` in which every byte
+// outside printable ASCII (below 0x20, or 0x7F and above) is replaced by a
+// four-character \xHH escape. The caller frees the result.
+//
+// Bytes are handled as unsigned char, so a byte such as 0xC3 is rendered as
+// \xC3 rather than being sign-extended first. Returns NULL if allocation
+// fails.
+char*
+printableOnly(char* input)
+{
+    const size_t len = strlen(input);
+
+    // Worst case: every byte expands to four characters, plus the terminator.
+    char* str = malloc(len * 4 + 1);
+    if (str == NULL)
+    {
+        return NULL;
+    }
+
+    size_t j = 0;
+    for (size_t i = 0; i < len; i++)
+    {
+        const unsigned char c = (unsigned char)input[i];
+        if (c < 0x20 || c >= 0x7F)
+        {
+            // hex notation; snprintf also writes a NUL at str[j + 4], which
+            // stays inside the allocation and is overwritten by what follows.
+            snprintf(&str[j], 5, "\\x%02X", (unsigned)c);
+            j += 4;
+        }
+        else
+        {
+            str[j] = (char)c;
+            j += 1;
+        }
+    }
+
+    str[j] = '\0';
+    return str;
+}
+
--- a/token.h
+++ b/token.h
@ -0,0 +1,38 @@
+
+#ifndef TOKEN_H
+#define TOKEN_H
+
+// Token categories produced by the lexer.
+typedef enum {
+    TT_ILLEGAL, // any character the lexer does not recognise
+    TT_EOF, // NUL character / end of input
+    TT_HASH, // #
+    TT_ASTERISK, // *
+    TT_UNDERSCORE, // _
+    TT_DASH, // -
+    TT_PERIOD, // .
+    TT_BACKTICK, // `
+    TT_WHITESPACE, // a single space or tab
+    TT_NEWLINE, // \n
+    TT_WORD, // run of letters
+    TT_NUMBER, // run of digits
+} TokenType;
+
+// One lexed token.
+typedef struct Token {
+    TokenType type;
+    char* literal; // heap-allocated, NUL-terminated text; freed by FreeToken
+    int line; // lexer line when the token was created
+    int column; // lexer column; for words and numbers, where the run started
+    char* printBuff; // NOTE(review): no .c file in this commit reads or writes this
+} Token;
+
+// Singly linked list of tokens. An element whose token is NULL is treated by
+// TokenListAdd as an empty slot to fill.
+typedef struct TokenList {
+    Token* token;
+    struct TokenList* next;
+} TokenList;
+
+// Stores `next` in or after `current`; returns the element that holds it.
+TokenList* TokenListAdd(TokenList* current, Token* next);
+
+// Formats a token into a buffer shared by all calls; do not free the result.
+char* TokenString(Token* t);
+// Returns the enumerator name of `tt` as a string literal.
+char* TokenTypeString(TokenType tt);
+
+#endif