Initial commit

2021-07-14 11:15:46 -04:00 · 2021-07-14 11:15:46 -04:00 · 5e1d6ae207
commit 5e1d6ae207
12 changed files with 775 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,3 @@
 *.o
 *.txt
 readme
--- a/23
+++ b/23
@ -0,0 +1,23 @@
 # Build script for the "readme" terminal markdown viewer.
 # Targets that name actions rather than files are declared phony so a stray
 # file called "all", "run" or "clean" can never make them look up to date.
 .PHONY: all run test clean
 CC=gcc
 CFLAGS=-Wall -Wpedantic -Werror -std=c99
 OBJ=main.o lexer.o token.o node.o
 all: readme
 run: readme
 	./readme
 readme: ${OBJ}
 	${CC} ${CFLAGS} -o $@ $^
 #token.o: token.h token.c
 #	${CC} ${CFLAGS} -o $@ $<
 .c.o:
 	${CC} ${CFLAGS} -c -o $@ $<
 # -f keeps clean silent and successful when there is nothing to remove.
 clean:
 	rm -f *.o readme
--- a/lexer.c
+++ b/lexer.c
@ -0,0 +1,235 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "lexer.h"
 static void readChar(Lexer* l);
 static char* readIdentifier(Lexer* l);
 static char* readNumber(Lexer* l);
 static int isLetter(char c);
 static int isDigit(char c);
 static Token* newToken(Lexer* l, TokenType tt);
 static Token* newIdentToken(Lexer* l, char* literal, TokenType tt);
 Lexer*
 NewLexer(char* filename)
 {
    FILE* fp;
    fp = fopen(filename, "r");
    if (fp == NULL)
    {
        printf("Can't open the file for some reason\n");
        return NULL;
    }
    fseek(fp, 0, SEEK_END);
    int fileSize = ftell(fp);
    fseek(fp, 0, SEEK_SET);
    printf("fileSize: %d\n", fileSize);
    Lexer* state = malloc(sizeof(Lexer));
    state->rawFile = malloc((sizeof(char) * fileSize) + 1);
    state->rawLen = fileSize;
    size_t read = fread(state->rawFile, sizeof(char), fileSize, fp);
    if (read != fileSize)
    {
        printf("something borked.  only read %d bytes of %d\n", (int)read, fileSize);
        free(state->rawFile);
        free(state);
        return NULL;
    }
    fclose(fp);
    state->rawFile[fileSize] = '\0';
    state->line = 1;
    readChar(state);
    return state;
 }
 // FreeLexer releases a lexer and the file buffer it owns.
 // Passing NULL is a no-op, so the result of a failed NewLexer can be handed
 // straight to this function.
 void
 FreeLexer(Lexer* l)
 {
    if (l == NULL)
    {
        return;
    }
    free(l->rawFile);
    free(l);
 }
 // NextToken builds a token for the character under the cursor and advances
 // the lexer past it.  Carriage returns are skipped.  Runs of letters become
 // one TT_WORD and runs of digits one TT_NUMBER; every other character yields
 // a single-character token.
 Token*
 NextToken(Lexer* l)
 {
    const char c = l->ch;
    TokenType kind = TT_ILLEGAL;
    if (c == '\r')
    {
        // Drop the carriage return and lex whatever follows it.
        readChar(l);
        return NextToken(l);
    }
    switch (c) {
        case '#':  kind = TT_HASH;       break;
        case '*':  kind = TT_ASTERISK;   break;
        case '_':  kind = TT_UNDERSCORE; break;
        case '-':  kind = TT_DASH;       break;
        case '.':  kind = TT_PERIOD;     break;
        case '`':  kind = TT_BACKTICK;   break;
        case '\0': kind = TT_EOF;        break;
        case '\n': kind = TT_NEWLINE;    break;
        case ' ':
        case '\t': kind = TT_WHITESPACE; break;
        default:
            if (isLetter(c) || isDigit(c))
            {
                // Multi-character token: the reader leaves the cursor on the
                // first character after the run, so no extra advance is needed.
                const int isWord = isLetter(c);
                const int startColumn = l->column;
                char* text = isWord ? readIdentifier(l) : readNumber(l);
                Token* multi = newIdentToken(l, text, isWord ? TT_WORD : TT_NUMBER);
                multi->column = startColumn;
                return multi;
            }
            kind = TT_ILLEGAL;
            break;
    }
    Token* single = newToken(l, kind);
    if (kind == TT_NEWLINE)
    {
        // The token keeps the old line/column; the following text starts a new line.
        l->line++;
        l->column = 0;
    }
    readChar(l);
    return single;
 }
 // readNumber consumes the run of digits starting at the cursor and returns
 // it as a newly allocated NUL-terminated string.
 static
 char*
 readNumber(Lexer* l)
 {
    const int begin = l->position;
    for (; isDigit(l->ch); readChar(l))
    {
    }
    const int count = l->position - begin;
    // Copy the run out of the file buffer and terminate it.
    char* digits = malloc(sizeof(char) * count + 1);
    memcpy(digits, l->rawFile + begin, count);
    digits[count] = '\0';
    return digits;
 }
 // readIdentifier consumes the run of isLetter characters starting at the
 // cursor and returns it as a newly allocated NUL-terminated string.
 static
 char*
 readIdentifier(Lexer* l)
 {
    const int first = l->position;
    for (;;)
    {
        if (!isLetter(l->ch))
        {
            break;
        }
        readChar(l);
    }
    const int width = l->position - first;
    char* word = malloc(sizeof(char) * width + 1);
    memcpy(word, l->rawFile + first, width);
    word[width] = '\0';
    return word;
 }
 // readChar moves the cursor one character forward: `ch` becomes the byte at
 // readPosition (or 0 once the end of the buffer is reached), `position`
 // takes the old readPosition, and readPosition and column each grow by one.
 static
 void
 readChar(Lexer* l)
 {
    const int next = l->readPosition;
    l->ch = (next < l->rawLen) ? l->rawFile[next] : 0;
    l->position = next;
    l->readPosition = next + 1;
    l->column++;
 }
 // Placeholder: the body is empty, so calling this currently has no effect.
 void
 Parse(Lexer* l)
 {
 }
 // isLetter returns 1 for ASCII letters and for '_', otherwise 0.
 // NOTE(review): because '_' counts as a letter, an underscore that follows a
 // word is absorbed into that word instead of becoming TT_UNDERSCORE — confirm
 // this is intended.
 int
 isLetter(char ch)
 {
    if (ch == '_')
    {
        return 1;
    }
    if (ch >= 'a' && ch <= 'z')
    {
        return 1;
    }
    return (ch >= 'A' && ch <= 'Z');
 }
 // isDigit returns 1 for the ASCII digits '0' through '9', otherwise 0.
 int
 isDigit(char ch)
 {
    if (ch < '0')
    {
        return 0;
    }
    return (ch <= '9');
 }
 // FreeToken releases a token together with its literal string.
 // Passing NULL is a no-op.
 void
 FreeToken(Token* t)
 {
    if (t == NULL)
    {
        return;
    }
    free(t->literal);
    free(t);
 }
 // newToken allocates a token of type `tt` whose literal is the single
 // character currently under the lexer's cursor, stamped with the lexer's
 // line and column.
 //
 // Returns NULL if either allocation fails; nothing is leaked in that case.
 static
 Token*
 newToken(Lexer* l,
          TokenType tt)
 {
    Token* tok = malloc(sizeof *tok);
    char* text = malloc(2);
    if (tok == NULL || text == NULL)
    {
        // free(NULL) is a no-op, so whichever allocation succeeded is released.
        free(tok);
        free(text);
        return NULL;
    }
    text[0] = l->ch;
    text[1] = '\0';
    tok->type = tt;
    tok->literal = text;
    tok->line = l->line;
    tok->column = l->column;
    // Leave no field of the token indeterminate.
    tok->printBuff = NULL;
    return tok;
 }
 // newIdentToken allocates a token of type `tt` that takes ownership of the
 // heap string `literal`, stamped with the lexer's current line and column.
 //
 // Returns NULL if the token cannot be allocated; `literal` is freed in that
 // case so the caller never has to release it.
 static
 Token*
 newIdentToken(Lexer* l,
               char* literal,
               TokenType tt)
 {
    Token* tok = malloc(sizeof *tok);
    if (tok == NULL)
    {
        free(literal);
        return NULL;
    }
    tok->type = tt;
    tok->literal = literal;
    tok->line = l->line;
    tok->column = l->column;
    // Leave no field of the token indeterminate.
    tok->printBuff = NULL;
    return tok;
 }
--- a/lexer.h
+++ b/lexer.h
@ -0,0 +1,50 @@
 #include "token.h"
 #ifndef LEXER_H
 #define LEXER_H
 //typedef enum NodeType {
 //    NT_Root,
 //    NT_Header1,
 //    NT_Header2,
 //    NT_Header3,
 //    NT_ListItem,
 //    NT_OrderedListItem,
 //    NT_Paragraph,
 //    NT_PlainText,
 //    NT_BoldText,
 //    NT_UnderlineText,
 //    NT_InlineCode,
 //    NT_BlockCode,
 //} NodeType;
 // Scanning state for one input file held entirely in memory.
 typedef struct Lexer {
    char* rawFile; // file contents; NewLexer appends a terminating '\0'
    int rawLen; // number of bytes read from the file (terminator not counted)
    int position; // current index
    int readPosition; // next index
    char ch; // character under examination
    // values for current index
    int line;
    int column;
 } Lexer;
 //typedef struct Node {
 //    NodeType type;
 //    char RawText;
 //    int LineNumber;
 //
 //    //struct Node **ChildNodes;
 //    void** ChildNodes;
 //    int ChildCount;
 //} Node;
 // Public lexer interface.
 // NewLexer returns NULL on failure; a lexer it returns is released with
 // FreeLexer, and each token returned by NextToken with FreeToken.
 Lexer* NewLexer(char* filename);
 void FreeLexer(Lexer* l);
 Token* NextToken(Lexer* l);
 void FreeToken(Token* t);
 void Parse(Lexer* l);
 #endif
--- a/main.c
+++ b/main.c
@ -0,0 +1,99 @@
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include "main.h"
 #include "token.h"
 #include "lexer.h"
 #include "node.h"
 /*
 *  RawText ""
 *  LineNumber 0
 *  NodeType NT_Root
 *  ChildNodes
 *      RawText "# Header1"
 *      LineNumber 1
 *      NodeType NT_Header1
 *      ChildNodes
 *          {"Some text."}
 *
 *      RawText "## Header2"
 */
 /*
 * NodeType NT_Root
 * ChildNodes
 *      RawText "## Header2"
 *      ChildNodes
 *          paragraph
 *          ChildNodes
 *              {*bold text*}
 *              {_underlined text_}
 *          paragraph
 *
 *
 */
 //Node* ParseLine(char *buffer);
 void writeTokenFile(TokenList* tl);
 int
 main(int argc, const char** argv)
 {
    Lexer* l = NewLexer("sample.md");
    TokenList* current = malloc(sizeof(TokenList));
    TokenList* tl = current;//= malloc(sizeof(TokenList));
    current->token = NULL;
    TokenType tt;
    do
    {
        Token* t = NextToken(l);
        tt = t->type;
        current = TokenListAdd(current, t);
    }
    while(tt != TT_EOF);
    writeTokenFile(tl);
    ParseNodes(tl);
    printf("rawLen: %d position: %d readPosition: %d ch: %c line: %d column: %d\n",
            l->rawLen,
            l->position,
            l->readPosition,
            l->ch,
            l->line,
            l->column
    );
    return 0;
 }
 // writeTokenFile writes one TokenString line per token in `tl` to tokens.txt
 // and prints how many tokens were written.  The first token is also echoed
 // to stdout.  Every node is visited, including the last one, and nodes
 // without a token are skipped.
 void
 writeTokenFile(TokenList* tl)
 {
    int count = 0;
    FILE* fp = fopen("tokens.txt", "w");
    if (fp == NULL)
    {
        printf("unable to open tokens.txt\n");
        return;
    }
    for (TokenList* current = tl; current != NULL; current = current->next)
    {
        if (current->token == NULL)
        {
            if (current == tl)
            {
                printf("first token null\n");
            }
            // Nothing to print for an empty node.
            continue;
        }
        if (current == tl)
        {
            printf("%s\n", TokenString(current->token));
        }
        fprintf(fp, "%s\n", TokenString(current->token));
        count++;
    }
    fclose(fp);
    printf("Token count: %d\n", count);
 }
--- a/main.h
+++ b/main.h
@ -0,0 +1,7 @@
 #ifndef MAIN_H
 #define MAIN_H
 // NOTE(review): no source file shown alongside this header references
 // MAXBUFFER — confirm it is still needed.
 #define MAXBUFFER 1024
 #endif
--- a/node.c
+++ b/node.c
@ -0,0 +1,104 @@
 #include <stdio.h>
 #include <string.h>
 #include "node.h"
 #define STRING_BUFF_SIZE 1024
 static char stringBuff[STRING_BUFF_SIZE];
 Node* parseHeader(TokenList** list);
 // ParseNodes walks the token list and hands every header (a line starting
 // with TT_HASH) to parseHeader.  All other tokens are ignored for now.
 //
 // Returns a newly allocated NodeList, or NULL if it cannot be allocated.
 // The list is never extended: each header overwrites the single `node` slot.
 // A NULL `list` and nodes without a token are tolerated.
 NodeList*
 ParseNodes(TokenList* list)
 {
    NodeList* nl = malloc(sizeof *nl);
    if (nl == NULL)
    {
        return NULL;
    }
    nl->next = NULL;
    nl->node = NULL;
    for (TokenList* current = list; current != NULL; current = current->next)
    {
        if (current->token == NULL)
        {
            continue;
        }
        switch (current->token->type) {
            case TT_HASH:
                // parseHeader moves `current` forward to the last token it
                // consumed; the loop increment then steps past it.
                nl->node = parseHeader(&current);
                break;
            case TT_NEWLINE:
            default:
                break;
        }
    }
    return nl;
 }
 // parseHeader parses a header line.  `*list` must point at the first TT_HASH.
 // It counts the hashes, skips the whitespace after them and joins the
 // literals of the remaining tokens, up to but not including the newline,
 // into stringBuff, then prints the hash count and the text.
 //
 // On success `*list` is moved to the last token of the header text.  A header
 // with no text before the newline is reported as "Header missing text" and
 // `*list` is moved to that newline so the following line is left untouched.
 // Currently always returns NULL: no Node is built yet.
 Node*
 parseHeader(TokenList** list)
 {
    TokenList* l = *list;
    // Count the number of TT_HASH tokens
    int count = 1;
    while (l->next != NULL && l->next->token->type == TT_HASH)
    {
        count++;
        l = l->next;
    }
    if (l->next == NULL)
    {
        printf("Header missing text");
        return NULL;
    }
    l = l->next;
    // Trim leading whitespace
    while (l->next != NULL && l->token->type == TT_WHITESPACE)
    {
        l = l->next;
    }
    if (l->next == NULL)
    {
        printf("Header missing text");
        return NULL;
    }
    if (l->token->type == TT_NEWLINE)
    {
        // Without this check the newline itself would be taken as the start
        // of the text and the whole next line appended to it.
        printf("Header missing text");
        *list = l;
        return NULL;
    }
    // Track the used length instead of re-measuring the buffer every round.
    size_t used = 0;
    stringBuff[0] = '\0';
    while (1)
    {
        const char* literal = l->token->literal;
        size_t litSize = strlen(literal);
        if (used + litSize + 1 > sizeof stringBuff)
        {
            printf("Buffer not big enough!");
            return NULL;
        }
        // Copies the terminator too, so the buffer is always a valid string.
        memcpy(stringBuff + used, literal, litSize + 1);
        used += litSize;
        if (l->next == NULL || l->next->token->type == TT_NEWLINE)
        {
            break;
        }
        l = l->next;
    }
    *list = l;
    printf("header hash count: %d\ntext: '%s'\n", count, stringBuff);
    return NULL;
 }
--- a/node.h
+++ b/node.h
@ -0,0 +1,43 @@
 #include <stdlib.h>
 #include "token.h"
 #ifndef NODE_H
 #define NODE_H
 // Tag stored in Node.type that says which kind of element a node is.
 typedef enum {
    NT_Header1,
    NT_Header2,
    NT_Header3,
    NT_Header4,
    NT_Paragraph,
    NT_UnorderedList,
    NT_OrderedList,
    NT_InlineCode,
    NT_BlockCode,
    NT_BlockQuote,
    NT_Bold,
    NT_Underline,
 } NodeType;
 struct NodeList;
 // One parsed element: its kind plus a pointer to a list of child nodes.
 typedef struct Node {
    NodeType type;
    struct NodeList* children;
 } Node;
 // Singly linked list cell holding one node.
 // `next` points at the following list cell (NULL at the end of the list), so
 // a list can be walked cell by cell.
 typedef struct NodeList {
    struct Node* node;
    struct NodeList* next;
 } NodeList;
 // Header-specific record: a type tag, a next pointer and the header's raw text.
 // NOTE(review): nothing in node.c creates or reads a HeaderNode yet — confirm
 // the intended use before relying on this layout.
 typedef struct {
    NodeType type;
    struct Node* next;
    char* rawText;
 } HeaderNode;
 NodeList* ParseNodes(TokenList* list);
 #endif
--- a/readme.md
+++ b/readme.md
@ -0,0 +1,28 @@
 # Terminal Markdown Viewer
 ## Goals
 To render markdown in the terminal and use colors, font weights, etc, to
 display the document.
 ## Implemented syntax
 - Headers
 - Unordered lists
 - Ordered lists
 - Inline code
 - Block code
 - Block quote?
 - Bold
 - Underline
 ### maybes
 - Task list
 - Explicit colors
 - Inter-document links
 ### nopes
 - Tables
 - Syntax highlighting code
--- a/sample.md
+++ b/sample.md
@ -0,0 +1,46 @@
 # Header 1
 Some text.
 ## Header 2
 *bold text*
 _underlined text_
 Nostra sem bibendum ridiculus aenean condimentum sed eleifend et odio egestas
 pellentesque. *Sit fusce.* At ligula dolor parturient sodales auctor. Egestas.
 Dictum pharetra nulla _aliquet tincidunt_ parturient netus gravida rutrum
 rhoncus. Donec dis mollis ornare `bibendum sollicitudin` velit lectus inceptos.
 ```
 Laoreet arcu eget cubilia auctor vitae cursus lacus volutpat dui.
 ```
 ### Header 3
 - List item one.
 - List item two.
 - List item three.
 - List item four.
 1. Ordered list one
 1. Ordered list two
 1. Ordered list three
 1. Ordered list four
 - Toplevel one
    - Second level one
    - Second level two
 - Toplevel two
    - Second level one
        - Third level
    - Second level two
 1. Ordered toplevel one
    1. Ordered second level one
    1. Ordered second level two
 1. Ordered toplevel two
    1. Ordered second level one
        1. Ordered third level
    1. Ordered second level two
--- a/token.c
+++ b/token.c
@ -0,0 +1,99 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include "token.h"
 #define STRING_BUFF_SIZE 1024
 static char stringBuff[STRING_BUFF_SIZE];
 char* printableOnly(char* input);
 // TokenListAdd appends `next` after `current` and returns the node that now
 // holds it, so the result can be passed back in as `current` next time.
 //
 // If `current` has no token yet (an empty head node) the token is stored in
 // `current` itself.  Otherwise a new node is allocated and linked in.
 // Returns NULL if that allocation fails.
 TokenList*
 TokenListAdd(TokenList* current, Token* next)
 {
    if (current->token == NULL)
    {
        printf("current->token == null\n");
        current->token = next;
        return current;
    }
    TokenList* nl = malloc(sizeof *nl);
    if (nl == NULL)
    {
        return NULL;
    }
    nl->token = next;
    // The new node is the tail: terminate the list so walkers can stop on NULL.
    nl->next = NULL;
    current->next = nl;
    return nl;
 }
 // TokenString formats a token as "[line:column] Type: <type> Literal: '<text>'"
 // with non-printable bytes of the literal escaped.
 //
 // The result lives in a file-level static buffer: it is overwritten by the
 // next call and must not be freed by the caller.
 char*
 TokenString(Token* t)
 {
    // printableOnly returns a heap copy that has to be released once formatted.
    char* escaped = printableOnly(t->literal);
    snprintf(stringBuff, sizeof stringBuff, "[%d:%d] Type: %s Literal: '%s'",
            t->line,
            t->column,
            TokenTypeString(t->type),
            escaped != NULL ? escaped : ""
            );
    free(escaped);
    return stringBuff;
 }
 // TokenTypeString returns the enumerator name of `tt` as a string, or an
 // empty string for a value that is not one of the known token types.
 char*
 TokenTypeString(TokenType tt)
 {
    // Name table indexed by the enumerator value.
    static char* const names[] = {
        [TT_ILLEGAL]    = "TT_ILLEGAL",
        [TT_EOF]        = "TT_EOF",
        [TT_HASH]       = "TT_HASH",
        [TT_ASTERISK]   = "TT_ASTERISK",
        [TT_UNDERSCORE] = "TT_UNDERSCORE",
        [TT_DASH]       = "TT_DASH",
        [TT_PERIOD]     = "TT_PERIOD",
        [TT_BACKTICK]   = "TT_BACKTICK",
        [TT_WHITESPACE] = "TT_WHITESPACE",
        [TT_NEWLINE]    = "TT_NEWLINE",
        [TT_WORD]       = "TT_WORD",
        [TT_NUMBER]     = "TT_NUMBER",
    };
    const int index = (int)tt;
    const int known = (int)(sizeof names / sizeof names[0]);
    if (index < 0 || index >= known || names[index] == NULL)
    {
        return "\0";
    }
    return names[index];
 }
 // printableOnly returns a newly allocated copy of `input` in which every byte
 // outside the printable ASCII range 0x20..0x7E is replaced by a four
 // character "\xHH" escape, HH being the byte value in upper-case hex.
 //
 // The caller frees the result.  Returns NULL if memory cannot be allocated.
 char*
 printableOnly(char* input)
 {
    size_t len = strlen(input);
    // Worst case: every byte expands to four characters, plus the terminator.
    char* str = malloc(len * 4 + 1);
    if (str == NULL)
    {
        return NULL;
    }
    size_t j = 0;
    for (size_t i = 0; i < len; i++)
    {
        // Work on the unsigned value: a plain char may be signed, which would
        // sign-extend bytes >= 0x80 and make every one of them print as FF.
        unsigned char byte = (unsigned char)input[i];
        if (byte < 0x20 || byte >= 0x7F)
        {
            // hex notation
            snprintf(&str[j], 5, "\\x%02X", (unsigned int)byte);
            j += 4;
        }
        else
        {
            str[j++] = (char)byte;
        }
    }
    str[j] = '\0';
    return str;
 }
--- a/token.h
+++ b/token.h
@ -0,0 +1,38 @@
 #ifndef TOKEN_H
 #define TOKEN_H
 // Kinds of token the lexer produces.
 typedef enum {
    TT_ILLEGAL, // any character the lexer has no rule for
    TT_EOF, // end of input
    TT_HASH, // #
    TT_ASTERISK, // *
    TT_UNDERSCORE, // _
    TT_DASH, // -
    TT_PERIOD, // .
    TT_BACKTICK, // `
    TT_WHITESPACE, // one space or tab
    TT_NEWLINE, // line feed
    TT_WORD, // run of characters accepted by isLetter
    TT_NUMBER, // run of decimal digits
 } TokenType;
 // One lexed token together with where it was found.
 typedef struct Token {
    TokenType type;
    char* literal; // heap-allocated text of the token; released by FreeToken
    int line;
    int column;
    // NOTE(review): no code shown reads printBuff — confirm it is still needed.
    char* printBuff;
 } Token;
 // Singly linked list cell holding one token.
 typedef struct TokenList {
    Token* token; // NULL marks an empty head node that TokenListAdd fills first
    struct TokenList* next;
 } TokenList;
 TokenList* TokenListAdd(TokenList* current, Token* next);
 char* TokenString(Token* t);
 char* TokenTypeString(TokenType tt);
 #endif