278 lines
4.3 KiB
Go
278 lines
4.3 KiB
Go
package dasmlbl
|
|
|
|
import (
|
|
"unicode"
|
|
"unicode/utf8"
|
|
"strings"
|
|
"fmt"
|
|
)
|
|
|
|
var (
|
|
eof LexItem = LexItem{lex_EOF, "EOF"}
|
|
unicode_EOF rune = '\uFFFC'
|
|
)
|
|
|
|
// stateFn is one state of the lexer: it works on l and returns the state to
// run next. Run stops when a state returns nil.
type stateFn func(l *Lexer) stateFn
|
|
|
|
type Lexer struct {
|
|
input string
|
|
start int
|
|
pos int
|
|
width int
|
|
items chan LexItem
|
|
}
|
|
|
|
func NewLexer(input string) (*Lexer, chan LexItem) {
|
|
l := &Lexer{
|
|
input: input,
|
|
items: make(chan LexItem),
|
|
}
|
|
//fmt.Println("new lex")
|
|
return l, l.items
|
|
}
|
|
|
|
func (l *Lexer) Run() {
|
|
//fmt.Println("lex run()")
|
|
for state := lexStateStart; state != nil; {
|
|
state = state(l)
|
|
}
|
|
close(l.items)
|
|
}
|
|
|
|
func (l *Lexer) emit(t lexItemType) {
|
|
itm := LexItem{t, l.input[l.start:l.pos]}
|
|
//fmt.Println(">", itm)
|
|
l.items <- itm
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *Lexer) next() rune {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
return unicode_EOF
|
|
}
|
|
|
|
r, size := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
if size == 0 {
|
|
panic(fmt.Sprintf("zero width at %d", l.pos))
|
|
}
|
|
l.width = size
|
|
l.pos += l.width
|
|
return r
|
|
}
|
|
|
|
func (l *Lexer) peek() rune {
|
|
if l.pos >= len(l.input) {
|
|
l.width = 0
|
|
return unicode_EOF
|
|
}
|
|
|
|
r, size := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
if size == 0 {
|
|
panic(fmt.Sprintf("zero width at %d", l.pos))
|
|
}
|
|
return r
|
|
}
|
|
|
|
func (l *Lexer) backup() {
|
|
l.pos -= l.width
|
|
}
|
|
|
|
func (l *Lexer) ignore() {
|
|
l.start = l.pos
|
|
}
|
|
|
|
func (l *Lexer) accept(valid string) bool {
|
|
if strings.IndexRune(valid, l.next()) >= 0 {
|
|
return true
|
|
}
|
|
l.backup()
|
|
return false
|
|
}
|
|
|
|
func (l *Lexer) acceptRun(valid string) {
|
|
for {
|
|
r := l.next()
|
|
idx := strings.IndexRune(valid, r)
|
|
if idx < 0 {
|
|
break
|
|
}
|
|
}
|
|
l.backup()
|
|
}
|
|
|
|
// lexItemType identifies the kind of a LexItem.
type lexItemType int
|
|
|
|
const (
|
|
lex_EOF lexItemType = iota
|
|
lex_Ident
|
|
lex_OpenBracket
|
|
lex_CloseBracket
|
|
lex_Semicolon
|
|
lex_String
|
|
lex_Number
|
|
lex_Pound
|
|
lex_Space
|
|
lex_Error
|
|
)
|
|
|
|
func (lit lexItemType) String() string {
|
|
switch lit {
|
|
case lex_Ident:
|
|
return "lex_Ident"
|
|
case lex_OpenBracket:
|
|
return "lex_OpenBracket"
|
|
case lex_CloseBracket:
|
|
return "lex_CloseBracket"
|
|
case lex_Semicolon:
|
|
return "lex_Semicolon"
|
|
case lex_String:
|
|
return "lex_String"
|
|
case lex_Number:
|
|
return "lex_Number"
|
|
case lex_Pound:
|
|
return "lex_Pound"
|
|
case lex_Space:
|
|
return "lex_Space"
|
|
case lex_EOF:
|
|
return "lex_EOF"
|
|
case lex_Error:
|
|
return "lex_Error"
|
|
}
|
|
return "lex_UNKNOWN"
|
|
}
|
|
|
|
type LexItem struct {
|
|
typ lexItemType
|
|
val string
|
|
}
|
|
|
|
func (itm LexItem) String() string {
|
|
return fmt.Sprintf("%s: %q", itm.typ, itm.val)
|
|
}
|
|
|
|
func lexComment(l *Lexer) stateFn {
|
|
for {
|
|
r := l.next()
|
|
if r == '\n' {
|
|
l.backup()
|
|
l.emit(lex_String)
|
|
break
|
|
}
|
|
if r == unicode_EOF {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return lexStateStart
|
|
}
|
|
|
|
func lexIdent(l *Lexer) stateFn {
|
|
l.acceptRun("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
|
|
l.emit(lex_Ident)
|
|
|
|
return lexStateStart
|
|
}
|
|
|
|
func lexNumber(l *Lexer) stateFn {
|
|
l.accept("-+")
|
|
l.accept("0x")
|
|
l.accept("$%")
|
|
l.acceptRun("0123456789ABCDEFabcdef")
|
|
l.emit(lex_Number)
|
|
return lexStateStart
|
|
}
|
|
|
|
func lexString(l *Lexer) stateFn {
|
|
for {
|
|
r := l.next()
|
|
if r == unicode_EOF {
|
|
l.items <- LexItem{lex_Error, "EOF before string end"}
|
|
return nil
|
|
}
|
|
|
|
if r == '\\' && l.peek() == '"' {
|
|
r = l.next()
|
|
//fmt.Printf("consuming %c\n", r) // consume
|
|
continue
|
|
}
|
|
|
|
if r == '"' {
|
|
break
|
|
}
|
|
if r == '\n' {
|
|
l.items <- LexItem{lex_Error, fmt.Sprintf("newline in string; pos: %d", l.pos)}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
l.backup()
|
|
l.emit(lex_String)
|
|
l.pos++
|
|
l.ignore()
|
|
return lexStateStart
|
|
}
|
|
|
|
// consume until the first non-space
|
|
func lexStateStart(l *Lexer) stateFn {
|
|
for {
|
|
r := l.next()
|
|
if r == unicode_EOF {
|
|
break
|
|
}
|
|
|
|
if unicode.IsSpace(r) {
|
|
l.ignore()
|
|
continue
|
|
}
|
|
|
|
switch r {
|
|
case '{':
|
|
l.emit(lex_OpenBracket)
|
|
continue
|
|
case '}':
|
|
l.emit(lex_CloseBracket)
|
|
continue
|
|
case ';':
|
|
l.emit(lex_Semicolon)
|
|
continue
|
|
case '#':
|
|
l.emit(lex_Pound)
|
|
return lexComment
|
|
case '"':
|
|
//l.backup()
|
|
l.ignore()
|
|
return lexString
|
|
}
|
|
|
|
if r == '$' || r == '%' || r == '+' || r == '-' || ('0' <= r && r <= '9') {
|
|
l.backup()
|
|
return lexNumber
|
|
}
|
|
|
|
if unicode.IsLetter(r) {
|
|
l.backup()
|
|
return lexIdent
|
|
}
|
|
}
|
|
|
|
// eof
|
|
if l.pos > l.start {
|
|
l.emit(lex_Space)
|
|
}
|
|
l.emit(lex_EOF)
|
|
return nil
|
|
}
|
|
|
|
func isAlphaNumeric(r rune) bool {
|
|
if !isAlpha(r) {
|
|
return false
|
|
}
|
|
|
|
return '0' <= r && r <= '9'
|
|
}
|
|
|
|
// isAlpha reports whether r is an ASCII letter, 'A'-'Z' or 'a'-'z'.
func isAlpha(r rune) bool {
	return ('A' <= r && r <= 'Z') || ('a' <= r && r <= 'z')
}
|