go-studybox/script/parser.go

376 lines
9.3 KiB
Go

package script
import (
"fmt"
"os"
"errors"
)
// Sentinel errors produced by the parsers in this file. They are combined
// with a more specific message via errors.Join, so callers should test for
// them with errors.Is.
var (
// ErrEarlyEOF is returned by parseToken when the input ends before all of
// an instruction's inline arguments could be read.
ErrEarlyEOF = errors.New("Unexpected EOF when reading OP arguments")
// ErrInvalidInstruction is returned by parseToken when an opcode byte has
// no entry in InstrMap.
ErrInvalidInstruction = errors.New("Invalid instruction")
// ErrNavigation is returned by SmartParse when it cannot follow control
// flow: a jump or switch with an unexpected number of targets, or a
// branch that resolves to a negative position in the input.
ErrNavigation = errors.New("SmartParse navigation error")
)
// Parser carries the state shared between Parse, SmartParse and parseToken
// while a single script is being decoded.
type Parser struct {
// rawinput is the complete script image, including the two leading bytes
// that Parse and SmartParse read as the stack address.
rawinput []byte
// current is the index into rawinput of the byte being decoded. parseToken
// advances it over the inline arguments it consumes.
current int
// startAddr is added to an index into rawinput to form a token's Offset.
startAddr int
// script is the result being built up.
script *Script
// cdl is the code/data log: Parse queries it with IsData, SmartParse
// records decoded positions in it with setCode.
cdl *CodeDataLog
}
// ParseFile loads the script stored at filename and runs Parse over its
// contents, forwarding startAddr and cdl unchanged.
//
// If the file cannot be read, the underlying error is returned wrapped and no
// Script is produced. Otherwise the result is whatever Parse returns.
func ParseFile(filename string, startAddr int, cdl *CodeDataLog) (*Script, error) {
	contents, readErr := os.ReadFile(filename)
	if readErr != nil {
		return nil, fmt.Errorf("unable to read file: %w", readErr)
	}

	return Parse(contents, startAddr, cdl)
}
// SmartParseFile loads the script stored at filename and runs SmartParse over
// its contents, forwarding startAddr and cdl unchanged.
//
// If the file cannot be read, the underlying error is returned wrapped and no
// Script is produced. Otherwise the result is whatever SmartParse returns.
func SmartParseFile(filename string, startAddr int, cdl *CodeDataLog) (*Script, error) {
	contents, readErr := os.ReadFile(filename)
	if readErr != nil {
		return nil, fmt.Errorf("unable to read file: %w", readErr)
	}

	return SmartParse(contents, startAddr, cdl)
}
// SmartParse decodes rawinput by following control flow instead of reading
// it front to back. Decoding starts at index 2 and continues until a
// terminating opcode is reached; the targets of jumps, calls and switches met
// on the way are queued and decoded afterwards. A position that has already
// been decoded is never decoded again.
//
// The first two bytes of rawinput are read little-endian as the script's
// StackAddress. startAddr is added to positions in rawinput to form token
// offsets and subtracted from branch targets to turn them back into
// positions. The cdl argument is ignored: a fresh CodeDataLog is used and
// every decoded token position is recorded in it with setCode.
//
// Tokens are appended in the order they are visited, which is not necessarily
// address order. Every branch target gets an AutoLabel; the first operand of
// any other instruction with OpCount 2 gets an AutoLabelVar unless a label
// already exists at that address.
//
// When decoding fails, the partially built script is returned together with
// the error. Navigation problems are reported joined with ErrNavigation.
func SmartParse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, error) {
if len(rawinput) < 3 {
return nil, fmt.Errorf("not enough bytes for script")
}
p := &Parser{
// Don't use CDL that was passed in
cdl: &CodeDataLog{},
script: &Script{
Tokens: []*Token{},
Warnings: []string{},
StackAddress: (int(rawinput[1])<<8) | int(rawinput[0]),
StartAddress: startAddr,
Labels: make(map[int]*Label), // map[location]name
},
rawinput: rawinput,
startAddr: startAddr,
}
// tokenMap indexes decoded tokens by offset; it is filled but not read
// within this function.
tokenMap := make(map[int]*Token)
// starting point is the third byte in the script.
// branches is a FIFO work list of positions (indexes into rawinput) still
// to be decoded; branches[0] is the one currently being processed.
branches := []int{ 2 }
// visited[i] is set once a token has been started at position i. Bytes
// consumed as inline arguments are not marked.
visited := make([]bool, len(p.rawinput))
for len(branches) > 0 {
//fmt.Printf("start @ $%04X\n", branches[0]+startAddr)
INNER:
// A queued position at or beyond the end of rawinput makes this loop a
// no-op, so such branch targets are silently skipped.
for p.current = branches[0]; p.current < len(p.rawinput); p.current++ {
//branches = branches[1:]
// A branch target below startAddr yields a negative position.
if p.current < 0 {
return p.script, errors.Join(ErrNavigation,
fmt.Errorf("HOW IS CURRENT NEGATIVE?????"))
}
// Reaching an already decoded position ends this path.
if visited[p.current] {
//fmt.Printf("found visited at $%04X\n", p.current+startAddr)
break
}
visited[p.current] = true
raw := p.rawinput[p.current]
token := &Token{
Offset: startAddr+p.current,
Raw: raw,
Inline: []InlineVal{},
}
p.script.Tokens = append(p.script.Tokens, token)
tokenMap[token.Offset] = token
//fmt.Printf("{$%04X} %s\n", token.Offset, token.String(map[int]*Label{}))
p.cdl.setCode(p.current)
// Bytes below 0x80 are kept as bare tokens without an instruction.
if raw < 0x80 {
continue
}
// parseToken advances p.current over the inline arguments, leaving it on
// the last consumed byte so the loop increment lands on the next token.
err := p.parseToken(token, raw)
if err != nil {
return p.script, err
}
//fmt.Println(token.String(map[int]*Label{}))
switch raw {
case 0x86, 0xAC, 0xFF, 0x81, 0x9B, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xFD: // return, long_return, break_engine & halts
//fmt.Printf("[$%04X] %s\n",
// token.Offset, token.Instruction.Name)
// Execution does not continue past these opcodes; end this path.
break INNER
case 0x84, 0xBF, 0xC0, 0x85: // jump_abs, jump_not_zero, jump_zero, call_abs
// These must carry exactly one inline value: the target address.
if len(token.Inline) < 1 {
return p.script, errors.Join(ErrNavigation,
fmt.Errorf("jump missing target"))
}
if len(token.Inline) > 1 {
return p.script, errors.Join(ErrNavigation,
fmt.Errorf("jump has too many targets"))
}
val := token.Inline[0].Int()
//fmt.Printf("[$%04X] %s $%04X\n",
// token.Offset, token.Instruction.Name, val)
// Queue the target as a position in rawinput and label its address.
branches = append(branches, val-startAddr)
p.script.Labels[val] = AutoLabel(val)
if raw == 0x84 { // jump_abs: end this path, the target is already queued
break INNER
}
case 0xC1, 0xEE: // jump_switch, call_switch
// Inline[0] is the target count, followed by that many targets.
if len(token.Inline) < 2 {
return p.script, errors.Join(ErrNavigation,
fmt.Errorf("switch missing targets"))
}
count := token.Inline[0].Int()
if len(token.Inline) != count+1 {
return p.script, errors.Join(ErrNavigation,
fmt.Errorf("switch target missmatch (expected %d, got %d)", count, len(token.Inline)-1))
}
for _, val := range token.Inline[1:] {
//fmt.Printf("[$%04X] %s $%04X\n",
// token.Offset, token.Instruction.Name, val.Int())
branches = append(branches, val.Int()-startAddr)
p.script.Labels[val.Int()] = AutoLabel(val.Int())
}
if raw == 0xC1 { // jump_switch
break INNER
}
}
// Any instruction with OpCount 2 gets a variable label on its operand,
// unless that address already has a label (e.g. a branch target set above).
if token.Instruction.OpCount == 2 {
val := token.Inline[0].Int()
if _, ok := p.script.Labels[val]; !ok {
p.script.Labels[val] = AutoLabelVar(val)
}
}
}
// Drop the position just processed; stop once it was the last one queued.
if len(branches) == 1 {
break
}
branches = branches[1:]
}
return p.script, nil
}
// Parse decodes rawinput linearly, from index 2 to the end, into a Script.
//
// The first two bytes of rawinput are read little-endian as the script's
// StackAddress. startAddr is added to each position in rawinput to form the
// token's Offset. cdl may be nil, in which case an empty CodeDataLog is used;
// positions it reports as data are emitted as tokens with IsData set and are
// not decoded as instructions. Bytes below 0x80 are likewise kept as bare
// tokens without an instruction.
//
// After decoding, a second pass annotates the result:
//   - targets of jump/call opcodes (0x84, 0x85, 0xBF, 0xC0) and of the switch
//     opcodes (0xC1, 0xEE) are flagged IsTarget and given an AutoLabel;
//   - for every other instruction with OpCount 2 that is not InlineImmediate,
//     a token at the operand address is flagged IsVariable and given an
//     AutoLabelVar.
//
// Targets that do not coincide with the start of a token, and jumps or
// switches lacking their addresses, are reported in Script.Warnings rather
// than as errors.
//
// It returns nil and an error when rawinput is shorter than three bytes. If
// decoding an instruction fails, the partially built script is returned
// together with the error from parseToken.
func Parse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, error) {
	if len(rawinput) < 3 {
		return nil, fmt.Errorf("not enough bytes for script")
	}
	if cdl == nil {
		cdl = &CodeDataLog{}
	}

	p := &Parser{
		cdl: cdl,
		script: &Script{
			Tokens:       []*Token{},
			Warnings:     []string{},
			StackAddress: (int(rawinput[1]) << 8) | int(rawinput[0]),
			StartAddress: startAddr,
			Labels:       make(map[int]*Label), // map[location]name
		},
		rawinput:  rawinput,
		startAddr: startAddr,
	}

	// tokenMap indexes every token by its Offset. Offsets are unique because
	// p.current only ever moves forward, so a map lookup finds the same
	// token a scan over Script.Tokens would.
	tokenMap := make(map[int]*Token)

	for p.current = 2; p.current < len(p.rawinput); p.current++ {
		raw := p.rawinput[p.current]
		token := &Token{
			Offset: startAddr + p.current,
			Raw:    raw,
			Inline: []InlineVal{},
		}
		p.script.Tokens = append(p.script.Tokens, token)
		tokenMap[token.Offset] = token

		if p.cdl.IsData(token.Offset) {
			token.IsData = true
			continue
		}
		if raw < 0x80 {
			continue
		}

		// parseToken advances p.current over the inline arguments, leaving
		// it on the last consumed byte so the loop increment lands on the
		// next token.
		if err := p.parseToken(token, raw); err != nil {
			return p.script, err
		}
	}

	// markTarget flags the token starting at addr as a branch target and
	// labels it. It reports false when no token starts at addr.
	markTarget := func(addr int) bool {
		tok, ok := tokenMap[addr]
		if !ok {
			return false
		}
		tok.IsTarget = true
		p.script.Labels[addr] = AutoLabel(addr)
		return true
	}

	// Find and mark labels for a few instructions
	for _, t := range p.script.Tokens {
		if t.Instruction == nil {
			continue
		}

		switch t.Raw {
		case 0x84, 0x85, 0xBF, 0xC0: // jmp/call
			if len(t.Inline) == 0 {
				p.script.Warnings = append(p.script.Warnings,
					fmt.Sprintf("jump/call missing addresses ($%04X)", t.Offset))
				continue
			}

			addr := t.Inline[0].Int()
			if !markTarget(addr) {
				p.script.Warnings = append(p.script.Warnings, fmt.Sprintf("Warning: no target found for jump/call at offset $%04X; value $%04X", t.Offset, addr))
			}

		case 0xC1, 0xEE: // switches
			if len(t.Inline) < 2 {
				p.script.Warnings = append(p.script.Warnings,
					fmt.Sprintf("jump/call switch missing addresses ($%04X)", t.Offset))
				continue
			}

			// Inline[0] is the target count; the targets follow it.
			for _, v := range t.Inline[1:] {
				addr := v.Int()
				if !markTarget(addr) {
					p.script.Warnings = append(p.script.Warnings, fmt.Sprintf("Warning: no target found for jump/call switch at offset $%04X; value: $%04X", t.Offset, addr))
				}
			}

		default:
			// if word arg, see if it's something in this script
			if t.Instruction.OpCount == 2 && !t.Instruction.InlineImmediate {
				addr := t.Inline[0].Int()
				if tok, ok := tokenMap[addr]; ok {
					tok.IsVariable = true
					p.script.Labels[addr] = AutoLabelVar(addr)
				}
			}
		}
	}

	return p.script, nil
}
// parseToken looks up the instruction for opcode raw, stores it in
// token.Instruction, and reads the instruction's inline arguments from
// p.rawinput into token.Inline.
//
// On entry p.current must be the index of the opcode byte. On success
// p.current is left on the last byte consumed, so the caller's loop increment
// moves to the next token. The argument layout is selected by the
// instruction's OpCount:
//
//	-1      bytes up to and including a 0x00 terminator
//	-2, -3  a count byte followed by that many two-byte words
//	 1      a single byte
//	 2      a single two-byte word
//
// Any other OpCount consumes nothing.
//
// It returns an error joined with ErrInvalidInstruction when raw has no entry
// in InstrMap, and an error joined with ErrEarlyEOF when the input ends
// before the arguments are complete. token.Inline is not modified when an
// error is returned.
func (p *Parser) parseToken(token *Token, raw byte) error {
	op, ok := InstrMap[raw]
	if !ok {
		return errors.Join(ErrInvalidInstruction,
			fmt.Errorf("OP 0x%02X not in instruction map", raw))
	}
	token.Instruction = op

	// shortRead builds the error for an instruction whose fixed-size
	// arguments (need bytes after the opcode) run past the end of the input.
	shortRead := func(need int) error {
		return errors.Join(ErrEarlyEOF,
			fmt.Errorf("OP needs %d argument byte(s) at offset 0x%X (%d) %#v", need, p.current, p.current, op))
	}

	args := []InlineVal{}
	switch op.OpCount {
	case -1: // null terminated
		// The scan starts on the opcode byte itself, so that byte is the
		// first value collected. Input ending without a terminator is
		// accepted as-is.
		for ; p.current < len(p.rawinput); p.current++ {
			b := p.rawinput[p.current]
			args = append(args, ByteVal(b))
			if b == 0x00 {
				break
			}
		}

	case -2, -3: // count then count words
		// Both layouts are decoded identically here. For -3 the original
		// note reads: "default" is no call (skip Code_Pointer to after args).
		if p.current+1 >= len(p.rawinput) {
			return shortRead(1)
		}
		p.current++
		l := int(p.rawinput[p.current])
		args = append(args, ByteVal(l))

		p.current++
		for c := 0; c < l; c++ {
			if len(p.rawinput) <= p.current+1 {
				return errors.Join(ErrEarlyEOF,
					fmt.Errorf("OP early end at offset 0x%X (%d) {%d} %#v", p.current, p.current, l, op))
			}
			args = append(args, WordVal([2]byte{p.rawinput[p.current], p.rawinput[p.current+1]}))
			p.current += 2
		}
		// Step back onto the last consumed byte.
		p.current--

	case 2:
		if p.current+2 >= len(p.rawinput) {
			return shortRead(2)
		}
		args = append(args, WordVal([2]byte{p.rawinput[p.current+1], p.rawinput[p.current+2]}))
		p.current += 2

	case 1:
		if p.current+1 >= len(p.rawinput) {
			return shortRead(1)
		}
		p.current++
		args = append(args, ByteVal(p.rawinput[p.current]))
	}

	token.Inline = args
	return nil
}