commit 6c72977f178601d31bf6e04333424fdba1d5f8b8 Author: Zorchenhimer Date: Sat Apr 27 15:19:05 2024 -0400 Initial commit Starting with some script parsing. This script is the Domain Specific Language that is on the tapes of the studybox. Not every opcode is fully decoded/named yet. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..36f971e --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +bin/* diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..59d485c --- /dev/null +++ b/Makefile @@ -0,0 +1,8 @@ +.PHONY: all + +COMMANDS: bin/script-decode + +all: $(COMMANDS) + +bin/%: cmd/%.go script/*.go + go build -o $@ $< diff --git a/cmd/script-decode.go b/cmd/script-decode.go new file mode 100644 index 0000000..6edd828 --- /dev/null +++ b/cmd/script-decode.go @@ -0,0 +1,76 @@ +package main + +import ( + "fmt" + "os" + "strings" + "strconv" + + "github.com/alexflint/go-arg" + "git.zorchenhimer.com/Zorchenhimer/go-studybox/script" +) + +type Arguments struct { + Input string `arg:"positional,required"` + Output string `arg:"positional"` + StartAddr string `arg:"--start" default:"0x6000" help:"base address for the start of the script"` + start int +} + +func run(args *Arguments) error { + if args.StartAddr == "" { + return fmt.Errorf("start address cannot be empty") + } + + if strings.HasPrefix(args.StartAddr, "$") { + args.StartAddr = "0x"+args.StartAddr[1:] + } + + val, err := strconv.ParseInt(args.StartAddr, 0, 32) + if err != nil { + return fmt.Errorf("invalid start address %q: %w", args.StartAddr, err) + } + + args.start = int(val) + + scr, err := script.ParseFile(args.Input, args.start) + if err != nil { + return err + } + + outfile := os.Stdout + if args.Output != "" { + outfile, err = os.Create(args.Output) + if err != nil { + return fmt.Errorf("unable to create output file: %w", err) + } + defer outfile.Close() + } + + for _, w := range scr.Warnings { + fmt.Fprintln(os.Stderr, w) + if args.Output != "" { + fmt.Fprintln(outfile, "; 
"+w) + } + } + + fmt.Fprintf(outfile, "; Start address: $%04X\n", scr.StartAddress) + fmt.Fprintf(outfile, "; Stack address: $%04X\n\n", scr.StackAddress) + + for _, token := range scr.Tokens { + fmt.Fprintln(outfile, token) + } + + return nil +} + +func main() { + args := &Arguments{} + arg.MustParse(args) + + err := run(args) + if err != nil { + fmt.Fprintln(os.Stderr, err) + os.Exit(1) + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..dc82727 --- /dev/null +++ b/go.mod @@ -0,0 +1,7 @@ +module git.zorchenhimer.com/Zorchenhimer/go-studybox + +go 1.22.2 + +require github.com/alexflint/go-arg v1.4.3 + +require github.com/alexflint/go-scalar v1.1.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..9ef1991 --- /dev/null +++ b/go.sum @@ -0,0 +1,16 @@ +github.com/alexflint/go-arg v1.4.3 h1:9rwwEBpMXfKQKceuZfYcwuc/7YY7tWJbFsgG5cAU/uo= +github.com/alexflint/go-arg v1.4.3/go.mod h1:3PZ/wp/8HuqRZMUUgu7I+e1qcpUbvmS258mRXkFH4IA= +github.com/alexflint/go-scalar v1.1.0 h1:aaAouLLzI9TChcPXotr6gUhq+Scr8rl0P9P4PnltbhM= +github.com/alexflint/go-scalar v1.1.0/go.mod h1:LoFvNMqS1CPrMVltza4LvnGKhaSpc3oyLEBUZVhhS2o= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= +github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY= +github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +gopkg.in/check.v1 
// script/instructions.go

// Instruction describes one opcode of the script language.
type Instruction struct {
	Opcode   byte
	ArgCount int // stack arguments
	OpCount  int // inline operands. length in bytes.
	//  -1: nul-terminated
	//  -2: first byte is count, followed by that number of words
	//  -3: like -2, but with one additional word
	RetCount int // return count
	Name     string
}

// String returns the instruction's name, or "unknown" for opcodes that have
// not been given a name yet.
func (i Instruction) String() string {
	if i.Name != "" {
		//return fmt.Sprintf("$%02X_%s", i.Opcode, i.Name)
		return i.Name
	}

	//return fmt.Sprintf("$%02X_unknown", i.Opcode)
	return "unknown"
}

// InstrMap indexes Instructions by opcode.
var InstrMap = indexInstructions(Instructions)

// indexInstructions builds the opcode lookup table for list.
func indexInstructions(list []*Instruction) map[byte]*Instruction {
	m := make(map[byte]*Instruction, len(list))
	for _, ins := range list {
		m[ins.Opcode] = ins
	}
	return m
}

// Instructions lists every opcode from $80 to $FF. The columns are
// Opcode, ArgCount, OpCount, RetCount, Name; entries with an empty name have
// not been identified yet.
var Instructions = []*Instruction{
	{0x80, 0, 0, 0, "play_beep"},
	{0x81, 0, 0, 0, "halt"},
	{0x82, 0, 0, 0, "tape_nmi_shenanigans"},
	{0x83, 0, 0, 0, "tape_wait"},
	{0x84, 0, 2, 0, "jump_abs"},
	{0x85, 0, 2, 0, "call_abs"},
	{0x86, 0, 0, 0, "return"},
	{0x87, 0, 0, 0, "loop"},
	{0x88, 0, 0, 0, "play_sound"},
	{0x89, 3, 0, 0, ""},
	{0x8A, 0, 2, 0, "pop_string_to_addr"},
	{0x8B, 1, 0, 0, ""},
	{0x8C, 0, 0, 1, "string_length"},
	{0x8D, 0, 0, 1, "string_to_int"},
	{0x8E, 0, 0, 16, "string_concat"},
	{0x8F, 0, 0, 1, "strings_equal"},

	{0x90, 0, 0, 1, "strings_not_equal"},
	{0x91, 0, 0, 1, "string_less_than"},
	{0x92, 0, 0, 1, "string_less_than_equal"},
	{0x93, 0, 0, 1, "string_greater_than_equal"},
	{0x94, 0, 0, 1, "string_greater_than"},
	{0x95, 1, 0, 0, ""},
	{0x96, 0, 2, 0, "set_word_4E"},
	{0x97, 2, 0, 0, ""},
	{0x98, 1, 0, 0, ""},
	{0x99, 1, 0, 0, ""},
	{0x9A, 0, 0, 0, ""},
	{0x9B, 0, 0, 0, "halt"},
	{0x9C, 0, 0, 0, "toggle_44FE"},
	{0x9D, 2, 0, 0, "something_tape"},
	{0x9E, 2, 0, 0, ""},
	{0x9F, 6, 0, 0, ""},

	{0xA0, 2, 0, 1, ""},
	{0xA1, 1, 0, 0, ""},
	{0xA2, 1, 0, 0, "buffer_palette"},
	{0xA3, 1, 0, 0, ""},
	{0xA4, 3, 0, 0, ""},
	{0xA5, 1, 0, 0, "set_470A"},
	{0xA6, 1, 0, 0, "set_470B"},
	{0xA7, 0, 0, 0, "call_asm"}, // built-in ACE, lmao
	{0xA8, 5, 0, 0, ""},
	{0xA9, 1, 0, 0, ""},
	{0xAA, 1, 0, 0, ""},
	{0xAB, 1, 0, 0, "long_call"},
	{0xAC, 0, 0, 0, "long_return"},
	{0xAD, 1, 0, 1, "absolute"},
	{0xAE, 1, 0, 1, "compare"},
	{0xAF, 0, 0, 1, ""},

	{0xB0, 1, 0, 16, ""},
	{0xB1, 1, 0, 16, "to_hex_string"},
	{0xB2, 0, 0, 1, ""},
	{0xB3, 7, 0, 0, ""}, // possible 16-bit inline?
	{0xB4, 0, 0, 0, ""},
	{0xB5, 0, 0, 0, ""},
	{0xB6, 0, 0, 0, ""},
	{0xB7, 0, 2, 0, "deref_ptr"},
	{0xB8, 0, 2, 0, "push_word"},
	{0xB9, 0, 2, 0, "push_word_indexed"},
	{0xBA, 0, 2, 0, "push"},
	{0xBB, 0, -1, 0, "push_data"},
	{0xBC, 0, 2, 0, "push_string_from_table"},
	{0xBD, 0, 2, 0, "pop"},
	{0xBE, 0, 2, 0, "write_to_table"},
	{0xBF, 0, 2, 0, "jump_not_zero"},

	{0xC0, 1, 2, 0, "jump_zero"},
	{0xC1, 1, -2, 0, "jump_switch"},
	{0xC2, 1, 0, 1, "equals_zero"},
	{0xC3, 2, 0, 1, "and_a_b"},
	{0xC4, 2, 0, 1, "or_a_b"},
	{0xC5, 2, 0, 1, "equal"},
	{0xC6, 2, 0, 1, "not_equal"},
	{0xC7, 2, 0, 1, "less_than"},
	{0xC8, 2, 0, 1, "less_than_equal"},
	{0xC9, 2, 0, 1, "greater_than"},
	{0xCA, 2, 0, 1, "greater_than_equal"},
	{0xCB, 2, 0, 1, "sum"},
	{0xCC, 2, 0, 1, "subtract"},
	{0xCD, 2, 0, 1, "multiply"},
	{0xCE, 2, 0, 1, "signed_divide"},
	{0xCF, 1, 0, 1, "negate"},

	{0xD0, 1, 0, 1, "modulus"},
	{0xD1, 2, 0, 1, "expansion_controller"},
	{0xD2, 2, 0, 1, ""},
	{0xD3, 2, 0, 16, ""},
	{0xD4, 3, 0, 0, ""},
	{0xD5, 1, 0, 0, "wait_for_tape"},
	{0xD6, 1, 0, 16, "truncate_string"},
	{0xD7, 1, 0, 16, "trim_string"},
	{0xD8, 1, 0, 16, "trim_string_start"},
	{0xD9, 2, 0, 16, "trim_string_start"},
	{0xDA, 1, 0, 16, "to_int_string"},
	{0xDB, 3, 0, 0, ""},
	{0xDC, 5, 0, 0, ""},
	{0xDD, 5, 0, 0, ""},
	{0xDE, 3, 0, 0, ""},
	{0xDF, 3, 0, 0, ""},

	{0xE0, 2, 0, 1, "signed_divide"},
	{0xE1, 4, 0, 0, ""},
	{0xE2, 7, 0, 0, "setup_sprite"},
	{0xE3, 1, 0, 1, "get_byte_at_arg_a"},
	{0xE4, 2, 0, 0, "swap_ram_bank"},
	{0xE5, 1, 0, 0, "disable_sprite"},
	{0xE6, 1, 0, 0, "tape_nmi_setup"},
	{0xE7, 7, 0, 0, ""},
	{0xE8, 1, 0, 0, "setup_tape_nmi"},
	{0xE9, 0, 1, 0, "setup_loop"},
	{0xEA, 0, 0, 0, "string_write_to_table"},
	{0xEB, 4, 0, 0, ""},
	{0xEC, 2, 0, 0, "scroll"},
	{0xED, 1, 0, 0, "disable_sprites"},
	{0xEE, 1, -3, 0, "call_switch"},
	{0xEF, 6, 0, 0, ""},

	{0xF0, 0, 0, 0, "disable_sprites"},
	{0xF1, 4, 0, 0, ""},
	{0xF2, 0, 0, 0, "halt"},
	{0xF3, 0, 0, 0, "halt"},
	{0xF4, 0, 0, 16, "halt"},
	{0xF5, 1, 0, 1, "halt"},
	{0xF6, 1, 0, 0, "halt"},
	{0xF7, 0, 0, 0, "halt"},
	{0xF8, 2, 0, 0, "halt"},
	{0xF9, 0, 0, 1, ""},
	{0xFA, 0, 0, 1, ""},
	{0xFB, 1, 0, 0, "jump_arg_a"},
	{0xFC, 2, 0, 1, ""},
	{0xFD, 0, 0, 16, "halt"},
	{0xFE, 4, 0, 0, ""},
	{0xFF, 0, 0, 0, "break_engine"}, // code handler is $FFFF
}

// script/parser.go

// ParseFile reads filename and parses its contents with Parse.
func ParseFile(filename string, startAddr int) (*Script, error) {
	rawfile, err := os.ReadFile(filename)
	if err != nil {
		return nil, fmt.Errorf("unable to read file: %w", err)
	}

	return Parse(rawfile, startAddr)
}

// Parse decodes a raw script image into tokens.
//
// The first two bytes are read as a little-endian word and stored as the
// script's StackAddress. Every following byte starts a token whose Offset is
// startAddr plus the byte's index in rawinput. Bytes below $80 become tokens
// without an Instruction; bytes from $80 up are looked up in InstrMap and
// their inline operands, as described by Instruction.OpCount, are attached
// to the token.
//
// After decoding, every token addressed by a jump, call or switch operand is
// flagged with IsTarget. Operands that do not land on the start of a token
// are reported in Script.Warnings, as is string data that runs to the end of
// the input without a terminating NUL.
//
// An error is returned when the input is shorter than three bytes, when an
// opcode is missing from InstrMap, when an operand is cut off by the end of
// the input, or when a switch carries no addresses.
func Parse(rawinput []byte, startAddr int) (*Script, error) {
	if len(rawinput) < 3 {
		return nil, fmt.Errorf("not enough bytes for script")
	}

	script := &Script{
		Tokens:       []*Token{},
		Warnings:     []string{},
		StackAddress: (int(rawinput[1]) << 8) | int(rawinput[0]),
		StartAddress: startAddr,
	}

	for i := 2; i < len(rawinput); i++ {
		raw := rawinput[i]

		token := &Token{
			Offset: startAddr + i,
			Raw:    raw,
			Inline: []InlineVal{},
		}
		script.Tokens = append(script.Tokens, token)

		if raw < 0x80 {
			continue
		}

		op, ok := InstrMap[raw]
		if !ok {
			return nil, fmt.Errorf("OP %02X not in instruction map", raw)
		}
		token.Instruction = op

		inline, last, warning, err := decodeInline(rawinput, i, startAddr, op)
		if err != nil {
			return nil, err
		}
		if warning != "" {
			script.Warnings = append(script.Warnings, warning)
		}
		token.Inline = inline

		// last is the final byte that belongs to this token; the loop's
		// increment then moves on to the first byte of the next one.
		i = last
	}

	if err := markTargets(script); err != nil {
		return nil, err
	}

	return script, nil
}

// decodeInline reads the inline operands of the opcode stored at data[pos].
// It returns the operands, the index of the last byte that belongs to the
// instruction, and an optional warning. base is only used to report
// addresses in messages.
func decodeInline(data []byte, pos, base int, op *Instruction) ([]InlineVal, int, string, error) {
	truncated := func() error {
		return fmt.Errorf("%s at offset $%04X: operand runs past end of script", op, base+pos)
	}

	switch op.OpCount {
	case -1: // nul-terminated data; the terminator is kept as the last operand
		vals := []InlineVal{}
		for at := pos + 1; at < len(data); at++ {
			vals = append(vals, ByteVal(data[at]))
			if data[at] == 0x00 {
				return vals, at, "", nil
			}
		}
		warning := fmt.Sprintf("Warning: unterminated data for %s at offset $%04X", op, base+pos)
		return vals, len(data) - 1, warning, nil

	case -2, -3: // count byte, then count words (-3: one extra, the default case)
		if pos+1 >= len(data) {
			return nil, 0, "", truncated()
		}
		count := int(data[pos+1])
		words := count
		if op.OpCount == -3 {
			words++
		}

		vals := make([]InlineVal, 0, words+1)
		vals = append(vals, ByteVal(count))
		at := pos + 2
		for n := 0; n < words; n++ {
			if at+1 >= len(data) {
				return nil, 0, "", truncated()
			}
			vals = append(vals, WordVal{data[at], data[at+1]})
			at += 2
		}
		return vals, at - 1, "", nil

	case 2:
		if pos+2 >= len(data) {
			return nil, 0, "", truncated()
		}
		return []InlineVal{WordVal{data[pos+1], data[pos+2]}}, pos + 2, "", nil

	case 1:
		if pos+1 >= len(data) {
			return nil, 0, "", truncated()
		}
		return []InlineVal{ByteVal(data[pos+1])}, pos + 1, "", nil
	}

	return []InlineVal{}, pos, "", nil
}

// markTargets flags every token that is the destination of a jump, call or
// switch entry, and records a warning for each destination that does not
// coincide with the start of a token.
func markTargets(script *Script) error {
	byOffset := make(map[int]*Token, len(script.Tokens))
	for _, tok := range script.Tokens {
		byOffset[tok.Offset] = tok
	}

	for _, t := range script.Tokens {
		switch t.Raw {
		case 0x84, 0x85, 0xBF, 0xC0: // jmp/call
			if len(t.Inline) == 0 {
				return fmt.Errorf("jump/call missing address")
			}

			addr := t.Inline[0].Int()
			if tok, ok := byOffset[addr]; ok {
				tok.IsTarget = true
			} else {
				script.Warnings = append(script.Warnings, fmt.Sprintf("Warning: no target found for jump/call at offset $%04X; value $%04X", t.Offset, addr))
			}

		case 0xC1, 0xEE: // switches
			if len(t.Inline) < 2 {
				return fmt.Errorf("jump/call switch missing addresses")
			}

			// The first operand is the entry count, not an address.
			for _, v := range t.Inline[1:] {
				addr := v.Int()
				if tok, ok := byOffset[addr]; ok {
					tok.IsTarget = true
				} else {
					script.Warnings = append(script.Warnings, fmt.Sprintf("Warning: no target found for jump/call switch at offset $%04X; value: $%04X", t.Offset, addr))
				}
			}
		}
	}

	return nil
}

// script/script.go

// Script is the result of parsing a script image.
type Script struct {
	Tokens   []*Token
	Warnings []string

	StartAddress int
	StackAddress int
}

// script/tokens.go

// Token is one decoded element of a script: either a plain byte below $80 or
// an instruction together with its inline operands.
type Token struct {
	Offset   int
	Raw      byte
	Inline   []InlineVal // inline operands only; the opcode itself is in Raw
	IsTarget bool        // target of a call/jump?

	Instruction *Instruction
}

// String renders the token as one listing line of the form
// "[offset] opcode operand-bytes : text". Tokens that are jump or call
// targets are preceded by a blank line and an "Lxxxx:" label, and a blank
// line follows every return ($86).
func (t Token) String() string {
	suffix := ""
	if t.Raw == 0x86 {
		suffix = "\n"
	}

	prefix := ""
	if t.IsTarget {
		prefix = fmt.Sprintf("\nL%04X:\n", t.Offset)
	}

	if t.Raw < 0x80 {
		return fmt.Sprintf("%s[%04X] %02X %-5s : %d%s",
			prefix, t.Offset, t.Raw, "", int(t.Raw), suffix)
	}

	// Tokens built by Parse always carry an Instruction here; the guard only
	// keeps hand-made tokens from panicking.
	name := "unknown"
	if t.Instruction != nil {
		name = t.Instruction.String()
	}

	if len(t.Inline) == 0 {
		return fmt.Sprintf("%s[%04X] %02X %-5s : %s%s",
			prefix, t.Offset, t.Raw, "", name, suffix)
	}

	var raw []byte
	argstr := make([]string, 0, len(t.Inline))
	for _, a := range t.Inline {
		argstr = append(argstr, a.HexString())
		raw = append(raw, a.Bytes()...)
	}

	switch t.Raw {
	case 0xBB:
		// Show the data as a quoted string, without its NUL terminator.
		text := raw
		if n := len(text); n > 0 && text[n-1] == 0x00 {
			text = text[:n-1]
		}
		return fmt.Sprintf("%s[%04X] %02X (...) : %s %q%s",
			prefix, t.Offset, t.Raw, name, string(text), suffix)

	case 0xC1, 0xEE: // switches: the raw byte column is left empty
		return fmt.Sprintf("%s[%04X] %02X %-5s : %s %s%s",
			prefix, t.Offset, t.Raw, "", name, strings.Join(argstr, " "), suffix)

	default:
		// "% X" prints the operand bytes as space separated hex pairs.
		return fmt.Sprintf("%s[%04X] %02X %-5s : %s %s%s",
			prefix, t.Offset, t.Raw, fmt.Sprintf("% X", raw), name, strings.Join(argstr, " "), suffix)
	}
}

// script/values.go

// InlineVal is an operand stored in the script directly after its opcode.
type InlineVal interface {
	HexString() string
	Bytes() []byte
	Int() int
}

// ByteVal is a single byte operand.
type ByteVal byte

// HexString formats the value as "$" followed by two hex digits.
func (bv ByteVal) HexString() string {
	return fmt.Sprintf("$%02X", bv)
}

// Bytes returns the value as a one byte slice.
func (bv ByteVal) Bytes() []byte {
	return []byte{byte(bv)}
}

// Int returns the value as an int.
func (bv ByteVal) Int() int {
	return int(bv)
}

// WordVal is a two byte operand kept in script order, low byte first.
type WordVal [2]byte

// NewWordVal builds a WordVal from v. It panics unless v holds exactly two
// bytes.
func NewWordVal(v []byte) WordVal {
	if len(v) != 2 {
		panic("WordVal must be two bytes")
	}

	return WordVal([2]byte{v[0], v[1]})
}

// HexString formats the value as "$" followed by four hex digits, high byte
// first.
func (wv WordVal) HexString() string {
	return fmt.Sprintf("$%02X%02X", wv[1], wv[0])
}

// Bytes returns the two bytes in script order.
func (wv WordVal) Bytes() []byte {
	return []byte{wv[0], wv[1]}
}

// Int returns the little-endian value of the word.
func (wv WordVal) Int() int {
	return (int(wv[1]) << 8) | int(wv[0])
}