diff --git a/cmd/script-decode.go b/cmd/script-decode.go index 5a4929e..fa0612b 100644 --- a/cmd/script-decode.go +++ b/cmd/script-decode.go @@ -5,7 +5,6 @@ import ( "os" "strings" "strconv" - "bufio" "slices" "errors" @@ -23,6 +22,7 @@ type Arguments struct { CDL string `arg:"--cdl" help:"CodeDataLog json file"` CDLOutput string `arg:"--cdl-output"` Smart bool `arg:"--smart"` + NoAddrPrefix bool `arg:"--no-addr-prefix"` start int } @@ -45,10 +45,13 @@ func run(args *Arguments) error { var cdl *script.CodeDataLog if args.CDL != "" { - //fmt.Println(" CDL:", args.CDL) cdl, err = script.CdlFromJsonFile(args.CDL) if err != nil { - //return fmt.Errorf("CDL Parse error: %w", err) + if errors.Is(err, os.ErrNotExist) { + fmt.Println("WARN: CDL file doesn't exist") + } else { + return fmt.Errorf("CDL Parse error: %w", err) + } cdl = nil } } @@ -69,14 +72,24 @@ func run(args *Arguments) error { } if args.LabelFile != "" { - labels, err := parseLabelFile(args.LabelFile) + err = scr.LabelsFromJsonFile(args.LabelFile) + //labels, err := parseLabelFile(args.LabelFile) if err != nil { - return fmt.Errorf("Labels parse error: %w", err) + if errors.Is(err, os.ErrNotExist) { + fmt.Println("WARN: Label file doesn't exist") + } else { + return fmt.Errorf("Labels parse error: %w", err) + } } - for _, label := range labels { - scr.Labels[label.Address] = label + err = scr.WriteLabelsToFile(args.LabelFile) + if err != nil { + return fmt.Errorf("Labels write error: %w", err) } + + //for _, label := range labels { + // scr.Labels[label.Address] = label + //} } outfile := os.Stdout @@ -105,7 +118,7 @@ func run(args *Arguments) error { }) for _, token := range scr.Tokens { - fmt.Fprintln(outfile, token.String(scr.Labels)) + fmt.Fprintln(outfile, token.String(scr.Labels, args.NoAddrPrefix)) } if args.StatsFile != "" { @@ -135,67 +148,16 @@ func run(args *Arguments) error { if err != nil { return fmt.Errorf("Error writing CDL file: %w", err) } + + err = scr.DebugCDL(cdlout+".dbg") + if err != nil { + return fmt.Errorf("Error writing CDL debug file: %w", err) + } } return nil } -func parseLabelFile(filename string) ([]*script.Label, error) { - file, err := os.Open(filename) - if err != nil { - return nil, err - } - defer file.Close() - - labels := []*script.Label{} - scanner := bufio.NewScanner(file) - for scanner.Scan() { - line := strings.TrimSpace(scanner.Text()) - if line == "" || strings.HasPrefix(line, "#") { - continue - } - - line = strings.ReplaceAll(line, "\t", " ") - parts := strings.Split(line, " ") - - parts = slices.DeleteFunc(parts, func(str string) bool { - return str == "" - }) - - if len(parts) < 2 { - fmt.Println("Ignoring", line) - continue - } - - if strings.HasPrefix(parts[0], "$") { - parts[0] = "0x"+parts[0][1:] - } - - addr, err := strconv.ParseInt(parts[0], 0, 32) - if err != nil { - fmt.Printf("Address parse error for %q: %s\n", line, err) - continue - } - - lbl := &script.Label{ - Name: parts[1], - Address: int(addr), - } - - if lbl.Name == "$" { - lbl.Name = "" - } - - if len(parts) > 2 { - lbl.Comment = strings.Join(parts[2:], " ") - } - - labels = append(labels, lbl) - } - - return labels, nil -} - func main() { args := &Arguments{} arg.MustParse(args) diff --git a/script/cdl.go b/script/cdl.go index 4d6e7d5..8b54852 100644 --- a/script/cdl.go +++ b/script/cdl.go @@ -13,8 +13,10 @@ type CodeDataLog struct { Code []CdlRange Data []CdlRange + EntryPoints []string + + entries []int cache map[int]cdlBit - offset int } type CdlRange struct { @@ -32,6 +34,26 @@ var ( //cdlOpCode cdlBit = 0x04 ) +func (c cdlBit) String() string { + switch c { + case cdlUnknown: + return "UNKN" + case cdlCode: + return "CODE" + case cdlData: + return "DATA" + default: + return "????" + } +} + +func NewCDL() *CodeDataLog { + return &CodeDataLog{ + entries: []int{}, + cache: make(map[int]cdlBit), + } +} + func (cdl *CodeDataLog) WriteToFile(filename string) error { file, err := os.Create(filename) if err != nil { @@ -47,6 +69,10 @@ func (cdl *CodeDataLog) WriteToFile(filename string) error { return werr } +func (cdl *CodeDataLog) getEntries() []int { + return cdl.entries +} + func getRanges(list []int) []CdlRange { //fmt.Printf("getRanges(%v)\n", list) data := []CdlRange{} @@ -102,6 +128,10 @@ func (cdl *CodeDataLog) WriteTo(w io.Writer) (int64, error) { data := []int{} for _, k := range keys { + if k < 0x6000 { + continue + } + b := cdl.cache[k] if b & cdlCode == cdlCode { code = append(code, k) @@ -115,6 +145,10 @@ func (cdl *CodeDataLog) WriteTo(w io.Writer) (int64, error) { clean.Code = getRanges(code) clean.Data = getRanges(data) + for _, ent := range cdl.entries { + clean.EntryPoints = append(clean.EntryPoints, fmt.Sprintf("0x%X", ent)) + } + raw, err := json.MarshalIndent(clean, "", "\t") if err != nil { return 0, err @@ -124,26 +158,12 @@ func (cdl *CodeDataLog) WriteTo(w io.Writer) (int64, error) { return int64(n), err } -func (cdl *CodeDataLog) setData(scriptOffset int) { - if cdl.cache == nil { - err := cdl.doCache() - if err != nil { - panic(fmt.Sprintf("CDL data error: %w", err)) - } - } - - cdl.cache[scriptOffset+cdl.offset] |= cdlData +func (cdl *CodeDataLog) setData(addr int) { + cdl.cache[addr] |= cdlData } -func (cdl *CodeDataLog) setCode(scriptOffset int) { - if cdl.cache == nil { - err := cdl.doCache() - if err != nil { - panic(fmt.Sprintf("CDL data error: %w", err)) - } - } - - cdl.cache[scriptOffset+cdl.offset] |= cdlCode +func (cdl *CodeDataLog) setCode(addr int) { + cdl.cache[addr] |= cdlCode } func (cdl *CodeDataLog) doCache() error { @@ -161,10 +181,6 @@ func (cdl *CodeDataLog) doCache() error { } for i := int(start); i <= int(end); i++ { - if _, ok := cdl.cache[i]; !ok { - cdl.cache[i] = cdlUnknown - } - cdl.cache[i] |= cdlCode } } @@ -181,25 +197,34 @@ func (cdl *CodeDataLog) doCache() error { } for i := int(start); i <= int(end); i++ { - if _, ok := cdl.cache[i]; !ok { - cdl.cache[i] = cdlUnknown - } - cdl.cache[i] |= cdlData } } + cdl.entries = []int{} + for _, ent := range cdl.EntryPoints { + addr, err := strconv.ParseInt(ent, 0, 32) + if err != nil { + return fmt.Errorf("Invalid entry point: %q", ent) + } + + cdl.entries = append(cdl.entries, int(addr)) + } + return nil } func CdlFromJson(r io.Reader) (*CodeDataLog, error) { - cdl := &CodeDataLog{} + cdl := NewCDL() dec := json.NewDecoder(r) err := dec.Decode(cdl) if err != nil { return nil, err } + //cdl.Data = []CdlRange{} + cdl.doCache() + return cdl, nil } @@ -213,13 +238,6 @@ func CdlFromJsonFile(filename string) (*CodeDataLog, error) { } func (cdl *CodeDataLog) IsData(addr int) bool { - if cdl.cache == nil { - err := cdl.doCache() - if err != nil { - panic(fmt.Sprintf("CDL data error: %w", err)) - } - } - val, ok := cdl.cache[addr] if !ok { return false @@ -229,13 +247,6 @@ func (cdl *CodeDataLog) IsData(addr int) bool { } func (cdl *CodeDataLog) IsCode(addr int) bool { - if cdl.cache == nil { - err := cdl.doCache() - if err != nil { - panic(fmt.Sprintf("CDL data error: %w", err)) - } - } - val, ok := cdl.cache[addr] if !ok { return false diff --git a/script/labels.go b/script/labels.go index 09af803..ff497e5 100644 --- a/script/labels.go +++ b/script/labels.go @@ -2,6 +2,10 @@ package script import ( "fmt" + "io" + "os" + "encoding/json" + "strconv" ) type Label struct { @@ -11,6 +15,36 @@ type Label struct { FarLabel bool } +type JsonLabel struct { + Address string + Name string + Comment string + FarLabel bool +} + +func (l Label) JsonLabel() JsonLabel { + return JsonLabel{ + Address: fmt.Sprintf("0x%X", l.Address), + Name: l.Name, + Comment: l.Comment, + FarLabel: l.FarLabel, + } +} + +func (l JsonLabel) Label() (*Label, error) { + addr, err := strconv.ParseInt(l.Address, 0, 32) + if err != nil { + return nil, fmt.Errorf("Invalid address: %q", l.Address) + } + + return &Label{ + Address: int(addr), + Name: l.Name, + Comment: l.Comment, + FarLabel: l.FarLabel, + }, nil +} + func AutoLabel(address int) *Label { return &Label{ Address: address, @@ -40,3 +74,61 @@ func NewLabel(address int, name string) *Label { } } +func (s *Script) LabelsFromJsonFile(filename string) error { + file, err := os.Open(filename) + if err != nil { + return err + } + defer file.Close() + + return s.LabelsFromJson(file) +} + +func (s *Script) LabelsFromJson(r io.Reader) error { + lbls := []JsonLabel{} + dec := json.NewDecoder(r) + err := dec.Decode(&lbls) + if err != nil { + return err + } + + if s.Labels == nil { + s.Labels = make(map[int]*Label) + } + + for _, lbl := range lbls { + l, err := lbl.Label() + if err != nil { + return err + } + + s.Labels[l.Address] = l + } + + return nil +} + +func (s *Script) WriteLabelsToFile(filename string) error { + file, err := os.Create(filename) + if err != nil { + return err + } + defer file.Close() + + return s.WriteLabels(file) +} + +func (s *Script) WriteLabels(w io.Writer) error { + slice := []JsonLabel{} + for _, lbl := range s.Labels { + slice = append(slice, lbl.JsonLabel()) + } + + raw, err := json.MarshalIndent(slice, "", "\t") + if err != nil { + return err + } + + _, err = w.Write(raw) + return err +} diff --git a/script/parser.go b/script/parser.go index 7ef55d2..78397a5 100644 --- a/script/parser.go +++ b/script/parser.go @@ -54,6 +54,7 @@ func SmartParse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, erro Labels: make(map[int]*Label), // map[location]name CDL: cdl, + origSize: len(rawinput), }, rawinput: rawinput, @@ -61,17 +62,28 @@ func SmartParse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, erro } if p.script.CDL == nil { - p.script.CDL = &CodeDataLog{} + p.script.CDL = NewCDL() } tokenMap := make(map[int]*Token) // starting point is the third byte in the script. branches := []int{ 2 } + for _, ent := range p.script.CDL.getEntries() { + addr := ent-startAddr + if addr > 0 { + branches = append(branches, addr) + } + } + + visited := make([]bool, len(p.rawinput)) for len(branches) > 0 { - //fmt.Printf("start @ $%04X\n", branches[0]+startAddr) + st := branches[0]+startAddr + //fmt.Printf("start @ $%04X\n", st) + p.script.Labels[st] = AutoLabel(st) + INNER: for p.current = branches[0]; p.current < len(p.rawinput); p.current++ { //branches = branches[1:] @@ -99,7 +111,7 @@ INNER: //fmt.Printf("{$%04X} %s\n", token.Offset, token.String(map[int]*Label{})) - p.script.CDL.setCode(p.current) + p.script.CDL.setCode(p.current+p.startAddr) if raw < 0x80 { continue } @@ -112,7 +124,7 @@ INNER: //fmt.Println(token.String(map[int]*Label{})) switch raw { - case 0x86, 0xAC, 0xFF, 0x81, 0x9B, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xFD: // return, long_return, break_engine & halts + case 0x86, 0xAC, 0xAA, 0xFF, 0x81, 0x9B, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8, 0xFD: // return, long_return, long_jump, break_engine & halts //fmt.Printf("[$%04X] %s\n", // token.Offset, token.Instruction.Name) break INNER @@ -162,11 +174,13 @@ INNER: } } - if token.Instruction.OpCount == 2 { + if token.Instruction.OpCount == 2 && !token.Instruction.InlineImmediate { val := token.Inline[0].Int() - if _, ok := p.script.Labels[val]; !ok { + if _, ok := p.script.Labels[val]; !ok {//&& val >= startAddr { p.script.Labels[val] = AutoLabelVar(val) } + p.script.CDL.setData(val) + p.script.CDL.setData(val+1) } } @@ -176,6 +190,40 @@ INNER: branches = branches[1:] } + // Add data tokens + for addr, bit := range p.script.CDL.cache { + if addr < 0x6002 { + continue + } + + // ignore code bytes + if bit & cdlCode == cdlCode { + continue + } + + // ignore labels outside the script's address range + if addr > len(rawinput)+0x6000 { + continue + } + + if _, ok := p.script.Labels[addr]; ok { + p.script.Tokens = append(p.script.Tokens, &Token{ + Offset: addr, + Inline: []InlineVal{NewWordVal([]byte{rawinput[addr-0x6000], rawinput[addr+1-0x6000]})}, + IsVariable: true, + IsData: true, + cdl: bit.String(), + }) + } else { + p.script.Tokens = append(p.script.Tokens, &Token{ + Offset: addr, + Raw: rawinput[addr-0x6000], + IsData: true, + cdl: bit.String(), + }) + } + } + return p.script, nil } @@ -192,6 +240,7 @@ func Parse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, error) { StartAddress: startAddr, Labels: make(map[int]*Label), // map[location]name CDL: cdl, + origSize: len(rawinput), }, rawinput: rawinput, startAddr: startAddr, @@ -199,7 +248,7 @@ func Parse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, error) { tokenMap := make(map[int]*Token) if p.script.CDL == nil { - p.script.CDL = &CodeDataLog{} + p.script.CDL = NewCDL() } //earliestVar := len(p.rawinput)-2 @@ -302,8 +351,8 @@ func Parse(rawinput []byte, startAddr int, cdl *CodeDataLog) (*Script, error) { if t.Instruction.OpCount == 2 && !t.Instruction.InlineImmediate { addr := t.Inline[0].Int() - if tok, ok := tokenMap[addr]; ok { - tok.IsVariable = true + if _, ok := tokenMap[addr]; ok { + //tok.IsVariable = true p.script.Labels[addr] = AutoLabelVar(addr) //fmt.Sprintf("Var_%04X", addr) } } @@ -325,7 +374,7 @@ func (p *Parser) parseToken(token *Token, raw byte) error { switch op.OpCount { case -1: // null terminated for ; p.current < len(p.rawinput); p.current++ { - p.script.CDL.setCode(p.current) + p.script.CDL.setCode(p.current+p.startAddr) val := ByteVal(p.rawinput[p.current]) args = append(args, val) if p.rawinput[p.current] == 0x00 { @@ -336,10 +385,12 @@ func (p *Parser) parseToken(token *Token, raw byte) error { case -2: // count then count words // FIXME: wtf makes this different from -3?? p.current++ + l := int(p.rawinput[p.current]) - p.script.CDL.setCode(p.current) + p.script.CDL.setCode(p.current+p.startAddr) args = append(args, ByteVal(l)) p.current++ + for c := 0; c < l; c++ { if len(p.rawinput) <= p.current+1 { return errors.Join(ErrEarlyEOF, @@ -347,26 +398,37 @@ func (p *Parser) parseToken(token *Token, raw byte) error { } args = append(args, WordVal([2]byte{p.rawinput[p.current], p.rawinput[p.current+1]})) + p.script.CDL.setCode(p.current+p.startAddr) + p.script.CDL.setCode(p.current+p.startAddr+1) p.current+=2 } p.current-- case -3: // count then count words. "default" is no call (skip Code_Pointer to after args) p.current++ + l := int(p.rawinput[p.current]) args = append(args, ByteVal(l)) - p.script.CDL.setCode(p.current) + p.script.CDL.setCode(p.current+p.startAddr) p.current++ + for c := 0; c < l; c++ { + if len(p.rawinput) <= p.current+1 { + return errors.Join(ErrEarlyEOF, + fmt.Errorf("OP early end at offset 0x%X (%d) {%d} %#v", p.current, p.current, l, op)) + } + args = append(args, WordVal([2]byte{p.rawinput[p.current], p.rawinput[p.current+1]})) + p.script.CDL.setCode(p.current+p.startAddr) + p.script.CDL.setCode(p.current+p.startAddr+1) p.current+=2 } p.current-- case 2: args = append(args, WordVal([2]byte{p.rawinput[p.current+1], p.rawinput[p.current+2]})) - p.script.CDL.setCode(p.current+1) - p.script.CDL.setCode(p.current+2) + p.script.CDL.setCode(p.current+p.startAddr+1) + p.script.CDL.setCode(p.current+p.startAddr+2) p.current+=2 //fmt.Printf("var at $%04X\n", val.Int()) @@ -377,7 +439,7 @@ func (p *Parser) parseToken(token *Token, raw byte) error { case 1: p.current++ - p.script.CDL.setCode(p.current) + p.script.CDL.setCode(p.current+p.startAddr) args = append(args, ByteVal(p.rawinput[p.current])) } diff --git a/script/script.go b/script/script.go index 4fcf37d..ba969da 100644 --- a/script/script.go +++ b/script/script.go @@ -1,6 +1,8 @@ package script import ( + "fmt" + "os" ) type Script struct { @@ -12,6 +14,8 @@ type Script struct { Labels map[int]*Label CDL *CodeDataLog + + origSize int // size of the binary input } func (s *Script) Stats() Stats { @@ -34,3 +38,30 @@ func (s *Script) Stats() Stats { return st } + +func (s *Script) DebugCDL(filename string) error { + if s.origSize == 0 { + return fmt.Errorf("origSize == 0") + } + + if s.CDL.cache == nil { + err := s.CDL.doCache() + if err != nil { + return fmt.Errorf("doCache() error: %w", err) + } + } + + dat := make([]byte, s.origSize) + for i := 2; i < len(dat); i++ { + if val, ok := s.CDL.cache[i+0x6000]; ok { + dat[i] = byte(val) + } + } + + err := os.WriteFile(filename, dat, 0644) + if err != nil { + return fmt.Errorf("WriteFile() error: %w", err) + } + + return nil +} diff --git a/script/tokens.go b/script/tokens.go index 018496b..eb2e4e5 100644 --- a/script/tokens.go +++ b/script/tokens.go @@ -13,16 +13,23 @@ type Token struct { IsVariable bool // target of something else IsData bool // from CDL + cdl string // CDL string type + Instruction *Instruction } -func (t Token) String(labels map[int]*Label) string { +func (t Token) String(labels map[int]*Label, suppAddr bool) string { suffix := "" switch t.Raw { - case 0x86: // Newline after return + case 0x86, 0xAC, 0xAA: // Newline after return, long_return, & long_jump suffix = "\n" } + offset := "" + if !suppAddr { + offset = fmt.Sprintf("[%04X] ", t.Offset) + } + prefix := "" if lbl, ok := labels[t.Offset]; ok { comment := "" @@ -36,21 +43,44 @@ func (t Token) String(labels map[int]*Label) string { prefix = "\n"+comment+name } - if t.Instruction == nil { - return fmt.Sprintf("%s[%04X] %02X %-5s : %d%s", + if t.IsVariable { + return fmt.Sprintf("%s%s%02X %-5s : %d %s%s", prefix, - t.Offset, + offset, t.Raw, "", - int(t.Raw), + t.Inline[0].Int(), + t.Inline[0].HexString(), suffix, ) } + if t.Instruction == nil { + if t.IsData == false { + return fmt.Sprintf("%s%s%02X %-5s : %d%s", + prefix, + offset, + t.Raw, + "", + int(t.Raw), + suffix, + ) + } else if t.IsData { + return fmt.Sprintf("%s%s%02X %-5s : %d%s", + prefix, + offset, + t.Raw, + t.cdl, + int(t.Raw), + suffix, + ) + } + } + if len(t.Inline) == 0 { - return fmt.Sprintf("%s[%04X] %02X %-5s : %s%s", + return fmt.Sprintf("%s%s%02X %-5s : %s%s", prefix, - t.Offset, + offset, t.Raw, "", t.Instruction.String(), @@ -60,7 +90,7 @@ func (t Token) String(labels map[int]*Label) string { argstr := []string{} for _, a := range t.Inline { - if lbl, ok := labels[a.Int()]; ok { + if lbl, ok := labels[a.Int()]; ok && !t.Instruction.InlineImmediate { argstr = append(argstr, lbl.Name) } else { argstr = append(argstr, a.HexString()) @@ -80,18 +110,56 @@ func (t Token) String(labels map[int]*Label) string { switch t.Raw { case 0xBB: // push_data - bs := []byte{} - for _, val := range t.Inline { - bs = append(bs, val.Bytes()...) + raw := []byte{} + + ascii := true + for _, val := range t.Inline[1:len(t.Inline)-1] { + for _, b := range val.Bytes() { + raw = append(raw, b) + if b < 0x20 || b > 0x7E { + ascii = false + } + } } - return fmt.Sprintf("%s[%04X] %02X (...) : %s %q%s", + bs := "" + if ascii { + bs = fmt.Sprintf("%q", string(raw)) + } else { + vals := []string{} + for _, b := range raw { + if b >= 0x20 && b <= 0x7E { + vals = append(vals, fmt.Sprintf("0x%02X{%c}", b, b)) + } else { + vals = append(vals, fmt.Sprintf("0x%02X", b)) + } + } + bs = "["+strings.Join(vals, " ")+"]" + } + + //for _, val := range t.Inline { + // //bs = append(bs, val.Bytes()...) + // for _, b := range val.Bytes() { + // // These strings are strictly binary or ascii. If there's + // // non-ascii, don't try and read it as unicode if we find + // // some "valid" code points. Eg, 0xD?, 0xB? (%110?_????, %10??_????) + // if b < 0x20 || b > 0x7E { + // bs = append(bs, fmt.Sprintf("\\x%02x", b)) + // } else { + // bs = append(bs, string(b)) + // } + // } + //} + + return fmt.Sprintf("%s%s%02X (...) : %s %s%s", prefix, - t.Offset, + offset, t.Raw, t.Instruction.String(), - string(bs[1:len(bs)-1]), + //string(bs[1:len(bs)-1]), + //strings.Join(bs[1:len(bs)-1], ""), //strings.Join(argstr[1:], " "), + bs, suffix, ) @@ -99,9 +167,9 @@ func (t Token) String(labels map[int]*Label) string { case 0xC1, 0xEE: // switches - return fmt.Sprintf("%s[%04X] %02X %-5s : %s %s%s", + return fmt.Sprintf("%s%s%02X %-5s : %s %s%s", prefix, - t.Offset, + offset, t.Raw, "", t.Instruction.String(), @@ -110,9 +178,9 @@ func (t Token) String(labels map[int]*Label) string { ) default: - return fmt.Sprintf("%s[%04X] %02X %-5s : %s %s%s", + return fmt.Sprintf("%s%s%02X %-5s : %s %s%s", prefix, - t.Offset, + offset, t.Raw, strings.Join(bytestr, " "), t.Instruction.String(), @@ -122,9 +190,9 @@ func (t Token) String(labels map[int]*Label) string { } - return fmt.Sprintf("%s%04X: %s %s%s", + return fmt.Sprintf("%s%s%s %s%s", prefix, - t.Offset, + offset, t.Instruction.String(), strings.Join(argstr, " "), suffix,