INL-retro-progdump/firmware/source/asm_stm/swim.s

533 lines
16 KiB
ArmAsm

//;Ensure that your assembly code complies with the Procedure Call Standard for the ARM Architecture (AAPCS).
//;
//;The AAPCS describes a contract between caller functions and callee functions. For example, for integer or pointer types, it specifies that:
//;
//; Registers R0-R3 pass argument values to the callee function, with subsequent arguments passed on the stack.
//; Register R0 passes the result value back to the caller function.
//; Caller functions must preserve R0-R3 and R12, because these registers are allowed to be corrupted by the callee function.
//; Callee functions must preserve R4-R11 and LR, because these registers are not allowed to be corrupted by the callee function.
//;
//;For more information, see the Procedure Call Standard for the ARM Architecture (AAPCS).
//;WARNING!!! logic instructions all affect flags despite the 's' postfix arm_none_eabi_gcc doesn't like 's' instructions
//; but the 's' affect flag instructions are the only ones the M0 supports
//; because of this, issue the following code isn't compatible with other cores.
.equ BSSR, 0x18
.equ BRR, 0x28
.equ OTYPER, 0x04
.equ IDR, 0x10
.equ NO_RESP, 0xFF
.equ ACK, 0x01
.equ NAK, 0x00
.equ HERR, 0x0E
.equ PERR, 0x09
.macro swim_lo
strh swim_mask, [swim_base, #BRR]
.endm
.macro swim_hi
strh swim_mask, [swim_base, #BSSR]
.endm
.macro swim_pp
strh pushpull, [swim_base, #OTYPER]
.endm
.macro swim_od
strh opendrain, [swim_base, #OTYPER]
.endm
.equ SWIM_RD, 0x01
.equ SWIM_WR, 0x02
.equ SWIM_HS_BIT, 4
.equ SWIM_HS_MSK, 0x10
.equ HS_DELAY, 4
.equ LS_DELAY, 22
.globl swim_xfr
.p2align 2
.type swim_xfr,%function
//;r0 - r3 contain function args (excess on stack)
//;swim_xfr( data_pb, spddir_len, swim_base, swim_mask);
//;spddir_len = (SWIM_RD_LS<<16) | len;
//; stream .req a1 this arg is moved to variable reg after stack push
stream_arg .req a1
rv .req r0
len .req a2
swim_base .req a3
swim_mask .req a4
swim_xfr: //;Function entry point.
.fnstart
//;need a few extra variable registers, but they need to be preserved
pushpull .req v1
opendrain .req v2
stream .req v3
speed .req v4
//;high registers r8-r12 are very limited following instructions can utilize them
//; ADD, CMP, LDR PC-rel, BX, BLX, MSR, MSR
//; Docs list that MOV can only use R0-7, but testing and compilation proves otherwise
//; Additionally, arm_none_eabi_gcc uses MOV R8, R8 as it's NOP!!
rdwr .req v5
push {pushpull, opendrain, stream, speed, lr}
mov speed, rdwr //;preserve r8 high register can't pop/push
push {speed}
//; move stream arg out of r0, and into variable register so r0 is free
mov stream, stream_arg
//; len contains speed and direction data, must trim off and move
//; into variable registers
//;spddir_len = (SWIM_RD/WR_LS/HS<<16) | len;
mov r0, len
lsr r0, #16
cmp r0, #SWIM_HS_MSK
bpl high_speed
mov speed, #LS_DELAY
b speed_dir
high_speed:
mov speed, #HS_DELAY
speed_dir:
//;mask out speed bit and store in rdwr
//;shift speed bit left past carry
lsl r0, #(16-SWIM_HS_BIT + 16) //;16-BIT shifts bit to b16, 16 shifts to carry = 28
lsr r0, #(16-SWIM_HS_BIT + 16) //;carry doesn't shift in
mov rdwr, r0
//; mask out upper bits of len
mov r0, #0xFF
and len, r0
//; ~83nsec per unit of delay change
//;mov speed, #22 //;22 = 2.75usec bit time delay variable
//;mov speed, #4 //;4 = 1.25usec bit time delay variable
//;mov speed, #22
//;TODO should probably disable interrupts while transferring data via SWIM as it's timing sensitive
//; haven't touched this code in awhile and can't get myself to make this update right now..
//; may want something similar when entering swim activation
//; set pushpull and opendrain to values we can write to otyper register
//; to quickly change direction of the SWIM pin only
ldr pushpull, [swim_base, #OTYPER]
mov opendrain, pushpull
//; variables hold current OTYPER register value
//; set bit for opendrain, clear for pushpull
orr opendrain, swim_mask
bic pushpull, swim_mask
//; now these registers can be written directly to otyper GPIO reg
//; to quickly change SWIM pin direction and not affect other pins
//;set flags so first bit is header '0' "from host"
//;the stream comes in as 16bit value with stream bit 7 in bit position 15
//;shift the stream left so the current transfer bit is in bit position 31
//;15 -> 30 is 15bit shifts, this leaves header zero in bit position 31
//;store stream in r4
lsl stream, #15
bit_start:
//;always start going low
swim_lo
//;current bit is stored in bit31 and Negative flag is set if
//;current bit is '1'
bpl cur_bit_zero
//;delay to extend low time for '1'
nop
nop
nop
nop
nop
nop
nop
//;go high since current bit is '1'
swim_pp
swim_hi
swim_od
b det_next_bit
cur_bit_zero:
//;must delay same amount of time as instructions above since branch
//;add delay here to make '0' longer until equal to '1'
mov r0, #1
bl delay_r0
det_next_bit:
//;determine if this is the last bit
sub len, #1
//;if last bit, go to stream end to prepare for ACK/NAK latch
bmi out_end
//;delay until 'go high' time for '0'
//;add delay here to make all bit transfers longer
//;20-> 2.56usec bit time
//;21-> 2.65usec
//;22-> 2.73usec
mov r0, speed ;//4-HS 22-LS
bl delay_r0
nop //;22+nop = 2.75usec = low speed timing perfect!
//; high speed bit time has same 0-high, 1-lo time
//; only difference is bit time is 1.25usec
//;Negative flag is now set for '1', and clear for '0'
//;always go high for '0' (no effect if already high for '1')
swim_pp
swim_hi
swim_od
//;delay to extend high time of '0'
nop
nop
//;determine next bit value
lsl stream, #1
//;go to bit start
b bit_start
out_end:
//;delay until 'go high' time for '0'
mov r0, speed
sub r0, #1 ;//this decrement keeps both HS and LS perfectly aligned
bl delay_r0
//;always go high for '0' (no effect if already high for '1')
swim_pp
swim_hi
swim_od
//;delay until time to latch ACK/NAK from device
mov r0, #4 //;1-2:NR 3: usually RESP, not always/5-varies RESP/NO RESP
//; sometime the device takes longer...
//; 3 was failing for some, inc to 5
//; 5 works, '1' NAK has low pulse width of 500-600nsec, trying 4
//; 4 seems stable. low pulse time is 400-550nsec
bl delay_r0
//;first need to ensure device is actually responding
//;sample when output should be low for a 1 or 0
//;appears the device inserts a little delay ~220nsec between final host bit
//;and ACK/NAK
//;total time between host high (parity bit = '0')
//;and device ACK/NAK low is ~450nsec
//;debug toggle pushpull below
//;measurements showed pulse ~100nsec after device took SWIM low with 3 above
//;swim_pp
//;swim_od
//;latch SWIM pin value from GPIO IDR
ldrh rv, [swim_base, #IDR]
and rv, swim_mask
//;if it wasn't low, then the device didn't respond, so return error designating that
//;__asm volatile ("bne no_response\n\t");
beq wait_ack_nak
//;return 0xFF
mov rv, #NO_RESP
b exit_swim
wait_ack_nak:
//;don't have a strong enough pull-up resistor on SWIM pin
//;to compensate for this we can cheat by quickly toggling
//;to push-pull after device should have gone high for ACK = '0'
//;this does create bus contension for breif period but don't have much choice here..
//;only other alternative is to install pullup on board/programmer
//;swim_pp no delay_r0 outputs pulse ~240nsec after device goes low with 3 above
//;swim_od
//; 3 above, and 2 here equates to pulse high 640nsec after device goes low
//; 3 above, and 1 here equates to pulse high 460nsec after device goes low
//; 3 above wasn't stable, incremented to 4 and works, give 400-550nsec pulse low
//; mov r0, #1
//; bl delay_r0
;//r0=1: for ACK, artf pullup enabled 150-300nsec after device stops driving low
;//this is okay timing allowance as being early would cause misread
;//could possibly tighten with nops instead..
nop
nop
nop
nop //;4 nops = 250-350nsec low pulse width for ACK '1'
nop
nop //;5 nops = 300-450nsec low pulse width
swim_pp
swim_od
//;now we can sample for NAK/ACK as artifical pullup has been inserted above
//;if device output ACK, the artificial pullup doesn't cause contension
//;but if device output NAK "0" device should still be driving low
//;latch SWIM pin value from GPIO IDR
ldrh rv, [swim_base, #IDR]
and rv, swim_mask
//; NAK: rv=0, ACK: rv=swim_mask
beq return_nak
mov rv, #ACK
//; device sent ACK, if this is a read operation
//; need to capture data sent by the device
mov stream, #SWIM_WR
cmp rdwr, stream
beq exit_swim
//; Sent the last byte of the command successfully
//; Now read in data from device
//; A bit of a challenge because of lacking legit pullup
//; setup for read transfer
mov len, #9 //;read 9bits total, then output ACK
//; poll until device takes SWIM low for header bit
//; perhaps setting an interrupt and waiting for it would be better
//; due to less jitter from polling..
poll_header:
ldrh stream, [swim_base, #IDR]
and stream, swim_mask
bne poll_header
//;device took SWIM low
//;pulse for artifical pullup
mov r0, #1 //;1: 350-450nsec equates to 100-200nsec delay ~okay
bl delay_r0
//;nop
//;nop
//;nop //; 3xnop = 250-300nsec low pulse for '1' header from device
//;nop //; 4xnop = 250-350nsec
//;nop //; 5xnop = 250-300 never seems to change!!!!
//; the device seems to stall for a little bit due to the delayed pull-up
//; the bit time for the header seems to extend to ~3usec
//; but perhaps we can take advantage of this to better align with the device
//; add delay between here and read_next_bit to lengthen header bit
//; seemed to be a little early at times..
swim_pp
nop
swim_od
//; adding delay to extend header bit which always seems 250nsec longer than it should be
nop //; 1x NOP fails pretty hard header errors @ slow speed
//; moved second NOP between pp->od to try and reduce header errors @ low speed
//; nop //; 2x NOP seems pretty good, sometimes fails in slow speed, but pretty good @ high speed
//; can usually get 1 out of 12 low speed to fail, and sometimes HS will fail hard
//; nop //; 3x NOP seems more likely to fail @ low speed than 3x
//; hard to say if 3x NOP is actually better, it might succeed more on reads, but switching to HS seems to fail more often
b read_next_bit
.p2align 4
read_next_bit:
swim_pp
swim_od
//; header bit '1' is now high
//; give a little delay between push pulse and reading
//; this instruction can be performed out of order
//; this didn't end up being a real problem, but it can't hurt
lsl stream, #1
//; read bit, should be '1' for header on first read
//; sample and place value in carry, then rotate in
ldrh r0, [swim_base, #IDR]
and r0, swim_mask
//; Z flag contains inverse of bit
mrs r0, APSR
//; bit 30 of r0 contains inverse of bit
//; shift left to mask away any upper bits
lsl r0, #1
//; shift right to mask away lower bits
lsr r0, #31
//; shift stream and or in r0 (stream does have mask bit set from poll loop)
//; moved up to provide delay between push and read
//;lsl stream, #1
orr stream, r0
//; now stream holds the inverse stream (plus mask junk on upper half)
//; wait bit time, enable artifical pullup, and sample
mov r0, speed
sub r0, #2 //; need to save some delay for after pushing '0' high
bl delay_r0
//; push high for logic '0'
//;swim_pp
//;swim_od
//;mov r0, #2
//;bl delay_r0
//; above isn't always getting SWIM pin high for '0'
//; can only assume that it's too early
swim_pp
swim_od
mov r0, #2
bl delay_r0
//; seems to drop out at times..
//; adding a check here to verify that SWIM is high looks like it would
//; catch when the device drops out
//; check if last bit in read
sub len, #1
//;if last bit, go to stream end to prepare for ACK/NAK latch
bpl read_next_bit
//; last bit calc and send parity, or just always send ACK
//; could send back to poll header but not sure it's worth retrying..
//; not sure a failure would even allow us to properly send a NAK
always_send_ack:
//; ACK is a '1' from host, so need a short pulse low
//; this is actually a little late, but seems the device sends it a little late as well
//; so this happens to align pretty well with the device's timing
swim_lo
swim_pp
mov r0, #1
bl delay_r0
swim_hi
swim_od
//; check that SWIM is actually high
//; if device failed it's possible pairity still passed
//; but device sensed reset condition due to lack of legit pullup
//; in which case it would likely be outputing low now for 16usec
//; organize return data
//; MSB NAK/NORESP from last write
//; if ACK, then return read result
//; TIMEOUT, HEADER error, PAIRITY error
//; ACK entire transfer good!
//; LSB data read back
//; stream data sturcture
//; upper bits may contain swim_mask value
//; all values inverted:
//; b9 header '1' -> '0'
//; b8-1 data inverted 1's compliment
//; b0 pairity inverted
//; write corrupted data for testing
//; inverted 1 header bit - F0 data - 0 pairitybit
//; 0 0F 1
//; mov stream, #0x1F ;// good data should report 0xF0 - ACK
//; mov stream, #0x1E ;// toggle pairity data should report PAIRITY ERROR
;// bad header data
//; mov stream, #0x80
//; mov rv, #2
//; lsl stream, rv
//; mov rv, #1 ;// set correct pairity
//; orr stream, rv
//; store result as-is
mov rv, stream
//; check that header was read as '1' (from device) and stored as '0'
mov len, #0x80 ;// bad header data would have bit 9 set ('1' device header is inverted)
mov swim_mask, #2
lsl len, swim_mask ;// shift bit 7 into bit 9
and stream, len
//; should be zero if header stored as '0'
bne header_error
//; calc pairity
//; must add to registers to add with carry
//; swim_mask no longer needed
mov swim_mask, #0
//; store stream temporarily in len reg while shifting pairity bit to carry
lsr len, rv, #1
//; stream should be zero on entry
//; add inverted pairity bit
adc stream, swim_mask
lsr len, #1
//; add bit0-3
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
lsr len, #1
//; add bit4-7
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
lsr len, #1
adc stream, swim_mask
//; individual sumation of all bits should be even when pairity included
//; but for inverted data, the sum should be odd
//; shift sum lsbit into carry and verify it's set (equates to odd)
lsr stream, #1
bcc pairity_error
//; all is good, just return the inverted data!
lsr rv, #1
//; mask out data alone
mov len, #0xFF
and rv, len
//; invert data to true data
eor rv, len
//; shift to upper byte
lsl rv, #8
//; add in the ACK to lower byte
add rv, #ACK
//; since things are little endian
//; the output is a 16bit int
//; so the value we output will be byte swapped
//; when interpreted as 16bit int
b exit_swim
header_error:
//; header wasn't '1' as expected when reading from device
mov rv, #HERR
b exit_swim
pairity_error:
mov rv, #PERR
b exit_swim
return_nak:
mov rv, #NAK
exit_swim:
//;r0 contains return value on exit (already done prior to this point)
//;r4-r11 & lr must be preserved to entry values
pop {speed}
mov rdwr, speed //;restore r8 high register can't pop/push
pop {pushpull, opendrain, stream, speed, pc}
//; bx lr //;Return by branching to the address in the link register.
.fnend
//;.globl delay_r0
//; ~83nsec per unit of delay change
.p2align 2
.type delay_r0,%function
count .req a1
delay_r0: //;Function entry point.
.fnstart
sub count, #1
bne delay_r0
bx lr
.fnend