fel: thunks: Fix fel-to-spl-thunk to be ARMv5TE compatible

Currently the thunk we upload into the SRAM is using DSB and ISB
instructions, which were introduced in ARMv7. Also it relies on
movw/movt pairs, which became available in ARMv6T2.
The Allwinner F1Cx00 SoCs are using an ARMv5TE compliant core, so they
do not know these instructions.

Change the code to be ARMv5TE compliant, so it can run on all relevant
Allwinner ARM cores:
- One movw is only used to build a mask for testing two bits; replace it
  with a tst/tsteq sequence, which needs no mask register to be loaded.
- The other movw/movt pairs get replaced with ldr's, that load from
  literal storage at the end of the code (from Icenowy).
- The DSB and ISB get replaced with their CP15 MCR counterparts. Those
  are deprecated in ARMv7, but still work, when the CP15BEN bit is set
  in SCTLR. We check for this in fel.c (from Icenowy). ISB is not
  implemented on the ARM926, so make this conditional. A simple branch
  takes care of the desired pipeline flush for the old SoC.

Also remove the rather pointless Ruby prolog that generates the header
file. We have a less awkward version of this in the Makefile, and need
that for the other thunks there anyway, so it's just duplicated code.
Embedding a header generator in Ruby in an assembly file is a cute
gimmick, but serves no purpose anymore.

This is based on work by Icenowy, who put a similar solution in a
separate file.

Originally-by: Icenowy Zheng <icenowy@aosc.io>
Signed-off-by: Andre Przywara <osp@andrep.de>
This commit is contained in:
Andre Przywara 2022-01-20 01:14:54 +00:00
parent d5f4fd1e12
commit 0b49f88acf
3 changed files with 85 additions and 103 deletions

View File

@ -27,7 +27,7 @@ AWK_O_TO_H := LC_ALL=C awk -f objdump_to_h.awk
# The SPL thunk requires a different output format. The "style" variable for
# awk controls this, and causes the htole32() conversion to be omitted.
fel-to-spl-thunk.h: fel-to-spl-thunk.S FORCE
$(AS) -o $(subst .S,.o,$<) $<
$(AS) -o $(subst .S,.o,$<) -march=armv5te $<
$(OBJDUMP) -d $(subst .S,.o,$<) | $(AWK_O_TO_H) -v style=old > $@
$(THUNKS): %.h: %.S FORCE

View File

@ -21,45 +21,7 @@
* DEALINGS IN THE SOFTWARE.
*/
/*************************************************************************/
/* Usage instructions: "ruby -x fel-to-spl-thunk.S > fel-to-spl-thunk.h" */
/*************************************************************************/
/* Open a comment for gas.
Do not close the comment until after the Ruby code terminator (__END__).
Write the '*' '/' sequence of characters as "\x2a/" in string literals to
avoid doing so.
#!/usr/bin/env ruby
def tool_exists(tool_name)
`which #{tool_name} > /dev/null 2>&1`
return $?.to_i == 0
end
toolchains = [
"arm-none-eabi-",
"arm-linux-gnueabihf-",
"arm-none-linux-gnueabi-",
"armv7a-hardfloat-linux-gnueabi-",
]
toolchain = toolchains.find { |toolchain| tool_exists("#{toolchain}as") }
abort "Can't find any ARM crosscompiler\n" unless toolchain
system("#{toolchain}as -o #{$PROGRAM_NAME}.o #{$PROGRAM_NAME}")
exit($?.to_i) if $?.to_i != 0
`#{toolchain}objdump -d #{$PROGRAM_NAME}.o`.each_line {|l|
next unless l =~ /(\h+)\:\s+(\h+)\s+(\S+)\s+([^;]*)/
printf("\t0x%s, /* %8s: %-10s %-28s \x2a/\n", $2, $1, $3, $4.strip)
}
__END__
*/
/*************************************************************************/
.arm
BUF1 .req r0
BUF2 .req r1
@ -75,14 +37,7 @@ entry_point:
b setup_stack
stack_begin:
nop
nop
nop
nop
nop
nop
nop
nop
.space 32, 0xff
stack_end:
nop
@ -118,15 +73,14 @@ setup_stack: /* Save the original SP, LR and CPSR to stack */
/* Check if the instruction or data cache is enabled */
mrc p15, 0, TMP1, c1, c0, 0
movw TMP2, #((1 << 12) | (1 << 2))
tst TMP1, TMP2
tst TMP1, #(1 << 2)
tsteq TMP1, #(1 << 12)
bne cache_is_unsupported
bl swap_all_buffers
verify_checksum:
movw CHECKSUM, #0x6c39
movt CHECKSUM, #0x5f0a
ldr CHECKSUM, checksum_seed
mov BUF1, SPL_ADDR
ldr FULLSIZE, [BUF1, #16]
check_next_word:
@ -140,13 +94,27 @@ check_next_word:
bne checksum_is_bad
/* Change 'eGON.BT0' -> 'eGON.FEL' */
movw TMP1, (('F' << 8) + '.')
movt TMP1, (('L' << 8) + 'E')
ldr TMP1, egon_fel_str
str TMP1, [SPL_ADDR, #8]
/* Call the SPL code */
dsb
isb
/*
* Call the SPL code, but before that make sure the CPU sees the
* recently uploaded code. This requires a DSB and ISB.
* The "dsb" and "isb" *instructions* are not available in ARMv5TE,
* but at least for DSB we can use the CP15 register encoding. This
* works for ARMv7 and v8 as well, because we have checked our SCTLR
* before (in fel.c), so we know that CP15BEN is set.
* The ARM926 core does not implement ISB, instead the TRM recommends
* just a branch to achieve the same "flush the pipeline" effect.
* As a branch alone is not sufficient for later cores, check the MIDR
* register, and do the ISB only for ARMv6 or later.
* The input register for the CP15 instruction is ignored.
*/
mcr p15, 0, TMP1, c7, c10, 4 /* CP15DSB */
mrc p15, 0, TMP1, c0, c0, 0 /* read MIDR */
and TMP1, TMP1, #(0xf << 16) /* architecture */
cmp TMP1, #(0x6 << 16) /* ARMv5TEJ */
mcrgt p15, 0, TMP1, c7, c5, 4 /* CP15ISB, if > ARMv5TEJ */
blx SPL_ADDR
/* Return back to FEL */
@ -154,15 +122,13 @@ check_next_word:
cache_is_unsupported:
/* Bail out if cache is enabled and change 'eGON.BT0' -> 'eGON.???' */
movw TMP1, (('?' << 8) + '.')
movt TMP1, (('?' << 8) + '?')
ldr TMP1, cache_enabled_str
str TMP1, [SPL_ADDR, #8]
b return_to_fel_noswap
checksum_is_bad:
/* The checksum test failed, so change 'eGON.BT0' -> 'eGON.BAD' */
movw TMP1, (('B' << 8) + '.')
movt TMP1, (('D' << 8) + 'A')
ldr TMP1, checksum_failed_str
str TMP1, [SPL_ADDR, #8]
return_to_fel:
@ -173,6 +139,15 @@ return_to_fel_noswap:
ldr sp, [sp]
bx lr
checksum_seed:
.word 0x5f0a6c39
egon_fel_str:
.ascii ".FEL"
cache_enabled_str:
.ascii ".???"
checksum_failed_str:
.ascii ".BAD"
appended_data:
/*
* The appended data uses the following format:

View File

@ -1,18 +1,18 @@
/* <entry_point>: */
0xea000015, /* 0: b 5c <setup_stack> */
/* <stack_begin>: */
0xe1a00000, /* 4: nop */
0xe1a00000, /* 8: nop */
0xe1a00000, /* c: nop */
0xe1a00000, /* 10: nop */
0xe1a00000, /* 14: nop */
0xe1a00000, /* 18: nop */
0xe1a00000, /* 1c: nop */
0xe1a00000, /* 20: nop */
0xffffffff, /* 4: .word 0xffffffff */
0xffffffff, /* 8: .word 0xffffffff */
0xffffffff, /* c: .word 0xffffffff */
0xffffffff, /* 10: .word 0xffffffff */
0xffffffff, /* 14: .word 0xffffffff */
0xffffffff, /* 18: .word 0xffffffff */
0xffffffff, /* 1c: .word 0xffffffff */
0xffffffff, /* 20: .word 0xffffffff */
/* <stack_end>: */
0xe1a00000, /* 24: nop */
/* <swap_all_buffers>: */
0xe28f40dc, /* 28: add r4, pc, #220 */
0xe28f40e8, /* 28: add r4, pc, #232 */
/* <swap_next_buffer>: */
0xe4940004, /* 2c: ldr r0, [r4], #4 */
0xe4941004, /* 30: ldr r1, [r4], #4 */
@ -28,7 +28,7 @@
0x1afffff9, /* 54: bne 40 <swap_next_word> */
0xeafffff3, /* 58: b 2c <swap_next_buffer> */
/* <setup_stack>: */
0xe59f80a4, /* 5c: ldr r8, [pc, #164] */
0xe59f80b0, /* 5c: ldr r8, [pc, #176] */
0xe24f0044, /* 60: sub r0, pc, #68 */
0xe520d004, /* 64: str sp, [r0, #-4]! */
0xe1a0d000, /* 68: mov sp, r0 */
@ -37,43 +37,50 @@
0xe38220c0, /* 74: orr r2, r2, #192 */
0xe121f002, /* 78: msr CPSR_c, r2 */
0xee112f10, /* 7c: mrc 15, 0, r2, cr1, cr0, {0} */
0xe3013004, /* 80: movw r3, #4100 */
0xe1120003, /* 84: tst r2, r3 */
0x1a000012, /* 88: bne d8 <cache_is_unsupported> */
0xe3120004, /* 80: tst r2, #4 */
0x03120a01, /* 84: tsteq r2, #4096 */
0x1a000013, /* 88: bne dc <cache_is_unsupported> */
0xebffffe5, /* 8c: bl 28 <swap_all_buffers> */
/* <verify_checksum>: */
0xe3067c39, /* 90: movw r7, #27705 */
0xe3457f0a, /* 94: movt r7, #24330 */
0xe1a00008, /* 98: mov r0, r8 */
0xe5905010, /* 9c: ldr r5, [r0, #16] */
0xe59f706c, /* 90: ldr r7, [pc, #108] */
0xe1a00008, /* 94: mov r0, r8 */
0xe5905010, /* 98: ldr r5, [r0, #16] */
/* <check_next_word>: */
0xe4902004, /* a0: ldr r2, [r0], #4 */
0xe2555004, /* a4: subs r5, r5, #4 */
0xe0877002, /* a8: add r7, r7, r2 */
0x1afffffb, /* ac: bne a0 <check_next_word> */
0xe598200c, /* b0: ldr r2, [r8, #12] */
0xe0577082, /* b4: subs r7, r7, r2, lsl #1 */
0x1a00000a, /* b8: bne e8 <checksum_is_bad> */
0xe304262e, /* bc: movw r2, #17966 */
0xe3442c45, /* c0: movt r2, #19525 */
0xe5882008, /* c4: str r2, [r8, #8] */
0xf57ff04f, /* c8: dsb sy */
0xf57ff06f, /* cc: isb sy */
0xe12fff38, /* d0: blx r8 */
0xea000006, /* d4: b f4 <return_to_fel> */
0xe4902004, /* 9c: ldr r2, [r0], #4 */
0xe2555004, /* a0: subs r5, r5, #4 */
0xe0877002, /* a4: add r7, r7, r2 */
0x1afffffb, /* a8: bne 9c <check_next_word> */
0xe598200c, /* ac: ldr r2, [r8, #12] */
0xe0577082, /* b0: subs r7, r7, r2, lsl #1 */
0x1a00000b, /* b4: bne e8 <checksum_is_bad> */
0xe59f2048, /* b8: ldr r2, [pc, #72] */
0xe5882008, /* bc: str r2, [r8, #8] */
0xee072f9a, /* c0: mcr 15, 0, r2, cr7, cr10, {4} */
0xee102f10, /* c4: mrc 15, 0, r2, cr0, cr0, {0} */
0xe202280f, /* c8: and r2, r2, #983040 */
0xe3520806, /* cc: cmp r2, #393216 */
0xce072f95, /* d0: mcrgt 15, 0, r2, cr7, cr5, {4} */
0xe12fff38, /* d4: blx r8 */
0xea000004, /* d8: b f0 <return_to_fel> */
/* <cache_is_unsupported>: */
0xe3032f2e, /* d8: movw r2, #16174 */
0xe3432f3f, /* dc: movt r2, #16191 */
0xe59f2028, /* dc: ldr r2, [pc, #40] */
0xe5882008, /* e0: str r2, [r8, #8] */
0xea000003, /* e4: b f8 <return_to_fel_noswap> */
0xea000002, /* e4: b f4 <return_to_fel_noswap> */
/* <checksum_is_bad>: */
0xe304222e, /* e8: movw r2, #16942 */
0xe3442441, /* ec: movt r2, #17473 */
0xe5882008, /* f0: str r2, [r8, #8] */
0xe59f2020, /* e8: ldr r2, [pc, #32] */
0xe5882008, /* ec: str r2, [r8, #8] */
/* <return_to_fel>: */
0xebffffcb, /* f4: bl 28 <swap_all_buffers> */
0xebffffcc, /* f0: bl 28 <swap_all_buffers> */
/* <return_to_fel_noswap>: */
0xe8bd4004, /* f8: pop {r2, lr} */
0xe121f002, /* fc: msr CPSR_c, r2 */
0xe59dd000, /* 100: ldr sp, [sp] */
0xe12fff1e, /* 104: bx lr */
0xe8bd4004, /* f4: pop {r2, lr} */
0xe121f002, /* f8: msr CPSR_c, r2 */
0xe59dd000, /* fc: ldr sp, [sp] */
0xe12fff1e, /* 100: bx lr */
/* <checksum_seed>: */
0x5f0a6c39, /* 104: .word 0x5f0a6c39 */
/* <egon_fel_str>: */
0x4c45462e, /* 108: .word 0x4c45462e */
/* <cache_enabled_str>: */
0x3f3f3f2e, /* 10c: .word 0x3f3f3f2e */
/* <checksum_failed_str>: */
0x4441422e, /* 110: .word 0x4441422e */