Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improvements to non 32-bit exception handler #7816

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 87 additions & 54 deletions cores/esp8266/core_esp8266_non32xfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
*/

/*
* This exception handler, allows for byte or short accesses to iRAM or PROGMEM
* to succeed without causing a crash. It is still preferred to use the xxx_P
* macros whenever possible, since they are probably 30x faster than this
* exception handler method.
* This exception handler handles EXCCAUSE_LOAD_STORE_ERROR. It allows for a
* byte or short access to iRAM or PROGMEM to succeed without causing a crash.
* When reading, it is still preferred to use the xxx_P macros when possible
* since they are probably 30x faster than this exception handler method.
*
* Code taken directly from @pvvx's public domain code in
* https://github.com/pvvx/esp8266web/blob/master/app/sdklib/system/app_main.c
Expand All @@ -37,6 +37,16 @@
#include <Schedule.h>
#include <debug.h>

// All of these optimization were tried and now work
// These results were from irammem.ino using GCC 10.2
// DRAM reference uint16 9 AVG cycles/transfer
// #pragma GCC optimize("O0") // uint16, 289 AVG cycles/transfer, IRAM: +180
// #pragma GCC optimize("O1") // uint16, 241 AVG cycles/transfer, IRAM: +16
#pragma GCC optimize("O2") // uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("O3") // uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("Ofast") // uint16, 230 AVG cycles/transfer, IRAM: +4
// #pragma GCC optimize("Os") // uint16, 233 AVG cycles/transfer, IRAM: 27556 +0

extern "C" {

#define LOAD_MASK 0x00f00fu
Expand All @@ -50,32 +60,55 @@ extern "C" {

static fn_c_exception_handler_t old_c_handler = NULL;

static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, int cause)
static
IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef, int cause)
{
do {
/*
Had to split out some of the asm, compiler was reusing a register that it
needed later. A crash would come or go away with the slightest unrelated
changes elsewhere in the function.

Register a15 was used for epc1, then clobbered for rsr. Maybe an
__asm("":::"memory") before starting the asm would help for these cases.
For this instance moved setting epc1 closer to where it was used.
Edit. "&" on output register would have resolved the problem.
Refactored to reduce and consolidate register usage.
In adapting the public domain version, a crash would come or go away with
the slightest unrelated changes elsewhere in the function. Observed that
register a15 was used for epc1, then clobbered by `rsr.` I now believe a
"&" on the output register would have resolved the problem.

However, I have refactored the Extended ASM to reduce and consolidate
register usage and corrected the issue.

The positioning of the Extended ASM block (as early as possible in the
compiled function) is in part controlled by the immediate need for
output variable `insn`. This placement aids in getting excvaddr read as
early as possible.
*/
uint32_t insn;
__asm(
"movi %0, ~3\n\t" /* prepare a mask for the EPC */
"and %0, %0, %1\n\t" /* apply mask for 32bit aligned base */
"ssa8l %1\n\t" /* set up shift register for src op */
"l32i %1, %0, 0\n\t" /* load part 1 */
"l32i %0, %0, 4\n\t" /* load part 2 */
"src %0, %0, %1\n\t" /* right shift to get faulting instruction */
:"=&r"(insn)
:"r"(ef->epc)
:
);
uint32_t insn, excvaddr;
#if 1
{
uint32_t tmp;
__asm__ (
"rsr.excvaddr %[vaddr]\n\t" /* Read faulting address as early as possible */
"movi.n %[tmp], ~3\n\t" /* prepare a mask for the EPC */
"and %[tmp], %[tmp], %[epc]\n\t" /* apply mask for 32-bit aligned base */
"ssa8l %[epc]\n\t" /* set up shift register for src op */
"l32i %[insn], %[tmp], 0\n\t" /* load part 1 */
"l32i %[tmp], %[tmp], 4\n\t" /* load part 2 */
"src %[insn], %[tmp], %[insn]\n\t" /* right shift to get faulting instruction */
: [vaddr]"=&r"(excvaddr), [insn]"=&r"(insn), [tmp]"=&r"(tmp)
: [epc]"r"(ef->epc) :);
}

#else
{
__asm__ __volatile__ ("rsr.excvaddr %0;" : "=r"(excvaddr):: "memory");
/*
"C" reference code for the ASM to document intent.
May also prove useful when issolating possible issues with Extended ASM,
optimizations, new compilers, etc.
*/
uint32_t epc = ef->epc;
uint32_t *pWord = (uint32_t *)(epc & ~3);
uint64_t big_word = ((uint64_t)pWord[1] << 32) | pWord[0];
uint32_t pos = (epc & 3) * 8;
insn = (uint32_t)(big_word >>= pos);
}
#endif

uint32_t what = insn & LOAD_MASK;
uint32_t valmask = 0;
Expand All @@ -102,10 +135,6 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
--regno; /* account for skipped a1 in exception_frame */
}

uint32_t excvaddr;
/* read out the faulting address */
__asm("rsr %0, EXCVADDR;" :"=r"(excvaddr)::);

#ifdef DEBUG_ESP_MMU
/* debug option to validate address so we don't hide memory access bugs in APP */
if (mmu_is_iram((void *)excvaddr) || (is_read && mmu_is_icache((void *)excvaddr))) {
Expand All @@ -114,31 +143,34 @@ static IRAM_ATTR void non32xfer_exception_handler(struct __exception_frame *ef,
continue; /* fail */
}
#endif

if (is_read) {
/* Load, shift and mask down to correct size */
uint32_t val = (*(uint32_t *)(excvaddr & ~0x3));
val >>= (excvaddr & 0x3) * 8;
val &= valmask;

/* Sign-extend for L16SI, if applicable */
if (what == L16SI_MATCH && (val & 0x8000)) {
val |= 0xffff0000;
{
uint32_t *pWord = (uint32_t *)(excvaddr & ~0x3);
uint32_t pos = (excvaddr & 0x3) * 8;
uint32_t mem_val = *pWord;

if (is_read) {
/* shift and mask down to correct size */
mem_val >>= pos;
mem_val &= valmask;

/* Sign-extend for L16SI, if applicable */
if (what == L16SI_MATCH && (mem_val & 0x8000)) {
mem_val |= 0xffff0000;
}

ef->a_reg[regno] = mem_val; /* carry out the load */

} else { /* is write */
uint32_t val = ef->a_reg[regno]; /* get value to store from register */
val <<= pos;
valmask <<= pos;
val &= valmask;

/* mask out field, and merge */
mem_val &= (~valmask);
mem_val |= val;
*pWord = mem_val; /* carry out the store */
}

ef->a_reg[regno] = val; /* carry out the load */

} else { /* is write */
uint32_t val = ef->a_reg[regno]; /* get value to store from register */
val <<= (excvaddr & 0x3) * 8;
valmask <<= (excvaddr & 0x3) * 8;
val &= valmask;

/* Load, mask out field, and merge */
uint32_t dst_val = (*(uint32_t *)(excvaddr & ~0x3));
dst_val &= (~valmask);
dst_val |= val;
(*(uint32_t *)(excvaddr & ~0x3)) = dst_val; /* carry out the store */
}

ef->epc += 3; /* resume at following instruction */
Expand Down Expand Up @@ -201,6 +233,7 @@ static void _set_exception_handler_wrapper(int cause) {
}
}

void install_non32xfer_exception_handler(void) __attribute__((weak));
void install_non32xfer_exception_handler(void) {
if (NULL == old_c_handler) {
// Set the "C" exception handler the wrapper will call
Expand Down
4 changes: 3 additions & 1 deletion cores/esp8266/exc-c-wrapper-handler.S
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,9 @@ _xtos_c_wrapper_handler:
// Restore special registers
l32i a14, a1, UEXC_sar

// load early - saves two cycles - @mhightower83
movi a0, _xtos_return_from_exc

// For compatibility with Arduino interrupt architecture, we keep interrupts 100%
// disabled.
// /*
Expand All @@ -201,7 +204,6 @@ _xtos_c_wrapper_handler:
// rsil a12, 1 // XCHAL_EXCM_LEVEL
rsil a12, 15 // All levels blocked.
wsr a14, SAR
movi a0, _xtos_return_from_exc
jx a0

/* FIXME: what about _GeneralException ? */
Expand Down