diff --git a/CHANGELOG.md b/CHANGELOG.md
index a1bae5fba5e..9d8d746eb74 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Changed
 
 - Bumped MSRV to 1.67 (#798)
+- Optimised multi-core critical section implementation (#797)
 
 ### Fixed
diff --git a/esp-hal-common/Cargo.toml b/esp-hal-common/Cargo.toml
index cce0468cb88..fe44cb716de 100644
--- a/esp-hal-common/Cargo.toml
+++ b/esp-hal-common/Cargo.toml
@@ -26,7 +26,6 @@ embedded-io = "0.5.0"
 esp-synopsys-usb-otg = { version = "0.3.2", optional = true, features = ["fs", "esp32sx"] }
 fugit = "0.3.7"
 log = { version = "0.4.20", optional = true }
-lock_api = { version = "0.4.10", optional = true }
 nb = "1.1.0"
 paste = "1.0.14"
 procmacros = { version = "0.6.1", package = "esp-hal-procmacros", path = "../esp-hal-procmacros" }
@@ -70,13 +69,13 @@ basic-toml = "0.1.4"
 serde = { version = "1.0.188", features = ["derive"] }
 
 [features]
-esp32 = ["esp32/rt", "xtensa", "xtensa-lx/esp32", "xtensa-lx-rt/esp32", "lock_api", "procmacros/esp32"]
+esp32 = ["esp32/rt", "xtensa", "xtensa-lx/esp32", "xtensa-lx-rt/esp32", "procmacros/esp32"]
 esp32c2 = ["esp32c2/rt", "riscv", "procmacros/esp32c2"]
 esp32c3 = ["esp32c3/rt", "riscv", "procmacros/esp32c3"]
 esp32c6 = ["esp32c6/rt", "riscv", "procmacros/esp32c6"]
 esp32h2 = ["esp32h2/rt", "riscv", "procmacros/esp32h2"]
 esp32s2 = ["esp32s2/rt", "xtensa", "xtensa-lx/esp32s2", "xtensa-lx-rt/esp32s2", "esp-synopsys-usb-otg", "usb-device", "procmacros/esp32s2"]
-esp32s3 = ["esp32s3/rt", "xtensa", "xtensa-lx/esp32s3", "xtensa-lx-rt/esp32s3", "lock_api", "esp-synopsys-usb-otg", "usb-device", "procmacros/esp32s3"]
+esp32s3 = ["esp32s3/rt", "xtensa", "xtensa-lx/esp32s3", "xtensa-lx-rt/esp32s3", "esp-synopsys-usb-otg", "usb-device", "procmacros/esp32s3"]
 
 # Crystal frequency selection (ESP32 and ESP32-C2 only!)
 esp32_26mhz = []
diff --git a/esp-hal-common/src/lib.rs b/esp-hal-common/src/lib.rs
index 702e2c50753..8c3f97d3ae2 100644
--- a/esp-hal-common/src/lib.rs
+++ b/esp-hal-common/src/lib.rs
@@ -181,19 +181,23 @@ pub enum Cpu {
     AppCpu,
 }
 
+#[cfg(all(xtensa, multi_core))]
+fn get_raw_core() -> u32 {
+    xtensa_lx::get_processor_id() & 0x2000
+}
+
 /// Which core the application is currently executing on
+#[cfg(all(xtensa, multi_core))]
 pub fn get_core() -> Cpu {
-    #[cfg(all(xtensa, multi_core))]
-    match ((xtensa_lx::get_processor_id() >> 13) & 1) != 0 {
-        false => Cpu::ProCpu,
-        true => Cpu::AppCpu,
+    match get_raw_core() {
+        0 => Cpu::ProCpu,
+        _ => Cpu::AppCpu,
     }
+}
 
-    // #[cfg(all(riscv, multi_core))]
-    // TODO get hart_id
-
-    // single core always has ProCpu only
-    #[cfg(single_core)]
+/// Which core the application is currently executing on
+#[cfg(not(all(xtensa, multi_core)))]
+pub fn get_core() -> Cpu {
     Cpu::ProCpu
 }
 
@@ -204,30 +208,50 @@ mod critical_section_impl {
 
     #[cfg(xtensa)]
     mod xtensa {
+        // PS has 15 useful bits. Bits 12..16 and 19..32 are unused, so we can use bit
+        // #31 as our reentry flag.
+        #[cfg(multi_core)]
+        const REENTRY_FLAG: u32 = 1 << 31;
+
         unsafe impl critical_section::Impl for super::CriticalSection {
             unsafe fn acquire() -> critical_section::RawRestoreState {
-                let tkn: critical_section::RawRestoreState;
+                let mut tkn: critical_section::RawRestoreState;
                 core::arch::asm!("rsil {0}, 5", out(reg) tkn);
 
                 #[cfg(multi_core)]
                 {
-                    let guard = super::multicore::MULTICORE_LOCK.lock();
-                    core::mem::forget(guard); // forget it so drop doesn't run
+                    use super::multicore::{LockKind, MULTICORE_LOCK};
+
+                    match MULTICORE_LOCK.lock() {
+                        LockKind::Lock => {
+                            // We can assume the reserved bit is 0, otherwise
+                            // rsil - wsr pairings would be undefined behavior.
+                        }
+                        LockKind::Reentry => tkn |= REENTRY_FLAG,
+                    }
                 }
 
                 tkn
             }
 
             unsafe fn release(token: critical_section::RawRestoreState) {
-                if token != 0 {
-                    #[cfg(multi_core)]
-                    {
-                        debug_assert!(super::multicore::MULTICORE_LOCK.is_owned_by_current_thread());
-                        // safety: we logically own the mutex from acquire()
-                        super::multicore::MULTICORE_LOCK.force_unlock();
+                #[cfg(multi_core)]
+                {
+                    use super::multicore::MULTICORE_LOCK;
+
+                    debug_assert!(MULTICORE_LOCK.is_owned_by_current_thread());
+
+                    if token & REENTRY_FLAG != 0 {
+                        return;
                     }
-                    core::arch::asm!(
-                        "wsr.ps {0}",
-                        "rsync", in(reg) token)
+
+                    MULTICORE_LOCK.unlock();
                 }
+
+                const RESERVED_MASK: u32 = 0b1111_1111_1111_1000_1111_0000_0000_0000;
+                debug_assert!(token & RESERVED_MASK == 0);
+
+                core::arch::asm!(
+                    "wsr.ps {0}",
+                    "rsync", in(reg) token)
             }
         }
     }
@@ -236,28 +260,45 @@ mod critical_section_impl {
     mod riscv {
         use esp_riscv_rt::riscv;
 
+        // The restore state is a u8 that is cast from a bool, so it has a value of
+        // 0x00 or 0x01 before we add the reentry flag to it.
+        #[cfg(multi_core)]
+        const REENTRY_FLAG: u8 = 1 << 7;
+
         unsafe impl critical_section::Impl for super::CriticalSection {
             unsafe fn acquire() -> critical_section::RawRestoreState {
                 let mut mstatus = 0u32;
                 core::arch::asm!("csrrci {0}, mstatus, 8", inout(reg) mstatus);
-                let interrupts_active = (mstatus & 0b1000) != 0;
+
+                let mut tkn = ((mstatus & 0b1000) != 0) as critical_section::RawRestoreState;
+
                 #[cfg(multi_core)]
                 {
-                    let guard = multicore::MULTICORE_LOCK.lock();
-                    core::mem::forget(guard); // forget it so drop doesn't run
+                    use super::multicore::{LockKind, MULTICORE_LOCK};
+
+                    match MULTICORE_LOCK.lock() {
+                        LockKind::Lock => {}
+                        LockKind::Reentry => tkn |= REENTRY_FLAG,
+                    }
                 }
 
-                interrupts_active as _
+                tkn
             }
 
             unsafe fn release(token: critical_section::RawRestoreState) {
-                if token != 0 {
-                    #[cfg(multi_core)]
-                    {
-                        debug_assert!(multicore::MULTICORE_LOCK.is_owned_by_current_thread());
-                        // safety: we logically own the mutex from acquire()
-                        multicore::MULTICORE_LOCK.force_unlock();
+                #[cfg(multi_core)]
+                {
+                    use super::multicore::MULTICORE_LOCK;
+
+                    debug_assert!(MULTICORE_LOCK.is_owned_by_current_thread());
+
+                    if token & REENTRY_FLAG != 0 {
+                        return;
                     }
+
+                    MULTICORE_LOCK.unlock();
+                }
+
+                if token != 0 {
                     riscv::interrupt::enable();
                 }
             }
@@ -266,50 +307,70 @@ mod critical_section_impl {
 
     #[cfg(multi_core)]
     mod multicore {
-        use core::sync::atomic::{AtomicBool, Ordering};
+        use core::sync::atomic::{AtomicUsize, Ordering};
 
-        use lock_api::{GetThreadId, GuardSend, RawMutex};
+        // We're using a value that we know get_raw_core() will never return. This
+        // avoids an unnecessary increment of the core ID.
+        //
+        // TODO: the first multi-core RISC-V target will show if this value is OK
+        // globally or only for Xtensa.
+        #[cfg(xtensa)]
+        const UNUSED_THREAD_ID_VALUE: usize = 0x0001;
 
-        use crate::get_core;
+        fn thread_id() -> usize {
+            crate::get_raw_core() as usize
+        }
 
-        /// Reentrant Mutex
-        ///
-        /// Currently implemented using an atomic spin lock.
-        /// In the future we can optimize this raw mutex to use some hardware
-        /// features.
-        pub(crate) static MULTICORE_LOCK: lock_api::ReentrantMutex<RawSpinlock, RawThreadId, ()> =
-            lock_api::ReentrantMutex::const_new(RawSpinlock::INIT, RawThreadId::INIT, ());
+        pub(super) static MULTICORE_LOCK: ReentrantMutex = ReentrantMutex::new();
 
-        pub(crate) struct RawThreadId;
+        pub(super) enum LockKind {
+            Lock = 0,
+            Reentry,
+        }
 
-        unsafe impl lock_api::GetThreadId for RawThreadId {
-            const INIT: Self = RawThreadId;
+        pub(super) struct ReentrantMutex {
+            owner: AtomicUsize,
+        }
 
-            fn nonzero_thread_id(&self) -> core::num::NonZeroUsize {
-                core::num::NonZeroUsize::new((get_core() as usize) + 1).unwrap()
+        impl ReentrantMutex {
+            const fn new() -> Self {
+                Self {
+                    owner: AtomicUsize::new(UNUSED_THREAD_ID_VALUE),
+                }
             }
-        }
 
-        pub(crate) struct RawSpinlock(AtomicBool);
+            pub fn is_owned_by_current_thread(&self) -> bool {
+                self.owner.load(Ordering::Relaxed) == thread_id()
+            }
+
+            pub(super) fn lock(&self) -> LockKind {
+                let current_thread_id = thread_id();
 
-        unsafe impl lock_api::RawMutex for RawSpinlock {
-            const INIT: RawSpinlock = RawSpinlock(AtomicBool::new(false));
+                if self.try_lock(current_thread_id) {
+                    return LockKind::Lock;
+                }
+
+                let current_owner = self.owner.load(Ordering::Relaxed);
+                if current_owner == current_thread_id {
+                    return LockKind::Reentry;
+                }
 
-            // A spinlock guard can be sent to another thread and unlocked there
-            type GuardMarker = GuardSend;
+                while !self.try_lock(current_thread_id) {}
 
-            fn lock(&self) {
-                while !self.try_lock() {}
+                LockKind::Lock
             }
 
-            fn try_lock(&self) -> bool {
-                self.0
-                    .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+            fn try_lock(&self, new_owner: usize) -> bool {
+                self.owner
+                    .compare_exchange(
+                        UNUSED_THREAD_ID_VALUE,
+                        new_owner,
+                        Ordering::Acquire,
+                        Ordering::Relaxed,
+                    )
                     .is_ok()
             }
 
-            unsafe fn unlock(&self) {
-                self.0.store(false, Ordering::Release);
+            pub(super) fn unlock(&self) {
+                self.owner.store(UNUSED_THREAD_ID_VALUE, Ordering::Release);
             }
         }
     }
 }
diff --git a/esp32s3-hal/ld/db-esp32s3.x b/esp32s3-hal/ld/db-esp32s3.x
index 0d41b3a415a..37bf0675382 100644
--- a/esp32s3-hal/ld/db-esp32s3.x
+++ b/esp32s3-hal/ld/db-esp32s3.x
@@ -97,7 +97,7 @@ SECTIONS {
   }
   _text_size = _etext - _stext;
 
-  .rodata ORIGIN(RODATA) + 0x408 + _text_size : AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header))
+  .rodata ORIGIN(RODATA) + 0x408 + _text_size :
   {
     _rodata_start = ABSOLUTE(.);
    . = ALIGN (4);
@@ -106,8 +106,7 @@ SECTIONS {
    *(.rodata .rodata.*)
    _rodata_end = ABSOLUTE(.);
   }
 
-  .rwtext ORIGIN(RWTEXT) + 0x408 + _text_size + SIZEOF(.rodata) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata))
+  .rwtext ORIGIN(RWTEXT) + 0x408 + _text_size + SIZEOF(.rodata) :
   {
    _irwtext = ORIGIN(RODATA) + 0x408 + _text_size + SIZEOF(.rodata);
    _srwtext = .;
@@ -156,7 +155,6 @@ SECTIONS {
  }
 
  .data ORIGIN(RWDATA) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext))
  {
    _data_start = ABSOLUTE(.);
    . = ALIGN (4);
@@ -164,10 +162,9 @@ SECTIONS {
    *(.data .data.*)
    . = ALIGN (4);
    _data_end = ABSOLUTE(.);
  }
 
- /* LMA of .data */
- _sidata = ORIGIN(RODATA) + _text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext);
+ _sidata = _erwtext - ORIGIN(RWTEXT) + ORIGIN(RODATA);
 
  .bss (NOLOAD) : ALIGN(4)
  {
@@ -193,7 +190,6 @@
  } > RWDATA
 
  .rtc_fast.text ORIGIN(rtc_fast_iram_seg) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) )
  {
    . = ALIGN(4);
    _rtc_fast_text_start = ABSOLUTE(.);
@@ -204,7 +200,6 @@
  _irtc_fast_text = ORIGIN(RODATA) + _text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext);
 
  .rtc_fast.data ORIGIN(rtc_fast_dram_seg) + SIZEOF(.rtc_fast.text) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) + SIZEOF(.rtc_fast.text) )
  {
    . = ALIGN(4);
    _rtc_fast_data_start = ABSOLUTE(.);
@@ -215,8 +210,6 @@
  _irtc_fast_data = ORIGIN(RODATA) + _text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) + SIZEOF(.rtc_fast.text);
 
  .rtc_fast.bss ORIGIN(rtc_fast_dram_seg) + SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) (NOLOAD) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) +
-     SIZEOF(.rwtext) + SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data))
  {
    . = ALIGN(4);
    _rtc_fast_bss_start = ABSOLUTE(.);
@@ -233,8 +226,6 @@
  }
 
  .rtc_slow.text ORIGIN(rtc_slow_seg) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) +
-     SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) + SIZEOF(.rtc_fast.bss))
  {
    . = ALIGN(4);
    _rtc_slow_text_start = ABSOLUTE(.);
@@ -246,8 +237,6 @@
     SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) + SIZEOF(.rtc_fast.bss);
 
  .rtc_slow.data ORIGIN(rtc_slow_seg) + SIZEOF(.rtc_slow.text) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) +
-     SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) + SIZEOF(.rtc_fast.bss) + SIZEOF(.rtc_slow.text))
  {
    . = ALIGN(4);
    _rtc_slow_data_start = ABSOLUTE(.);
@@ -259,8 +248,6 @@
     SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) + SIZEOF(.rtc_fast.bss) + SIZEOF(.rtc_slow.text);
 
  .rtc_slow.bss ORIGIN(rtc_slow_seg) + SIZEOF(.rtc_slow.text) + SIZEOF(.rtc_slow.data) (NOLOAD) :
-  AT(_text_size + SIZEOF(.header) + SIZEOF(.pre_header) + SIZEOF(.rodata) + SIZEOF(.rwtext) +
-     SIZEOF(.rtc_fast.text) + SIZEOF(.rtc_fast.data) + SIZEOF(.rtc_fast.bss) + SIZEOF(.rtc_slow.text) + SIZEOF(.rtc_slow.data))
  {
    . = ALIGN(4);
    _rtc_slow_bss_start = ABSOLUTE(.);
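
The notes below are reviewer sketches, not part of the patch. First, the locking protocol behind the new `ReentrantMutex`: a single compare-exchange on the owner word both takes the lock and records who holds it, so a nested `acquire` on the same core is detected by comparing owner IDs instead of deadlocking. A minimal host-side model of that protocol (std-based and illustrative only; `UNUSED` and the integer core IDs stand in for `UNUSED_THREAD_ID_VALUE` and `get_raw_core()`):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

// Sentinel meaning "unlocked"; it must be a value no core ID can take,
// mirroring UNUSED_THREAD_ID_VALUE in the patch.
const UNUSED: usize = usize::MAX;

enum LockKind {
    Lock,
    Reentry,
}

struct ReentrantSpinlock {
    owner: AtomicUsize,
}

impl ReentrantSpinlock {
    const fn new() -> Self {
        Self { owner: AtomicUsize::new(UNUSED) }
    }

    fn try_lock(&self, me: usize) -> bool {
        // Acquire on success pairs with the Release store in unlock().
        self.owner
            .compare_exchange(UNUSED, me, Ordering::Acquire, Ordering::Relaxed)
            .is_ok()
    }

    fn lock(&self, me: usize) -> LockKind {
        // Fast path: the lock was free.
        if self.try_lock(me) {
            return LockKind::Lock;
        }
        // Taken: if we are already the owner, this is a nested acquire.
        if self.owner.load(Ordering::Relaxed) == me {
            return LockKind::Reentry;
        }
        // Otherwise spin until the other core releases the lock.
        while !self.try_lock(me) {}
        LockKind::Lock
    }

    fn unlock(&self) {
        self.owner.store(UNUSED, Ordering::Release);
    }
}

fn main() {
    let lock = ReentrantSpinlock::new();
    assert!(matches!(lock.lock(0), LockKind::Lock));
    // The same "core" acquiring again is reported, not deadlocked on.
    assert!(matches!(lock.lock(0), LockKind::Reentry));
    // Only the outermost release actually frees the lock.
    lock.unlock();
    assert!(matches!(lock.lock(1), LockKind::Lock));
    lock.unlock();
}
```

Compared to the old `lock_api` spinlock plus a separate `GetThreadId` implementation, this folds ownership tracking into the lock word itself, which is what lets the `lock_api` dependency be dropped.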
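Second, how the reentry flag rides inside the `critical_section` token. On Xtensa the token is the saved PS word and bit 31 is one of its unused bits; on RISC-V the token starts as a bool (0x00/0x01), leaving bit 7 free. A checkable sketch using the diff's constants (the sample `saved_ps` value is hypothetical):

```rust
// Xtensa: bits 12..16 and 19..32 of PS are unused, so bit #31 can carry the
// reentry flag and an outermost token always has the reserved bits clear.
const REENTRY_FLAG: u32 = 1 << 31;
const RESERVED_MASK: u32 = 0b1111_1111_1111_1000_1111_0000_0000_0000;

// RISC-V: the token is built from a bool, so bit 7 is free for the flag.
const REENTRY_FLAG_RV: u8 = 1 << 7;

fn main() {
    // Hypothetical PS value captured by `rsil` (only non-reserved bits set).
    let saved_ps: u32 = 0x0004_0025;
    assert_eq!(saved_ps & RESERVED_MASK, 0);

    // A nested acquire tags the token in a reserved bit...
    let token = saved_ps | REENTRY_FLAG;
    // ...so release sees the flag and returns early instead of writing PS...
    assert_ne!(token & REENTRY_FLAG, 0);
    // ...while an outermost token passes the RESERVED_MASK debug assertion.
    assert_eq!(saved_ps & !RESERVED_MASK, saved_ps);

    // Same idea on RISC-V: interrupt state and reentry flag share one u8.
    let rv_token = (true as u8) | REENTRY_FLAG_RV;
    assert_eq!(rv_token & !REENTRY_FLAG_RV, 1);
}
```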
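Finally, the linker-script side: the removed AT() expressions and the new `_sidata = _erwtext - ORIGIN(RWTEXT) + ORIGIN(RODATA)` describe the same load address, assuming `0x408` equals `SIZEOF(.header) + SIZEOF(.pre_header)` as the script's VMA expressions imply. A sketch with made-up origins and sizes:

```rust
// Made-up layout constants; HEADERS stands in for
// SIZEOF(.header) + SIZEOF(.pre_header), per the script's VMA expressions.
const ORIGIN_RODATA: u64 = 0x3C00_0000; // hypothetical
const ORIGIN_RWTEXT: u64 = 0x4038_0000; // hypothetical
const HEADERS: u64 = 0x408;

fn main() {
    // Hypothetical section sizes.
    let (text, rodata, rwtext) = (0x1_2000u64, 0x3000u64, 0x800u64);

    // VMA end of .rwtext as the script lays it out:
    // ORIGIN(RWTEXT) + 0x408 + _text_size + SIZEOF(.rodata) + SIZEOF(.rwtext)
    let erwtext = ORIGIN_RWTEXT + HEADERS + text + rodata + rwtext;

    // Removed, hand-maintained LMA sum for .data.
    let old_sidata = ORIGIN_RODATA + text + HEADERS + rodata + rwtext;
    // New expression: derive the LMA from the VMA instead of re-summing sizes.
    let new_sidata = erwtext - ORIGIN_RWTEXT + ORIGIN_RODATA;

    assert_eq!(old_sidata, new_sidata);
}
```

Deriving the LMA from the section's own end symbol means the expression no longer has to be kept in sync by hand every time a section is added or resized.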