diff --git a/esp-hal-common/.gitignore b/esp-hal-common/.gitignore
new file mode 100644
index 00000000000..8f61cef6fae
--- /dev/null
+++ b/esp-hal-common/.gitignore
@@ -0,0 +1 @@
+rust-toolchain.toml
diff --git a/esp-hal-common/Cargo.toml b/esp-hal-common/Cargo.toml
index 1b350fcf4ea..4c275c22c7d 100644
--- a/esp-hal-common/Cargo.toml
+++ b/esp-hal-common/Cargo.toml
@@ -50,11 +50,11 @@ ufmt-write = { version = "0.1.0", optional = true }
 # Each supported device MUST have its PAC included below along with a
 # corresponding feature. We rename the PAC packages because we cannot
 # have dependencies and features with the same names.
-esp32 = { version = "0.15.0", features = ["critical-section"], optional = true }
+esp32 = { version = "0.16.0", features = ["critical-section"], optional = true }
 esp32c2 = { version = "0.5.1", features = ["critical-section"], optional = true }
 esp32c3 = { version = "0.8.1", features = ["critical-section"], optional = true }
 esp32s2 = { version = "0.6.0", features = ["critical-section"], optional = true }
-esp32s3 = { version = "0.8.0", features = ["critical-section"], optional = true }
+esp32s3 = { version = "0.9.0", features = ["critical-section"], optional = true }
 
 [features]
 esp32 = ["esp32/rt" , "procmacros/xtensa", "xtensa-lx-rt/esp32", "xtensa-lx/esp32", "critical-section/restore-state-u32", "lock_api"]
diff --git a/esp-hal-common/src/lib.rs b/esp-hal-common/src/lib.rs
index fedb8742437..aedc9493879 100644
--- a/esp-hal-common/src/lib.rs
+++ b/esp-hal-common/src/lib.rs
@@ -48,6 +48,7 @@ pub use self::{
     timer::Timer,
 };
 
+pub mod sha;
 pub mod analog;
 pub mod clock;
 pub mod delay;
diff --git a/esp-hal-common/src/sha.rs b/esp-hal-common/src/sha.rs
new file mode 100644
index 00000000000..e56c20c2630
--- /dev/null
+++ b/esp-hal-common/src/sha.rs
@@ -0,0 +1,519 @@
+use core::convert::Infallible;
+
+use crate::pac::SHA;
+
+// All the hash algorithms introduced in FIPS PUB 180-4 Spec.
+// – SHA-1
+// – SHA-224
+// – SHA-256
+// – SHA-384
+// – SHA-512
+// – SHA-512/224
+// – SHA-512/256
+// – SHA-512/t (not implemented yet)
+// Two working modes
+// – Typical SHA
+// – DMA-SHA (not implemented yet)
+
+const ALIGN_SIZE: usize = core::mem::size_of::<u32>();
+
+// ESP32 does reversed order
+#[cfg(esp32)]
+const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_be_bytes;
+
+#[cfg(not(esp32))]
+const U32_FROM_BYTES: fn([u8; 4]) -> u32 = u32::from_ne_bytes;
+
+/// The alignment helper lets you write to registers that only accept u32 using
+/// regular u8s (bytes). It keeps a write buffer of 4 u8 (could in theory be 3,
+/// but that is less convenient). If the incoming data is not convertible to u32
+/// (i.e. not a multiple of 4 in length), it stores the remainder in the buffer
+/// until the next call.
+///
+/// It assumes every incoming `dst` is aligned to the desired layout (in future
+/// `ptr.is_aligned` can be used). It also assumes that writes are done in FIFO
+/// order.
+#[derive(Debug)]
+struct AlignmentHelper {
+    buf: [u8; ALIGN_SIZE],
+    buf_fill: usize,
+}
+
+impl AlignmentHelper {
+    /// Creates a helper with an empty buffer.
+    pub fn default() -> AlignmentHelper {
+        AlignmentHelper {
+            buf: [0u8; ALIGN_SIZE],
+            buf_fill: 0,
+        }
+    }
+
+    /// Writes any buffered bytes to `dst` as one word, zero-padded to a full u32,
+    /// and returns the number of buffered *bytes* flushed (0 means no write).
+    pub unsafe fn flush_to(&mut self, dst: *mut u32) -> usize {
+        if self.buf_fill != 0 {
+            for i in self.buf_fill..ALIGN_SIZE {
+                self.buf[i] = 0;
+            }
+
+            dst.write_volatile(U32_FROM_BYTES(self.buf));
+        }
+
+        let flushed = self.buf_fill;
+        self.buf_fill = 0;
+
+        return flushed;
+    }
+
+    /// Similar to `volatile_set_memory`, but first completes a partially buffered
+    /// word with `val` and writes it to `dst`; `count` counts the u32s after it.
+    #[allow(unused)]
+    pub unsafe fn volatile_write_bytes(&mut self, dst: *mut u32, val: u8, count: usize) {
+        let mut cursor = 0;
+        if self.buf_fill != 0 {
+            for i in self.buf_fill..ALIGN_SIZE {
+                self.buf[i] = val;
+            }
+
+            dst.write_volatile(U32_FROM_BYTES(self.buf));
+            cursor = 1;
+
+            self.buf_fill = 0;
+        }
+
+        core::ptr::write_bytes(dst.add(cursor), val, count);
+    }
+
+    /// Similar to `volatile_copy_nonoverlapping_memory`, but buffers up to a u32 so
+    /// that registers are always written in an aligned way. Writing stops
+    /// `dst_bound` bytes after the current position (the end of the register
+    /// block). Returns the data that is still to be written (if it was not
+    /// possible to write everything) and whether it wrote up to `dst_bound` (true)
+    /// or stopped early due to lack of data (false).
+    pub unsafe fn aligned_volatile_copy<'a>(
+        &mut self,
+        dst: *mut u32,
+        src: &'a [u8],
+        dst_bound: usize,
+    ) -> (&'a [u8], bool) {
+        assert!(dst_bound > 0);
+
+        let mut nsrc = src;
+        let mut cursor = 0;
+        let mut space = dst_bound; // Bytes that may still be consumed before the bound
+        if self.buf_fill != 0 {
+            // First prepend existing data
+            let max_fill = ALIGN_SIZE - self.buf_fill;
+            let (nbuf, src) = src.split_at(core::cmp::min(src.len(), max_fill));
+            nsrc = src;
+            let filled = self.buf_fill + nbuf.len();
+            self.buf[self.buf_fill..filled].copy_from_slice(nbuf);
+            self.buf_fill = filled;
+            if filled < ALIGN_SIZE {
+                return (&[], false); // Used up the entire input before completing a word
+            }
+
+            dst.write_volatile(U32_FROM_BYTES(self.buf));
+            cursor += 1;
+            space = space.saturating_sub(max_fill);
+
+            self.buf_fill = 0;
+        }
+
+        if space == 0 {
+            return (nsrc, true);
+        }
+
+        let (to_write, remaining) = nsrc.split_at(core::cmp::min(
+            space,
+            (nsrc.len() / ALIGN_SIZE) * ALIGN_SIZE, // TODO: unstable div_floor for clarity?
+        ));
+
+        if to_write.len() > 0 {
+            // Raw v_c_n_m also works but only when src.len() >= 4 * ALIGN_SIZE, otherwise
+            // it is broken:
+            // core::intrinsics::volatile_copy_nonoverlapping_memory::<u32>(dst.add(cursor),
+            // to_write.as_ptr() as *const u32, to_write.len()/alignment);
+            // NOTE(review): unlike the buffered word above, this path applies `.to_be()` - confirm
+            for (i, v) in to_write.chunks_exact(ALIGN_SIZE).enumerate() {
+                dst.add(cursor + i)
+                    .write_volatile(U32_FROM_BYTES(v.try_into().unwrap()).to_be());
+            }
+        }
+
+        // Data that does not fill a whole word is not written now; it is buffered
+        // until the next write. Generally this applies when (src/4*4) != src
+        let was_bounded = space - to_write.len() == 0;
+        if remaining.len() > 0 && remaining.len() < 4 {
+            for i in 0..remaining.len() {
+                self.buf[i] = remaining[i];
+            }
+
+            self.buf_fill = remaining.len();
+
+            return (&[], was_bounded);
+        }
+
+        return (remaining, was_bounded);
+    }
+}
+
+/// State of the SHA peripheral driver: the peripheral, the selected mode and progress.
+#[derive(Debug)]
+pub struct Sha {
+    sha: SHA,
+    mode: ShaMode,
+    alignment_helper: AlignmentHelper,
+    cursor: usize,
+    first_run: bool,
+    finished: bool,
+}
+
+/// Hash algorithm to run; the variants available depend on the target chip.
+#[derive(Debug, Clone, Copy)]
+pub enum ShaMode {
+    SHA1,
+    #[cfg(not(esp32))]
+    SHA224,
+    SHA256,
+    #[cfg(any(esp32s2, esp32s3, esp32))]
+    SHA384,
+    #[cfg(any(esp32s2, esp32s3, esp32))]
+    SHA512,
+    #[cfg(any(esp32s2, esp32s3))]
+    SHA512_224,
+    #[cfg(any(esp32s2, esp32s3))]
+    SHA512_256,
+    // SHA512_(u16) // Max 511
+}
+
+// TODO: Maybe make Sha generic over the mode, in order to allow for better
+// compiler optimizations?
+// (Requires complex const generics, which are not stable yet)
+
+#[cfg(not(esp32))]
+fn mode_as_bits(mode: ShaMode) -> u8 {
+    match mode {
+        ShaMode::SHA1 => 0,
+        ShaMode::SHA224 => 1,
+        ShaMode::SHA256 => 2,
+        #[cfg(any(esp32s2, esp32s3))]
+        ShaMode::SHA384 => 3,
+        #[cfg(any(esp32s2, esp32s3))]
+        ShaMode::SHA512 => 4,
+        #[cfg(any(esp32s2, esp32s3))]
+        ShaMode::SHA512_224 => 5,
+        #[cfg(any(esp32s2, esp32s3))]
+        ShaMode::SHA512_256 => 6,
+        // _ => 0 // TODO: SHA512/t
+    }
+}
+
+// TODO: Allow/Implement SHA512_(u16)
+
+// A few notes on this implementation with regards to 'memcpy',
+// - It seems that ptr::write_bytes already acts as volatile, while ptr::copy_*
+//   does not (in this case)
+// - The registers are *not* cleared after processing, so padding needs to be
+//   written out
+// - core::intrinsics::volatile_* would be the only way to efficiently copy
+//   memory with volatile, but it is unstable (the code below does not call
+//   it)
+// - For these particular registers (and probably others), a full u32 needs to
+//   be written;
+//   partial register writes (i.e. in u8 mode) do not work
+// - This means that we need to buffer incoming bytes, up to 4 u8's, in order
+//   to create a full u32
+
+// This implementation might fail after u32::MAX/8 bytes, to increase please see
+// ::finish() length/self.cursor usage
+impl Sha {
+    pub fn new(sha: SHA, mode: ShaMode) -> Self {
+        // Setup SHA Mode
+        #[cfg(not(esp32))]
+        sha.mode
+            .write(|w| unsafe { w.mode().bits(mode_as_bits(mode)) });
+
+        Self {
+            sha,
+            mode,
+            cursor: 0,
+            first_run: true,
+            finished: false,
+            alignment_helper: AlignmentHelper::default(),
+        }
+    }
+
+    pub fn first_run(&self) -> bool {
+        self.first_run
+    }
+
+    pub fn finished(&self) -> bool {
+        self.finished
+    }
+
+    #[cfg(not(esp32))]
+    fn process_buffer(&mut self) {
+        // FIXME: SHA_START_REG & SHA_CONTINUE_REG are wrongly marked as RO (they are
+        // WO)
+        if self.first_run {
+            // Set SHA_START_REG
+            unsafe {
+                self.sha.start.as_ptr().write_volatile(1u32);
+            }
+            self.first_run = false;
+        } else {
+            // SET SHA_CONTINUE_REG
+            unsafe {
+                self.sha.continue_.as_ptr().write_volatile(1u32);
+            }
+        }
+    }
+
+    #[cfg(esp32)]
+    fn process_buffer(&mut self) {
+        if self.first_run {
+            match self.mode {
+                ShaMode::SHA1 => self.sha.sha1_start.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA256 => self.sha.sha256_start.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA384 => self.sha.sha384_start.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA512 => self.sha.sha512_start.write(|w| unsafe { w.bits(1) }),
+            }
+            self.first_run = false;
+        } else {
+            match self.mode {
+                ShaMode::SHA1 => self.sha.sha1_continue.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA256 => self.sha.sha256_continue.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA384 => self.sha.sha384_continue.write(|w| unsafe { w.bits(1) }),
+                ShaMode::SHA512 => self.sha.sha512_continue.write(|w| unsafe { w.bits(1) }),
+            }
+        }
+    }
+
+    fn chunk_length(&self) -> usize {
+        return match self.mode {
+            ShaMode::SHA1 | ShaMode::SHA256 => 64,
+            #[cfg(not(esp32))]
+            ShaMode::SHA224 => 64,
+            _ => 128,
+        };
+    }
+
+    #[cfg(esp32)]
+    fn is_busy(&self) -> bool {
+        match self.mode {
+            ShaMode::SHA1 => self.sha.sha1_busy.read().sha1_busy().bit_is_set(),
+            ShaMode::SHA256 => self.sha.sha256_busy.read().sha256_busy().bit_is_set(),
+            ShaMode::SHA384 => self.sha.sha384_busy.read().sha384_busy().bit_is_set(),
+            ShaMode::SHA512 => self.sha.sha512_busy.read().sha512_busy().bit_is_set(),
+        }
+    }
+
+    #[cfg(not(esp32))]
+    fn is_busy(&self) -> bool {
+        self.sha.busy.read().bits() != 0
+    }
+
+    pub fn digest_length(&self) -> usize {
+        match self.mode {
+            ShaMode::SHA1 => 20,
+            #[cfg(not(esp32))]
+            ShaMode::SHA224 => 28,
+            ShaMode::SHA256 => 32,
+            #[cfg(any(esp32, esp32s2, esp32s3))]
+            ShaMode::SHA384 => 48,
+            #[cfg(any(esp32, esp32s2, esp32s3))]
+            ShaMode::SHA512 => 64,
+            #[cfg(any(esp32s2, esp32s3))]
+            ShaMode::SHA512_224 => 28,
+            #[cfg(any(esp32s2, esp32s3))]
+            ShaMode::SHA512_256 => 32,
+        }
+    }
+
+    #[cfg(not(esp32))]
+    fn input_ptr(&self) -> *mut u32 {
+        return self.sha.m_mem[0].as_ptr() as *mut u32;
+    }
+
+    #[cfg(esp32)]
+    fn input_ptr(&self) -> *mut u32 {
+        return self.sha.text[0].as_ptr() as *mut u32;
+    }
+
+    #[cfg(not(esp32))]
+    fn output_ptr(&self) -> *const u32 {
+        return self.sha.h_mem[0].as_ptr() as *const u32;
+    }
+
+    #[cfg(esp32)]
+    fn output_ptr(&self) -> *const u32 {
+        return self.sha.text[0].as_ptr() as *const u32;
+    }
+
+    fn flush_data(&mut self) -> nb::Result<(), Infallible> {
+        if self.is_busy() {
+            return Err(nb::Error::WouldBlock);
+        }
+
+        unsafe {
+            let dst_ptr = self
+                .input_ptr()
+                .add((self.cursor % self.chunk_length()) / ALIGN_SIZE);
+            let flushed = self.alignment_helper.flush_to(dst_ptr);
+            if flushed != 0 {
+                self.cursor = self.cursor.wrapping_add(ALIGN_SIZE - flushed);
+                if self.cursor % self.chunk_length() == 0 {
+                    self.process_buffer();
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Feeds `incoming` through the alignment helper so that the input registers
+    /// only see aligned u32 writes (due to issues with cpy_mem)
+    fn write_data<'a>(&mut self, incoming: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
+        let mod_cursor = self.cursor % self.chunk_length();
+
+        unsafe {
+            let ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
+            let (remaining, bound_reached) = self.alignment_helper.aligned_volatile_copy(
+                ptr,
+                incoming,
+                self.chunk_length() - mod_cursor,
+            );
+            self.cursor = self.cursor.wrapping_add(incoming.len() - remaining.len());
+            if bound_reached {
+                self.process_buffer();
+            }
+
+            Ok(remaining)
+        }
+    }
+
+    pub fn update<'a>(&mut self, buffer: &'a [u8]) -> nb::Result<&'a [u8], Infallible> {
+        if self.is_busy() {
+            return Err(nb::Error::WouldBlock);
+        }
+
+        self.finished = false;
+
+        let remaining = self.write_data(buffer)?;
+
+        Ok(remaining)
+    }
+
+    /// Finishes the calculation (if not already done) and copies the result to
+    /// `output`. After `finish()` is called, `update()`s contribute to a new hash,
+    /// which can be calculated again with `finish()`.
+    ///
+    /// Typically `output` is expected to be `digest_length()` bytes long, but a
+    /// smaller buffer can be given to get a "short hash".
+    pub fn finish(&mut self, output: &mut [u8]) -> nb::Result<(), Infallible> {
+        // The main purpose of this function is to dynamically generate padding for the
+        // input. Padding: Append "1" bit, Pad zeros until 512/1024 filled
+        // then set the message length in the LSB (overwriting the padding)
+        // If not enough free space for length+1, add length at end of a new zero'd
+        // block
+
+        if self.is_busy() {
+            return Err(nb::Error::WouldBlock);
+        }
+
+        let chunk_len = self.chunk_length();
+
+        if !self.finished {
+            // Store message length for padding
+            let length = self.cursor * 8;
+            nb::block!(self.update(&[0x80]))?; // Append "1" bit
+            nb::block!(self.flush_data())?; // Flush partial data, ensures aligned cursor
+            debug_assert!(self.cursor % 4 == 0);
+
+            let mod_cursor = self.cursor % chunk_len;
+            if chunk_len - mod_cursor < chunk_len / 8 {
+                // Zero out remaining data if buffer is almost full (>=448/896), and process
+                // buffer
+                let pad_len = chunk_len - mod_cursor;
+                unsafe {
+                    let m_cursor_ptr = self.input_ptr().add(mod_cursor / ALIGN_SIZE);
+                    self.alignment_helper.volatile_write_bytes(
+                        m_cursor_ptr,
+                        0,
+                        pad_len / ALIGN_SIZE,
+                    );
+                }
+                self.process_buffer();
+                self.cursor = self.cursor.wrapping_add(pad_len);
+
+                // Spin-wait for finish
+                while self.is_busy() {}
+            }
+
+            let mod_cursor = self.cursor % chunk_len; // Should be zero if branched above
+            unsafe {
+                let m_cursor_ptr = self.input_ptr();
+                // Pad zeros
+                let pad_ptr = m_cursor_ptr.add(mod_cursor / ALIGN_SIZE);
+                let pad_len = (chunk_len - mod_cursor) - ALIGN_SIZE;
+
+                self.alignment_helper
+                    .volatile_write_bytes(pad_ptr, 0, pad_len / ALIGN_SIZE);
+
+                // Write length (BE) to end
+                // NOTE: aligned_volatile_copy does not work here
+                // The decompiler suggest volatile_copy_memory/write_volatile is optimized to a
+                // simple *v = *pv; While the aligned_volatile_copy makes an
+                // actual call to memcpy, why this makes a difference when
+                // memcpy does works in other places, I don't know
+                let end_ptr = m_cursor_ptr.add((chunk_len / ALIGN_SIZE) - 1);
+                #[cfg(not(esp32))]
+                end_ptr.write_volatile(length.to_be() as u32);
+                #[cfg(esp32)]
+                end_ptr.write_volatile(length.to_le() as u32);
+            }
+            while self.is_busy() {} // A block started by flush_data() may still be running
+            self.process_buffer();
+            // Spin-wait for final buffer to be processed
+            while self.is_busy() {}
+
+            // ESP32 requires additional load to retrieve output
+            #[cfg(esp32)]
+            {
+                match self.mode {
+                    ShaMode::SHA1 => unsafe { self.sha.sha1_load.write(|w| w.bits(1)) },
+                    ShaMode::SHA256 => unsafe { self.sha.sha256_load.write(|w| w.bits(1)) },
+                    ShaMode::SHA384 => unsafe { self.sha.sha384_load.write(|w| w.bits(1)) },
+                    ShaMode::SHA512 => unsafe { self.sha.sha512_load.write(|w| w.bits(1)) },
+                }
+
+                // Spin wait for result, 8-20 clock cycles according to manual
+                while self.is_busy() {}
+            }
+
+            self.finished = true;
+            self.first_run = true; // The next `update()` starts a new message
+            self.cursor = 0;
+        }
+
+        unsafe {
+            let digest_ptr = self.output_ptr();
+            let out_ptr = output.as_mut_ptr() as *mut u32;
+            let digest_out = core::cmp::min(self.digest_length(), output.len()) / ALIGN_SIZE;
+            for i in 0..digest_out {
+                #[cfg(not(esp32))]
+                out_ptr.add(i).write_unaligned(digest_ptr.add(i).read_volatile());
+                // ESP32 does reversed order; `output` is a byte buffer, so store unaligned
+                #[cfg(esp32)]
+                out_ptr.add(i).write_unaligned(digest_ptr.add(i).read_volatile().to_be());
+            }
+        }
+
+        Ok(())
+    }
+
+    pub fn free(self) -> SHA {
+        self.sha
+    }
+}
diff --git a/esp32-hal/Cargo.toml b/esp32-hal/Cargo.toml
index 39ce77c8ccb..be12d26932e 100644
--- a/esp32-hal/Cargo.toml
+++ b/esp32-hal/Cargo.toml
@@ -40,6 +40,7 @@ embassy-executor = { package = "embassy-executor", git = "https://github.com/em
 embedded-graphics = "0.7.1"
 esp-backtrace = { version = "0.3.0", features = ["esp32", "panic-handler", "exception-handler", "print-uart"] }
 esp-println = { version = "0.3.1", features = ["esp32"] }
+sha2 = { version = "0.10.6", default-features = false}
 smart-leds = "0.3.0"
 ssd1306 = "0.7.1"
 static_cell = "1.0.0"
diff --git a/esp32-hal/examples/sha.rs b/esp32-hal/examples/sha.rs
new file mode 100644
index 00000000000..79a1abc9503
--- /dev/null
+++ b/esp32-hal/examples/sha.rs
@@ -0,0 +1,80 @@
+//! Demonstrates the use of the SHA peripheral and compares the speed of hardware-accelerated and pure software hashing.
+//!
+ +#![no_std] +#![no_main] + +use esp32_hal::{ + clock::ClockControl, + pac::Peripherals, + prelude::*, + timer::TimerGroup, + Rtc, + sha::{Sha, ShaMode}, +}; +use nb::block; +use esp_backtrace as _; +use esp_println::println; +use xtensa_lx_rt::entry; +use sha2::{Sha512, Digest}; + +#[entry] +fn main() -> ! { + let peripherals = Peripherals::take().unwrap(); + let system = peripherals.DPORT.split(); + let clocks = ClockControl::boot_defaults(system.clock_control).freeze(); + + let timer_group0 = TimerGroup::new(peripherals.TIMG0, &clocks); + let mut wdt = timer_group0.wdt; + let mut rtc = Rtc::new(peripherals.RTC_CNTL); + + // Disable MWDT and RWDT (Watchdog) flash boot protection + wdt.disable(); + rtc.rwdt.disable(); + + + let source_data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".as_bytes(); + let mut remaining = source_data.clone(); + let mut hasher = Sha::new(peripherals.SHA, ShaMode::SHA512); + + // Short hashes can be created by decreasing the output buffer to the desired length + let mut output = [0u8; 64]; + + let pre_calc = xtensa_lx::timer::get_cycle_count(); + // The hardware implementation takes a subslice of the input, and returns the unprocessed parts + // The unprocessed parts can be input in the next iteration, you can always add more data until + // finish() is called. After finish() is called update()'s will contribute to a new hash which + // can be extracted again with finish(). + + while remaining.len() > 0 { + // Can add println to view progress, however println takes a few orders of magnitude longer than + // the Sha function itself so not useful for comparing processing time + // println!("Remaining len: {}", remaining.len()); + + // All the HW Sha functions are infallible so unwrap is fine to use if you use block! 
+ remaining = block!(hasher.update(remaining)).unwrap(); + } + + // Finish can be called as many times as desired to get mutliple copies of the output. + block!(hasher.finish(output.as_mut_slice())).unwrap(); + + let post_calc = xtensa_lx::timer::get_cycle_count(); + let hw_time = post_calc - pre_calc; + println!("Took {} cycles", hw_time); + println!("SHA512 Hash output {:02x?}", output); + let _usha = hasher.free(); + + + let pre_calc = xtensa_lx::timer::get_cycle_count(); + let mut hasher = Sha512::new(); + hasher.update(source_data); + let soft_result = hasher.finalize(); + let post_calc = xtensa_lx::timer::get_cycle_count(); + let soft_time = post_calc - pre_calc; + println!("Took {} cycles", soft_time); + println!("SHA512 Hash output {:02x?}", soft_result); + + println!("HW SHA is {}x faster", soft_time/hw_time); + + loop {} +} diff --git a/esp32-hal/src/lib.rs b/esp32-hal/src/lib.rs index bfba115d858..70be3d5496a 100644 --- a/esp32-hal/src/lib.rs +++ b/esp32-hal/src/lib.rs @@ -29,6 +29,7 @@ pub use esp_hal_common::{ Rtc, Rwdt, Serial, + sha }; pub use self::gpio::IO; diff --git a/esp32c2-hal/Cargo.toml b/esp32c2-hal/Cargo.toml index fb586217cf7..8bac9eb0b95 100644 --- a/esp32c2-hal/Cargo.toml +++ b/esp32c2-hal/Cargo.toml @@ -41,6 +41,7 @@ embassy-executor = { package = "embassy-executor", git = "https://github.com/em embedded-graphics = "0.7.1" esp-backtrace = { version = "0.3.0", features = ["esp32c2", "panic-handler", "exception-handler", "print-uart"] } esp-println = { version = "0.3.1", features = ["esp32c2"] } +sha2 = { version = "0.10.6", default-features = false} ssd1306 = "0.7.1" static_cell = "1.0.0" diff --git a/esp32c2-hal/examples/sha.rs b/esp32c2-hal/examples/sha.rs new file mode 100644 index 00000000000..07234e64cd5 --- /dev/null +++ b/esp32c2-hal/examples/sha.rs @@ -0,0 +1,79 @@ +//! Demonstrates the use of the SHA peripheral and compares the speed of hardware-accelerated and pure software hashing. +//! 
+ +#![no_std] +#![no_main] + +use esp32c2_hal::{ + clock::ClockControl, + pac::Peripherals, + prelude::*, + timer::TimerGroup, + Rtc, + sha::{Sha, ShaMode}, +}; +use nb::block; +use esp_backtrace as _; +use esp_println::println; +use riscv_rt::entry; +use sha2::{Sha256, Digest}; + +#[entry] +fn main() -> ! { + let peripherals = Peripherals::take().unwrap(); + let system = peripherals.SYSTEM.split(); + let clocks = ClockControl::boot_defaults(system.clock_control).freeze(); + + let timer_group0 = TimerGroup::new(peripherals.TIMG0, &clocks); + let mut wdt = timer_group0.wdt; + let mut rtc = Rtc::new(peripherals.RTC_CNTL); + + // Disable MWDT and RWDT (Watchdog) flash boot protection + wdt.disable(); + rtc.rwdt.disable(); + + + let source_data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".as_bytes(); + let mut remaining = source_data.clone(); + let mut hasher = Sha::new(peripherals.SHA, ShaMode::SHA256); + + // Short hashes can be created by decreasing the output buffer to the desired length + let mut output = [0u8; 32]; + + //let pre_calc = xtensa_lx::timer::get_cycle_count(); + // The hardware implementation takes a subslice of the input, and returns the unprocessed parts + // The unprocessed parts can be input in the next iteration, you can always add more data until + // finish() is called. After finish() is called update()'s will contribute to a new hash which + // can be extracted again with finish(). + + while remaining.len() > 0 { + // Can add println to view progress, however println takes a few orders of magnitude longer than + // the Sha function itself so not useful for comparing processing time + // println!("Remaining len: {}", remaining.len()); + + // All the HW Sha functions are infallible so unwrap is fine to use if you use block! 
+ remaining = block!(hasher.update(remaining)).unwrap(); + } + + // Finish can be called as many times as desired to get mutliple copies of the output. + block!(hasher.finish(output.as_mut_slice())).unwrap(); + //let post_calc = xtensa_lx::timer::get_cycle_count(); + //let hw_time = post_calc - pre_calc; + //println!("Took {} cycles", hw_time); + println!("SHA256 Hash output {:02x?}", output); + let _usha = hasher.free(); + + + //let pre_calc = xtensa_lx::timer::get_cycle_count(); + let mut hasher = Sha256::new(); + hasher.update(source_data); + let soft_result = hasher.finalize(); + //let post_calc = xtensa_lx::timer::get_cycle_count(); + //let soft_time = post_calc - pre_calc; + //println!("Took {} cycles", soft_time); + println!("SHA256 Hash output {:02x?}", soft_result); + + //println!("HW SHA is {}x faster", soft_time/hw_time); + + loop {} +} diff --git a/esp32c2-hal/src/lib.rs b/esp32c2-hal/src/lib.rs index 3c4aca22530..38321d7fd39 100644 --- a/esp32c2-hal/src/lib.rs +++ b/esp32c2-hal/src/lib.rs @@ -28,6 +28,7 @@ pub use esp_hal_common::{ Rtc, Rwdt, Serial, + sha, }; pub use self::gpio::IO; diff --git a/esp32c3-hal/Cargo.toml b/esp32c3-hal/Cargo.toml index adb30430577..6119c37811b 100644 --- a/esp32c3-hal/Cargo.toml +++ b/esp32c3-hal/Cargo.toml @@ -42,6 +42,7 @@ embassy-executor = { package = "embassy-executor", git = "https://github.com/em embedded-graphics = "0.7.1" esp-backtrace = { version = "0.3.0", features = ["esp32c3", "panic-handler", "exception-handler", "print-uart"] } esp-println = { version = "0.3.1", features = ["esp32c3"] } +sha2 = { version = "0.10.6", default-features = false} smart-leds = "0.3.0" ssd1306 = "0.7.1" static_cell = "1.0.0" diff --git a/esp32c3-hal/examples/sha.rs b/esp32c3-hal/examples/sha.rs new file mode 100644 index 00000000000..27eca891b85 --- /dev/null +++ b/esp32c3-hal/examples/sha.rs @@ -0,0 +1,79 @@ +//! Demonstrates the use of the SHA peripheral and compares the speed of hardware-accelerated and pure software hashing. 
+//! + +#![no_std] +#![no_main] + +use esp32c3_hal::{ + clock::ClockControl, + pac::Peripherals, + prelude::*, + timer::TimerGroup, + Rtc, + sha::{Sha, ShaMode}, +}; +use nb::block; +use esp_backtrace as _; +use esp_println::println; +use riscv_rt::entry; +use sha2::{Sha256, Digest}; + +#[entry] +fn main() -> ! { + let peripherals = Peripherals::take().unwrap(); + let system = peripherals.SYSTEM.split(); + let clocks = ClockControl::boot_defaults(system.clock_control).freeze(); + + let timer_group0 = TimerGroup::new(peripherals.TIMG0, &clocks); + let mut wdt = timer_group0.wdt; + let mut rtc = Rtc::new(peripherals.RTC_CNTL); + + // Disable MWDT and RWDT (Watchdog) flash boot protection + wdt.disable(); + rtc.rwdt.disable(); + + + let source_data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".as_bytes(); + let mut remaining = source_data.clone(); + let mut hasher = Sha::new(peripherals.SHA, ShaMode::SHA256); + + // Short hashes can be created by decreasing the output buffer to the desired length + let mut output = [0u8; 32]; + + //let pre_calc = xtensa_lx::timer::get_cycle_count(); + // The hardware implementation takes a subslice of the input, and returns the unprocessed parts + // The unprocessed parts can be input in the next iteration, you can always add more data until + // finish() is called. After finish() is called update()'s will contribute to a new hash which + // can be extracted again with finish(). + + while remaining.len() > 0 { + // Can add println to view progress, however println takes a few orders of magnitude longer than + // the Sha function itself so not useful for comparing processing time + // println!("Remaining len: {}", remaining.len()); + + // All the HW Sha functions are infallible so unwrap is fine to use if you use block! 
+ remaining = block!(hasher.update(remaining)).unwrap(); + } + + // Finish can be called as many times as desired to get mutliple copies of the output. + block!(hasher.finish(output.as_mut_slice())).unwrap(); + //let post_calc = xtensa_lx::timer::get_cycle_count(); + //let hw_time = post_calc - pre_calc; + //println!("Took {} cycles", hw_time); + println!("SHA256 Hash output {:02x?}", output); + let _usha = hasher.free(); + + + //let pre_calc = xtensa_lx::timer::get_cycle_count(); + let mut hasher = Sha256::new(); + hasher.update(source_data); + let soft_result = hasher.finalize(); + //let post_calc = xtensa_lx::timer::get_cycle_count(); + //let soft_time = post_calc - pre_calc; + //println!("Took {} cycles", soft_time); + println!("SHA256 Hash output {:02x?}", soft_result); + + //println!("HW SHA is {}x faster", soft_time/hw_time); + + loop {} +} diff --git a/esp32c3-hal/src/lib.rs b/esp32c3-hal/src/lib.rs index 7d9921d3b57..cf7f7fbc169 100644 --- a/esp32c3-hal/src/lib.rs +++ b/esp32c3-hal/src/lib.rs @@ -34,6 +34,7 @@ pub use esp_hal_common::{ Rwdt, Serial, UsbSerialJtag, + sha }; #[cfg(feature = "embassy")] diff --git a/esp32s2-hal/Cargo.toml b/esp32s2-hal/Cargo.toml index 2abc2ac4d89..f9287125187 100644 --- a/esp32s2-hal/Cargo.toml +++ b/esp32s2-hal/Cargo.toml @@ -41,6 +41,7 @@ embassy-executor = { package = "embassy-executor", git = "https://github.com/em embedded-graphics = "0.7.1" esp-backtrace = { version = "0.3.0", features = ["esp32s2", "panic-handler", "print-uart"] } esp-println = { version = "0.3.1", features = ["esp32s2"] } +sha2 = { version = "0.10.6", default-features = false} smart-leds = "0.3.0" ssd1306 = "0.7.1" usb-device = { version = "0.2.9" } diff --git a/esp32s2-hal/examples/sha.rs b/esp32s2-hal/examples/sha.rs new file mode 100644 index 00000000000..1e16b0f4842 --- /dev/null +++ b/esp32s2-hal/examples/sha.rs @@ -0,0 +1,79 @@ +//! 
Demonstrates the use of the SHA peripheral and compares the speed of hardware-accelerated and pure software hashing. +//! + +#![no_std] +#![no_main] + +use esp32s2_hal::{ + clock::ClockControl, + pac::Peripherals, + prelude::*, + timer::TimerGroup, + Rtc, + sha::{Sha, ShaMode}, +}; +use nb::block; +use esp_backtrace as _; +use esp_println::println; +use xtensa_lx_rt::entry; +use sha2::{Sha512, Digest}; + +#[entry] +fn main() -> ! { + let peripherals = Peripherals::take().unwrap(); + let system = peripherals.SYSTEM.split(); + let clocks = ClockControl::boot_defaults(system.clock_control).freeze(); + + let timer_group0 = TimerGroup::new(peripherals.TIMG0, &clocks); + let mut wdt = timer_group0.wdt; + let mut rtc = Rtc::new(peripherals.RTC_CNTL); + + // Disable MWDT and RWDT (Watchdog) flash boot protection + wdt.disable(); + rtc.rwdt.disable(); + + + let source_data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".as_bytes(); + let mut remaining = source_data.clone(); + let mut hasher = Sha::new(peripherals.SHA, ShaMode::SHA512); + + // Short hashes can be created by decreasing the output buffer to the desired length + let mut output = [0u8; 64]; + + let pre_calc = xtensa_lx::timer::get_cycle_count(); + // The hardware implementation takes a subslice of the input, and returns the unprocessed parts + // The unprocessed parts can be input in the next iteration, you can always add more data until + // finish() is called. After finish() is called update()'s will contribute to a new hash which + // can be extracted again with finish(). 
+ + while remaining.len() > 0 { + // Can add println to view progress, however println takes a few orders of magnitude longer than + // the Sha function itself so not useful for comparing processing time + // println!("Remaining len: {}", remaining.len()); + + // All the HW Sha functions are infallible so unwrap is fine to use if you use block! + remaining = block!(hasher.update(remaining)).unwrap(); + } + + // Finish can be called as many times as desired to get mutliple copies of the output. + block!(hasher.finish(output.as_mut_slice())).unwrap(); + let post_calc = xtensa_lx::timer::get_cycle_count(); + let hw_time = post_calc - pre_calc; + println!("Took {} cycles", hw_time); + println!("SHA512 Hash output {:02x?}", output); + let _usha = hasher.free(); + + + let pre_calc = xtensa_lx::timer::get_cycle_count(); + let mut hasher = Sha512::new(); + hasher.update(source_data); + let soft_result = hasher.finalize(); + let post_calc = xtensa_lx::timer::get_cycle_count(); + let soft_time = post_calc - pre_calc; + println!("Took {} cycles", soft_time); + println!("SHA512 Hash output {:02x?}", soft_result); + + println!("HW SHA is {}x faster", soft_time/hw_time); + + loop {} +} diff --git a/esp32s2-hal/src/lib.rs b/esp32s2-hal/src/lib.rs index b109e6d5077..e806c15fd1a 100644 --- a/esp32s2-hal/src/lib.rs +++ b/esp32s2-hal/src/lib.rs @@ -30,6 +30,7 @@ pub use esp_hal_common::{ Rtc, Rwdt, Serial, + sha }; #[cfg(feature = "embassy")] diff --git a/esp32s3-hal/Cargo.toml b/esp32s3-hal/Cargo.toml index 63bcc433726..c5439e216bf 100644 --- a/esp32s3-hal/Cargo.toml +++ b/esp32s3-hal/Cargo.toml @@ -42,6 +42,7 @@ embassy-executor = { package = "embassy-executor", git = "https://github.com/em embedded-graphics = "0.7.1" esp-backtrace = { version = "0.3.0", features = ["esp32s3", "panic-handler", "exception-handler", "print-uart"] } esp-println = { version = "0.3.1", features = ["esp32s3"] } +sha2 = { version = "0.10.6", default-features = false} smart-leds = "0.3.0" ssd1306 = "0.7.1" 
usb-device = { version = "0.2.9" }
diff --git a/esp32s3-hal/examples/sha.rs b/esp32s3-hal/examples/sha.rs
new file mode 100644
index 00000000000..b47b11f515b
--- /dev/null
+++ b/esp32s3-hal/examples/sha.rs
@@ -0,0 +1,79 @@
+//! Demonstrates the use of the SHA peripheral and compares the speed of hardware-accelerated
+//! and pure software hashing.
+
+#![no_std]
+#![no_main]
+
+use esp32s3_hal::{
+    clock::ClockControl,
+    pac::Peripherals,
+    prelude::*,
+    timer::TimerGroup,
+    Rtc,
+    sha::{Sha, ShaMode},
+};
+use nb::block;
+use esp_backtrace as _;
+use esp_println::println;
+use xtensa_lx_rt::entry;
+use sha2::{Sha512, Digest};
+
+#[entry]
+fn main() -> ! {
+    let peripherals = Peripherals::take().unwrap();
+    let system = peripherals.SYSTEM.split();
+    let clocks = ClockControl::boot_defaults(system.clock_control).freeze();
+
+    let timer_group0 = TimerGroup::new(peripherals.TIMG0, &clocks);
+    let mut wdt = timer_group0.wdt;
+    let mut rtc = Rtc::new(peripherals.RTC_CNTL);
+
+    // Disable MWDT and RWDT (Watchdog) flash boot protection
+    wdt.disable();
+    rtc.rwdt.disable();
+
+    // The same message is hashed by the SHA peripheral and then by the software `sha2` crate.
+    let source_data = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa".as_bytes();
+    let mut remaining = source_data;
+    let mut hasher = Sha::new(peripherals.SHA, ShaMode::SHA512);
+
+    // Short hashes can be created by decreasing the output buffer to the desired length
+    let mut output = [0u8; 64];
+
+    let pre_calc = xtensa_lx::timer::get_cycle_count();
+    // `update()` takes part of the input and returns the unprocessed remainder, which is fed
+    // back in on the next iteration. More data can be added until `finish()` is called; after
+    // that, further `update()` calls contribute to a new hash, extracted again with `finish()`.
+
+    while !remaining.is_empty() {
+        // `println!("Remaining len: {}", remaining.len());` here would show progress, but it
+        // takes orders of magnitude longer than the hash itself and would skew the timing.
+
+        // All the HW Sha functions are infallible so unwrap is fine to use if you use block!
+        remaining = block!(hasher.update(remaining)).unwrap();
+    }
+
+    // Finish can be called as many times as desired to get multiple copies of the output.
+    block!(hasher.finish(output.as_mut_slice())).unwrap();
+    let post_calc = xtensa_lx::timer::get_cycle_count();
+    // The cycle counter may wrap between the two reads, so take the difference with wrap-around.
+    let hw_time = post_calc.wrapping_sub(pre_calc);
+    println!("Took {} cycles", hw_time);
+    println!("SHA512 Hash output {:02x?}", output);
+    let _usha = hasher.free();
+
+    // Repeat the measurement with the pure-software `sha2` implementation.
+    let pre_calc = xtensa_lx::timer::get_cycle_count();
+    let mut hasher = Sha512::new();
+    hasher.update(source_data);
+    let soft_result = hasher.finalize();
+    let post_calc = xtensa_lx::timer::get_cycle_count();
+    // Same wrap-safe difference as for the hardware measurement above.
+    let soft_time = post_calc.wrapping_sub(pre_calc);
+    println!("Took {} cycles", soft_time);
+    println!("SHA512 Hash output {:02x?}", soft_result);
+
+    println!("HW SHA is {}x faster", soft_time / hw_time);
+
+    loop {}
+}
diff --git a/esp32s3-hal/src/lib.rs b/esp32s3-hal/src/lib.rs
index 94389bf4304..fd161de7632 100644
--- a/esp32s3-hal/src/lib.rs
+++ b/esp32s3-hal/src/lib.rs
@@ -33,6 +33,7 @@ pub use esp_hal_common::{
     Rwdt,
     Serial,
     UsbSerialJtag,
+    sha
 };
 
 #[cfg(feature = "embassy")]