From e1c2d30d449555ff0e6b1a2e64b2c194eab9b382 Mon Sep 17 00:00:00 2001 From: overlookmotel <557937+overlookmotel@users.noreply.github.com> Date: Sat, 19 Oct 2024 15:43:54 +0000 Subject: [PATCH] fix(allocator)!: make `Vec` non-drop (#6623) `oxc_allocator::Vec` is intended for storing AST types in the arena. `Vec` is intended to be non-drop, because all AST types are non-drop. If they were not, it would be a memory leak, because those types will not have `drop` called on them when the allocator is dropped. However, `oxc_allocator::Vec` is currently a wrapper around `allocator_api2::vec::Vec`, which *is* drop. That unintentionally makes `oxc_allocator::Vec` drop too. This PR fixes this by wrapping the inner `allocator_api2::vec::Vec` in `ManuallyDrop`. This makes `oxc_allocator::Vec` non-drop. The wider consequence of this change is that the compiler is now able to see that loads of other types which contain `oxc_allocator::Vec` are also non-drop. Once it can prove that, it can remove a load of code which handles dropping these types in the event of a panic. This probably also then allows it to make many further optimizations on that simplified code. Strictly speaking, this PR is a breaking change. If `oxc_allocator::Vec` is abused to store drop types, then in some circumstances this change could produce a memory leak where there was none before. However, we've always been clear that only non-drop types should be stored in the arena, so such usage was always a bug. #6622 fixes the only place in Oxc where we mistakenly stored non-drop types in `Vec`. The change to `oxc_prettier` is because compiler can now deduce that `Doc` is non-drop, which causes clippy to raise a warning about using `then` instead of `then_some`. As follow-up, we should: 1. Wrap other `allocator_api2` types (e.g. `IntoIter`) in `ManuallyDrop` too, so compiler can prove they are non-drop too (or reimplement `Vec` ourselves - #6488). 2. Introduce static checks to prevent non-drop types being used in `Box` and `Vec`, to make memory leaks impossible, and detect them at compile time. --- crates/oxc_allocator/src/vec.rs | 43 ++++++++++++++++++++++++--------- crates/oxc_prettier/src/lib.rs | 2 +- 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/crates/oxc_allocator/src/vec.rs b/crates/oxc_allocator/src/vec.rs index 653fc0e928cd5..273120f69e66c 100644 --- a/crates/oxc_allocator/src/vec.rs +++ b/crates/oxc_allocator/src/vec.rs @@ -4,8 +4,9 @@ use std::{ self, - fmt::Debug, + fmt::{self, Debug}, hash::{Hash, Hasher}, + mem::ManuallyDrop, ops, ptr::NonNull, }; @@ -17,9 +18,18 @@ use serde::{ser::SerializeSeq, Serialize, Serializer}; use crate::{Allocator, Box}; -/// Bumpalo Vec -#[derive(Debug, PartialEq, Eq)] -pub struct Vec<'alloc, T>(vec::Vec); +/// A `Vec` without [`Drop`], which stores its data in the arena allocator. +/// +/// Should only be used for storing AST types. +/// +/// Must NOT be used to store types which have a [`Drop`] implementation. +/// `T::drop` will NOT be called on the `Vec`'s contents when the `Vec` is dropped. +/// If `T` owns memory outside of the arena, this will be a memory leak. +/// +/// Note: This is not a soundness issue, as Rust does not support relying on `drop` +/// being called to guarantee soundness. +#[derive(PartialEq, Eq)] +pub struct Vec<'alloc, T>(ManuallyDrop>); impl<'alloc, T> Vec<'alloc, T> { /// Constructs a new, empty `Vec`. @@ -38,7 +48,7 @@ impl<'alloc, T> Vec<'alloc, T> { /// ``` #[inline] pub fn new_in(allocator: &'alloc Allocator) -> Self { - Self(vec::Vec::new_in(allocator)) + Self(ManuallyDrop::new(vec::Vec::new_in(allocator))) } /// Constructs a new, empty `Vec` with at least the specified capacity @@ -90,7 +100,7 @@ impl<'alloc, T> Vec<'alloc, T> { /// ``` #[inline] pub fn with_capacity_in(capacity: usize, allocator: &'alloc Allocator) -> Self { - Self(vec::Vec::with_capacity_in(capacity, allocator)) + Self(ManuallyDrop::new(vec::Vec::with_capacity_in(capacity, allocator))) } /// Create a new [`Vec`] whose elements are taken from an iterator and @@ -102,7 +112,7 @@ impl<'alloc, T> Vec<'alloc, T> { let iter = iter.into_iter(); let hint = iter.size_hint(); let capacity = hint.1.unwrap_or(hint.0); - let mut vec = vec::Vec::with_capacity_in(capacity, &**allocator); + let mut vec = ManuallyDrop::new(vec::Vec::with_capacity_in(capacity, &**allocator)); vec.extend(iter); Self(vec) } @@ -128,7 +138,8 @@ impl<'alloc, T> Vec<'alloc, T> { /// /// [owned slice]: Box pub fn into_boxed_slice(self) -> Box<'alloc, [T]> { - let slice = self.0.leak(); + let inner = ManuallyDrop::into_inner(self.0); + let slice = inner.leak(); let ptr = NonNull::from(slice); // SAFETY: `ptr` points to a valid slice `[T]`. // `allocator_api2::vec::Vec::leak` consumes the inner `Vec` without dropping it. @@ -160,7 +171,10 @@ impl<'alloc, T> IntoIterator for Vec<'alloc, T> { type Item = T; fn into_iter(self) -> Self::IntoIter { - self.0.into_iter() + let inner = ManuallyDrop::into_inner(self.0); + // TODO: `allocator_api2::vec::Vec::IntoIter` is `Drop`. + // Wrap it in `ManuallyDrop` to prevent that. + inner.into_iter() } } @@ -198,7 +212,7 @@ where S: Serializer, { let mut seq = s.serialize_seq(Some(self.0.len()))?; - for e in &self.0 { + for e in self.0.iter() { seq.serialize_element(e)?; } seq.end() @@ -207,12 +221,19 @@ where impl<'alloc, T: Hash> Hash for Vec<'alloc, T> { fn hash(&self, state: &mut H) { - for e in &self.0 { + for e in self.0.iter() { e.hash(state); } } } +impl<'alloc, T: Debug> Debug for Vec<'alloc, T> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let inner = &*self.0; + f.debug_tuple("Vec").field(inner).finish() + } +} + #[cfg(test)] mod test { use super::Vec; diff --git a/crates/oxc_prettier/src/lib.rs b/crates/oxc_prettier/src/lib.rs index 74d093c221c2e..af18238df132a 100644 --- a/crates/oxc_prettier/src/lib.rs +++ b/crates/oxc_prettier/src/lib.rs @@ -130,7 +130,7 @@ impl<'a> Prettier<'a> { } pub fn semi(&self) -> Option> { - self.options.semi.then(|| Doc::Str(";")) + self.options.semi.then_some(Doc::Str(";")) } pub fn should_print_es5_comma(&self) -> bool {