Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
cf2d0ce
diagnostics: suggest changing `s@self::{macro}@::macro` for exported
notriddle Nov 13, 2022
98091c0
diagnostics: add `};` only if `{` was added too
notriddle Nov 13, 2022
dbc0ed2
Unify stable and unstable sort implementations in same core module
Voultapher Nov 20, 2022
1ec59cd
Remove debug unused
Voultapher Nov 21, 2022
4b5844f
Document all unsafe blocks
Voultapher Nov 21, 2022
04af19f
diagnostics: use `module_path` to check crate import instead of strings
notriddle Dec 6, 2022
f2070c3
diagnostics: remvoe unnecessary use of `source_map.start_point`
notriddle Dec 6, 2022
280f69d
Fix IndexVec::drain_enumerated
compiler-errors Jan 19, 2023
c9c8e29
HACK: self ty ambiguity hack
compiler-errors Jan 19, 2023
ac4956b
Support `.comment` section like GCC/Clang (`!llvm.ident`)
ojeda May 28, 2022
aee75f2
Assert goal is fully normalized during assemble
compiler-errors Jan 19, 2023
f53f5b4
swap Ambiguity and Unimplemented in new trait engine
compiler-errors Jan 19, 2023
69890b2
trait solver: PointerSized
compiler-errors Jan 18, 2023
ed6aebb
trait solver: Implement Fn traits and tuple trait
compiler-errors Jan 19, 2023
05889fc
rustdoc: remove redundant CSS selector `.sidebar .current`
notriddle Jan 19, 2023
734f375
Change `bindings_with_variant_name` to deny-by-default
timrobertsdev Nov 8, 2022
1cbce72
Add `compile_fail` to doctest for `bindings_with_variant_name`
timrobertsdev Nov 8, 2022
1adb4d6
Fix typo in opaque_types.rs
eltociear Jan 20, 2023
a641b92
remove leading comma when there are no args in check macro expansion
DebugSteven Jan 19, 2023
c3e30f1
Rollup merge of #97550 - ojeda:comment-section, r=bjorn3
compiler-errors Jan 20, 2023
bb5f43e
Rollup merge of #104154 - timrobertsdev:deny-by-default-bindings_with…
compiler-errors Jan 20, 2023
993932b
Rollup merge of #104347 - notriddle:notriddle/import-macro-from-self-…
compiler-errors Jan 20, 2023
4b7bec8
Rollup merge of #104672 - Voultapher:unify-sort-modules, r=thomcc
compiler-errors Jan 20, 2023
8a66e39
Rollup merge of #107061 - compiler-errors:new-solver-new-candidates-3…
compiler-errors Jan 20, 2023
7c5f572
Rollup merge of #107095 - notriddle:notriddle/sidebar-current, r=Guil…
compiler-errors Jan 20, 2023
644e0cc
Rollup merge of #107112 - eltociear:patch-19, r=albertlarsan68
compiler-errors Jan 20, 2023
222510e
Rollup merge of #107124 - DebugSteven:check-macro-expansion, r=albert…
compiler-errors Jan 20, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Unify stable and unstable sort implementations in same core module
This moves the stable sort implementation to the core::slice::sort module. By
virtue of being in core it can't access `Vec`. The two `Vec` used by merge sort,
`buf` and `runs`, are modelled as custom types that implement the very limited
required `Vec` interface with the help of provided allocation and free
functions. This is done to allow future re-use of functions and logic between
stable and unstable sort. Such as `insert_head`.
  • Loading branch information
Voultapher committed Nov 20, 2022
commit dbc0ed2a109a3409bd21529ca8375a43d5580739
348 changes: 39 additions & 309 deletions library/alloc/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,12 @@ use core::cmp::Ordering::{self, Less};
use core::mem::{self, SizedTypeProperties};
#[cfg(not(no_global_oom_handling))]
use core::ptr;
#[cfg(not(no_global_oom_handling))]
use core::slice::sort;

use crate::alloc::Allocator;
#[cfg(not(no_global_oom_handling))]
use crate::alloc::Global;
use crate::alloc::{self, Global};
#[cfg(not(no_global_oom_handling))]
use crate::borrow::ToOwned;
use crate::boxed::Box;
Expand Down Expand Up @@ -203,7 +205,7 @@ impl<T> [T] {
where
T: Ord,
{
merge_sort(self, T::lt);
stable_sort(self, T::lt);
}

/// Sorts the slice with a comparator function.
Expand Down Expand Up @@ -259,7 +261,7 @@ impl<T> [T] {
where
F: FnMut(&T, &T) -> Ordering,
{
merge_sort(self, |a, b| compare(a, b) == Less);
stable_sort(self, |a, b| compare(a, b) == Less);
}

/// Sorts the slice with a key extraction function.
Expand Down Expand Up @@ -302,7 +304,7 @@ impl<T> [T] {
F: FnMut(&T) -> K,
K: Ord,
{
merge_sort(self, |a, b| f(a).lt(&f(b)));
stable_sort(self, |a, b| f(a).lt(&f(b)));
}

/// Sorts the slice with a key extraction function.
Expand Down Expand Up @@ -809,324 +811,52 @@ impl<T: Clone> ToOwned for [T] {
// Sorting
////////////////////////////////////////////////////////////////////////////////

/// Inserts `v[0]` into pre-sorted sequence `v[1..]` so that whole `v[..]` becomes sorted.
///
/// This is the integral subroutine of insertion sort.
#[cfg(not(no_global_oom_handling))]
fn insert_head<T, F>(v: &mut [T], is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
if v.len() >= 2 && is_less(&v[1], &v[0]) {
unsafe {
// There are three ways to implement insertion here:
//
// 1. Swap adjacent elements until the first one gets to its final destination.
// However, this way we copy data around more than is necessary. If elements are big
// structures (costly to copy), this method will be slow.
//
// 2. Iterate until the right place for the first element is found. Then shift the
// elements succeeding it to make room for it and finally place it into the
// remaining hole. This is a good method.
//
// 3. Copy the first element into a temporary variable. Iterate until the right place
// for it is found. As we go along, copy every traversed element into the slot
// preceding it. Finally, copy data from the temporary variable into the remaining
// hole. This method is very good. Benchmarks demonstrated slightly better
// performance than with the 2nd method.
//
// All methods were benchmarked, and the 3rd showed best results. So we chose that one.
let tmp = mem::ManuallyDrop::new(ptr::read(&v[0]));

// Intermediate state of the insertion process is always tracked by `hole`, which
// serves two purposes:
// 1. Protects integrity of `v` from panics in `is_less`.
// 2. Fills the remaining hole in `v` in the end.
//
// Panic safety:
//
// If `is_less` panics at any point during the process, `hole` will get dropped and
// fill the hole in `v` with `tmp`, thus ensuring that `v` still holds every object it
// initially held exactly once.
let mut hole = InsertionHole { src: &*tmp, dest: &mut v[1] };
ptr::copy_nonoverlapping(&v[1], &mut v[0], 1);

for i in 2..v.len() {
if !is_less(&v[i], &*tmp) {
break;
}
ptr::copy_nonoverlapping(&v[i], &mut v[i - 1], 1);
hole.dest = &mut v[i];
}
// `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
}
}

// When dropped, copies from `src` into `dest`.
struct InsertionHole<T> {
src: *const T,
dest: *mut T,
}

impl<T> Drop for InsertionHole<T> {
fn drop(&mut self) {
unsafe {
ptr::copy_nonoverlapping(self.src, self.dest, 1);
}
}
}
}

/// Merges non-decreasing runs `v[..mid]` and `v[mid..]` using `buf` as temporary storage, and
/// stores the result into `v[..]`.
///
/// # Safety
///
/// The two slices must be non-empty and `mid` must be in bounds. Buffer `buf` must be long enough
/// to hold a copy of the shorter slice. Also, `T` must not be a zero-sized type.
#[cfg(not(no_global_oom_handling))]
unsafe fn merge<T, F>(v: &mut [T], mid: usize, buf: *mut T, is_less: &mut F)
where
F: FnMut(&T, &T) -> bool,
{
let len = v.len();
let v = v.as_mut_ptr();
let (v_mid, v_end) = unsafe { (v.add(mid), v.add(len)) };

// The merge process first copies the shorter run into `buf`. Then it traces the newly copied
// run and the longer run forwards (or backwards), comparing their next unconsumed elements and
// copying the lesser (or greater) one into `v`.
//
// As soon as the shorter run is fully consumed, the process is done. If the longer run gets
// consumed first, then we must copy whatever is left of the shorter run into the remaining
// hole in `v`.
//
// Intermediate state of the process is always tracked by `hole`, which serves two purposes:
// 1. Protects integrity of `v` from panics in `is_less`.
// 2. Fills the remaining hole in `v` if the longer run gets consumed first.
//
// Panic safety:
//
// If `is_less` panics at any point during the process, `hole` will get dropped and fill the
// hole in `v` with the unconsumed range in `buf`, thus ensuring that `v` still holds every
// object it initially held exactly once.
let mut hole;

if mid <= len - mid {
// The left run is shorter.
unsafe {
ptr::copy_nonoverlapping(v, buf, mid);
hole = MergeHole { start: buf, end: buf.add(mid), dest: v };
}

// Initially, these pointers point to the beginnings of their arrays.
let left = &mut hole.start;
let mut right = v_mid;
let out = &mut hole.dest;

while *left < hole.end && right < v_end {
// Consume the lesser side.
// If equal, prefer the left run to maintain stability.
unsafe {
let to_copy = if is_less(&*right, &**left) {
get_and_increment(&mut right)
} else {
get_and_increment(left)
};
ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
}
}
} else {
// The right run is shorter.
unsafe {
ptr::copy_nonoverlapping(v_mid, buf, len - mid);
hole = MergeHole { start: buf, end: buf.add(len - mid), dest: v_mid };
}

// Initially, these pointers point past the ends of their arrays.
let left = &mut hole.dest;
let right = &mut hole.end;
let mut out = v_end;

while v < *left && buf < *right {
// Consume the greater side.
// If equal, prefer the right run to maintain stability.
unsafe {
let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
decrement_and_get(left)
} else {
decrement_and_get(right)
};
ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
}
}
}
// Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
// it will now be copied into the hole in `v`.

unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
let old = *ptr;
*ptr = unsafe { ptr.add(1) };
old
}

unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
*ptr = unsafe { ptr.sub(1) };
*ptr
}

// When dropped, copies the range `start..end` into `dest..`.
struct MergeHole<T> {
start: *mut T,
end: *mut T,
dest: *mut T,
}

impl<T> Drop for MergeHole<T> {
fn drop(&mut self) {
// `T` is not a zero-sized type, and these are pointers into a slice's elements.
unsafe {
let len = self.end.sub_ptr(self.start);
ptr::copy_nonoverlapping(self.start, self.dest, len);
}
}
}
}

/// This merge sort borrows some (but not all) ideas from TimSort, which is described in detail
/// [here](https://github.com/python/cpython/blob/main/Objects/listsort.txt).
///
/// The algorithm identifies strictly descending and non-descending subsequences, which are called
/// natural runs. There is a stack of pending runs yet to be merged. Each newly found run is pushed
/// onto the stack, and then some pairs of adjacent runs are merged until these two invariants are
/// satisfied:
///
/// 1. for every `i` in `1..runs.len()`: `runs[i - 1].len > runs[i].len`
/// 2. for every `i` in `2..runs.len()`: `runs[i - 2].len > runs[i - 1].len + runs[i].len`
///
/// The invariants ensure that the total running time is *O*(*n* \* log(*n*)) worst-case.
#[inline]
#[cfg(not(no_global_oom_handling))]
fn merge_sort<T, F>(v: &mut [T], mut is_less: F)
fn stable_sort<T, F>(v: &mut [T], mut is_less: F)
where
F: FnMut(&T, &T) -> bool,
{
// Slices of up to this length get sorted using insertion sort.
const MAX_INSERTION: usize = 20;
// Very short runs are extended using insertion sort to span at least this many elements.
const MIN_RUN: usize = 10;

// Sorting has no meaningful behavior on zero-sized types.
if T::IS_ZST {
// Sorting has no meaningful behavior on zero-sized types. Do nothing.
return;
}

let len = v.len();

// Short arrays get sorted in-place via insertion sort to avoid allocations.
if len <= MAX_INSERTION {
if len >= 2 {
for i in (0..len - 1).rev() {
insert_head(&mut v[i..], &mut is_less);
}
}
return;
}

// Allocate a buffer to use as scratch memory. We keep the length 0 so we can keep in it
// shallow copies of the contents of `v` without risking the dtors running on copies if
// `is_less` panics. When merging two sorted runs, this buffer holds a copy of the shorter run,
// which will always have length at most `len / 2`.
let mut buf = Vec::with_capacity(len / 2);
let elem_alloc_fn = |len: usize| -> *mut T {
// SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
// v.len(). Alloc in general will only be used as 'shadow-region' to store temporary swap
// elements.
unsafe { alloc::alloc(alloc::Layout::array::<T>(len).unwrap_unchecked()) as *mut T }
};

// In order to identify natural runs in `v`, we traverse it backwards. That might seem like a
// strange decision, but consider the fact that merges more often go in the opposite direction
// (forwards). According to benchmarks, merging forwards is slightly faster than merging
// backwards. To conclude, identifying runs by traversing backwards improves performance.
let mut runs = vec![];
let mut end = len;
while end > 0 {
// Find the next natural run, and reverse it if it's strictly descending.
let mut start = end - 1;
if start > 0 {
start -= 1;
unsafe {
if is_less(v.get_unchecked(start + 1), v.get_unchecked(start)) {
while start > 0 && is_less(v.get_unchecked(start), v.get_unchecked(start - 1)) {
start -= 1;
}
v[start..end].reverse();
} else {
while start > 0 && !is_less(v.get_unchecked(start), v.get_unchecked(start - 1))
{
start -= 1;
}
}
}
}

// Insert some more elements into the run if it's too short. Insertion sort is faster than
// merge sort on short sequences, so this significantly improves performance.
while start > 0 && end - start < MIN_RUN {
start -= 1;
insert_head(&mut v[start..end], &mut is_less);
let elem_dealloc_fn = |buf_ptr: *mut T, len: usize| {
// SAFETY: Creating the layout is safe as long as merge_sort never calls this with len >
// v.len(). The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
// len.
unsafe {
alloc::dealloc(buf_ptr as *mut u8, alloc::Layout::array::<T>(len).unwrap_unchecked());
}
};

// Push this run onto the stack.
runs.push(Run { start, len: end - start });
end = start;

// Merge some pairs of adjacent runs to satisfy the invariants.
while let Some(r) = collapse(&runs) {
let left = runs[r + 1];
let right = runs[r];
unsafe {
merge(
&mut v[left.start..right.start + right.len],
left.len,
buf.as_mut_ptr(),
&mut is_less,
);
}
runs[r] = Run { start: left.start, len: left.len + right.len };
runs.remove(r + 1);
let run_alloc_fn = |len: usize| -> *mut sort::TimSortRun {
// SAFETY: Creating the layout is safe as long as merge_sort never calls this with an
// obscene length or 0.
unsafe {
alloc::alloc(alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked())
as *mut sort::TimSortRun
}
}

// Finally, exactly one run must remain in the stack.
debug_assert!(runs.len() == 1 && runs[0].start == 0 && runs[0].len == len);
};

// Examines the stack of runs and identifies the next pair of runs to merge. More specifically,
// if `Some(r)` is returned, that means `runs[r]` and `runs[r + 1]` must be merged next. If the
// algorithm should continue building a new run instead, `None` is returned.
//
// TimSort is infamous for its buggy implementations, as described here:
// http://envisage-project.eu/timsort-specification-and-verification/
//
// The gist of the story is: we must enforce the invariants on the top four runs on the stack.
// Enforcing them on just top three is not sufficient to ensure that the invariants will still
// hold for *all* runs in the stack.
//
// This function correctly checks invariants for the top four runs. Additionally, if the top
// run starts at index 0, it will always demand a merge operation until the stack is fully
// collapsed, in order to complete the sort.
#[inline]
fn collapse(runs: &[Run]) -> Option<usize> {
let n = runs.len();
if n >= 2
&& (runs[n - 1].start == 0
|| runs[n - 2].len <= runs[n - 1].len
|| (n >= 3 && runs[n - 3].len <= runs[n - 2].len + runs[n - 1].len)
|| (n >= 4 && runs[n - 4].len <= runs[n - 3].len + runs[n - 2].len))
{
if n >= 3 && runs[n - 3].len < runs[n - 1].len { Some(n - 3) } else { Some(n - 2) }
} else {
None
let run_dealloc_fn = |buf_ptr: *mut sort::TimSortRun, len: usize| {
// SAFETY: The caller must ensure that buf_ptr was created by elem_alloc_fn with the same
// len.
unsafe {
alloc::dealloc(
buf_ptr as *mut u8,
alloc::Layout::array::<sort::TimSortRun>(len).unwrap_unchecked(),
);
}
}
};

#[derive(Clone, Copy)]
struct Run {
start: usize,
len: usize,
}
sort::merge_sort(v, &mut is_less, elem_alloc_fn, elem_dealloc_fn, run_alloc_fn, run_dealloc_fn);
}
Loading