-
Notifications
You must be signed in to change notification settings - Fork 14
Introduce SliceWithHeader::from_slice #62
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry, but this is UB as written 😅
You're copying the items out of the slice argument, but Item could be non-Copy. So if Item is e.g. Box<_>, you've duplicated the unique pointer and will get a double-drop.
There's two potential solutions here:
- Bound
Item: Copyso that we can actually do the copy without extra ceremony, or - (far future) Use const generics, take
[Item; N], and justptr::writeit into place.
|
Whoops, good catch! |
|
I was curious, so here's a quick test of the overhead for a copyable slice: pub fn new_slice(s: &[u8]) -> Box<SliceWithHeader<(), u8>> {
SliceWithHeader::new((), s.iter().copied())
}
pub fn new_str(s: &str) -> Box<slice_dst::StrWithHeader<()>> {
StrWithHeader::new((), s)
}ASMplayground::new_slice:
push rbp
push r14
push rsi
push rdi
push rbx
sub rsp, 80
lea rbp, [rsp, +, 80]
mov qword, ptr, [rbp, -, 8], -2
mov rbx, rdx
add rbx, 8
jb .LBB4_14
cmp rbx, -7
jae .LBB4_14
mov rsi, rdx
mov r14, rcx
add rbx, 7
and rbx, -8
je .LBB4_3
mov edx, 8
mov rcx, rbx
call __rust_alloc
test rax, rax
jne .LBB4_5
mov edx, 8
mov rcx, rbx
call slice_dst::alloc_slice_dst_in::{{closure}}
ud2
.LBB4_3:
mov eax, 8
.LBB4_5:
mov qword, ptr, [rbp, -, 40], rax
mov qword, ptr, [rbp, -, 32], rsi
mov qword, ptr, [rbp, -, 24], rbx
mov qword, ptr, [rbp, -, 16], 8
test rsi, rsi
je .LBB4_13
lea rcx, [rsi, -, 1]
xor ebx, ebx
xor edi, edi
.LBB4_7:
test bl, 1
jne .LBB4_8
mov rdx, rdi
add rdi, 1
movzx ebx, byte, ptr, [r14, +, rdx]
mov byte, ptr, [rax, +, rdx, +, 8], bl
cmp rcx, rdx
sete bl
cmp rsi, rdi
jne .LBB4_7
cmp rcx, rdx
jne .LBB4_12
.LBB4_13:
mov qword, ptr, [rax], rsi
mov rdx, rsi
add rsp, 80
pop rbx
pop rdi
pop rsi
pop r14
pop rbp
ret
.LBB4_8:
lea rcx, [rip, +, __unnamed_1]
lea r8, [rip, +, __unnamed_2]
mov edx, 38
call core::option::expect_failed
.LBB4_9:
ud2
.LBB4_14:
lea rax, [rip, +, __unnamed_3]
mov qword, ptr, [rsp, +, 32], rax
lea rcx, [rip, +, __unnamed_4]
lea r9, [rip, +, __unnamed_5]
lea r8, [rbp, -, 40]
mov edx, 43
call core::result::unwrap_failed
ud2
.LBB4_12:
lea rcx, [rip, +, __unnamed_6]
lea r8, [rip, +, __unnamed_7]
mov edx, 39
call core::panicking::panic
jmp .LBB4_9
playground::new_str:
push r14
push rsi
push rdi
push rbx
sub rsp, 56
mov rbx, rdx
add rbx, 8
jb .LBB5_6
cmp rbx, -7
jae .LBB5_6
mov rsi, rdx
mov r14, rcx
add rbx, 7
and rbx, -8
je .LBB5_3
mov edx, 8
mov rcx, rbx
call __rust_alloc
mov rdi, rax
test rax, rax
jne .LBB5_5
mov edx, 8
mov rcx, rbx
call slice_dst::alloc_slice_dst_in::{{closure}}
ud2
.LBB5_3:
mov edi, 8
.LBB5_5:
mov qword, ptr, [rdi], rsi
lea rcx, [rdi, +, 8]
mov rdx, r14
mov r8, rsi
call memcpy
mov rax, rdi
mov rdx, rsi
add rsp, 56
pop rbx
pop rdi
pop rsi
pop r14
ret
.LBB5_6:
lea rax, [rip, +, __unnamed_8]
mov qword, ptr, [rsp, +, 32], rax
lea rcx, [rip, +, __unnamed_4]
lea r9, [rip, +, __unnamed_5]
lea r8, [rsp, +, 48]
mov edx, 43
call core::result::unwrap_failed
ud2
slice_dst::alloc_slice_dst_in::{{closure}}:
sub rsp, 40
call alloc::alloc::handle_alloc_error
ud2The complexity optimizes away decently well, but the simplified slice copy definitely optimizes down significantly better. The remaining unwrap is for if layout calculation overflows, which I don't think we can possibly get to optimize out. |
CAD97
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This looks good now; I'll see if I can get some big-picture docs for how the functionality works and publish this, then.
bors: r+
|
Build succeeded: |
Provides a simpler, possibly faster constructor for a common case.