Skip to content

Conversation

@Ralith
Copy link
Contributor

@Ralith Ralith commented Jun 21, 2020

Provides a simpler, possibly faster constructor for a common case.

Copy link
Owner

@CAD97 CAD97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, but this is UB as written 😅

You're copying the items out of the slice argument, but Item could be non-Copy. So if Item is e.g. Box<_>, you've duplicated the unique pointer and will get a double-drop.

There's two potential solutions here:

  • Bound Item: Copy so that we can actually do the copy without extra ceremony, or
  • (far future) Use const generics, take [Item; N], and just ptr::write it into place.

@Ralith
Copy link
Contributor Author

Ralith commented Jun 21, 2020

Whoops, good catch!

@CAD97
Copy link
Owner

CAD97 commented Jun 21, 2020

I was curious, so here's a quick test of the overhead for a copyable slice:

pub fn new_slice(s: &[u8]) -> Box<SliceWithHeader<(), u8>> {
    SliceWithHeader::new((), s.iter().copied())
}

pub fn new_str(s: &str) -> Box<slice_dst::StrWithHeader<()>> {
    StrWithHeader::new((), s)
}
ASM
playground::new_slice:
 push    rbp
 push    r14
 push    rsi
 push    rdi
 push    rbx
 sub     rsp, 80
 lea     rbp, [rsp, +, 80]
 mov     qword, ptr, [rbp, -, 8], -2
 mov     rbx, rdx
 add     rbx, 8
 jb      .LBB4_14
 cmp     rbx, -7
 jae     .LBB4_14
 mov     rsi, rdx
 mov     r14, rcx
 add     rbx, 7
 and     rbx, -8
 je      .LBB4_3
 mov     edx, 8
 mov     rcx, rbx
 call    __rust_alloc
 test    rax, rax
 jne     .LBB4_5
 mov     edx, 8
 mov     rcx, rbx
 call    slice_dst::alloc_slice_dst_in::{{closure}}
 ud2
.LBB4_3:
 mov     eax, 8
.LBB4_5:
 mov     qword, ptr, [rbp, -, 40], rax
 mov     qword, ptr, [rbp, -, 32], rsi
 mov     qword, ptr, [rbp, -, 24], rbx
 mov     qword, ptr, [rbp, -, 16], 8
 test    rsi, rsi
 je      .LBB4_13
 lea     rcx, [rsi, -, 1]
 xor     ebx, ebx
 xor     edi, edi
.LBB4_7:
 test    bl, 1
 jne     .LBB4_8
 mov     rdx, rdi
 add     rdi, 1
 movzx   ebx, byte, ptr, [r14, +, rdx]
 mov     byte, ptr, [rax, +, rdx, +, 8], bl
 cmp     rcx, rdx
 sete    bl
 cmp     rsi, rdi
 jne     .LBB4_7
 cmp     rcx, rdx
 jne     .LBB4_12
.LBB4_13:
 mov     qword, ptr, [rax], rsi
 mov     rdx, rsi
 add     rsp, 80
 pop     rbx
 pop     rdi
 pop     rsi
 pop     r14
 pop     rbp
 ret
.LBB4_8:
 lea     rcx, [rip, +, __unnamed_1]
 lea     r8, [rip, +, __unnamed_2]
 mov     edx, 38
 call    core::option::expect_failed
.LBB4_9:
 ud2
.LBB4_14:
 lea     rax, [rip, +, __unnamed_3]
 mov     qword, ptr, [rsp, +, 32], rax
 lea     rcx, [rip, +, __unnamed_4]
 lea     r9, [rip, +, __unnamed_5]
 lea     r8, [rbp, -, 40]
 mov     edx, 43
 call    core::result::unwrap_failed
 ud2
.LBB4_12:
 lea     rcx, [rip, +, __unnamed_6]
 lea     r8, [rip, +, __unnamed_7]
 mov     edx, 39
 call    core::panicking::panic
 jmp     .LBB4_9

playground::new_str:
 push    r14
 push    rsi
 push    rdi
 push    rbx
 sub     rsp, 56
 mov     rbx, rdx
 add     rbx, 8
 jb      .LBB5_6
 cmp     rbx, -7
 jae     .LBB5_6
 mov     rsi, rdx
 mov     r14, rcx
 add     rbx, 7
 and     rbx, -8
 je      .LBB5_3
 mov     edx, 8
 mov     rcx, rbx
 call    __rust_alloc
 mov     rdi, rax
 test    rax, rax
 jne     .LBB5_5
 mov     edx, 8
 mov     rcx, rbx
 call    slice_dst::alloc_slice_dst_in::{{closure}}
 ud2
.LBB5_3:
 mov     edi, 8
.LBB5_5:
 mov     qword, ptr, [rdi], rsi
 lea     rcx, [rdi, +, 8]
 mov     rdx, r14
 mov     r8, rsi
 call    memcpy
 mov     rax, rdi
 mov     rdx, rsi
 add     rsp, 56
 pop     rbx
 pop     rdi
 pop     rsi
 pop     r14
 ret
.LBB5_6:
 lea     rax, [rip, +, __unnamed_8]
 mov     qword, ptr, [rsp, +, 32], rax
 lea     rcx, [rip, +, __unnamed_4]
 lea     r9, [rip, +, __unnamed_5]
 lea     r8, [rsp, +, 48]
 mov     edx, 43
 call    core::result::unwrap_failed
 ud2

slice_dst::alloc_slice_dst_in::{{closure}}:
 sub     rsp, 40
 call    alloc::alloc::handle_alloc_error
 ud2

The complexity optimizes away decently well, but the simplified slice copy definitely optimizes down significantly better. The remaining unwrap is for if layout calculation overflows, which I don't think we can possibly get to optimize out.

Copy link
Owner

@CAD97 CAD97 left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks good now; I'll see if I can get some big-picture docs for how the functionality works and publish this, then.

bors: r+

@bors
Copy link
Contributor

bors bot commented Jun 21, 2020

Build succeeded:

@bors bors bot merged commit ab2347f into CAD97:master Jun 21, 2020
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants