-
Notifications
You must be signed in to change notification settings - Fork 15.1k
Open
Description
Consider the following rust example (simplified version of pushing to vector with sufficient capacity):
// using const for simplicity, it happen as well if getting it as argument
const COUNT: usize = 10000;
#[no_mangle]
pub fn run() -> usize {
let mut inter = 0;
let mut my_cap = COUNT;
for _ in 0..COUNT {
let len = inter;
if len == my_cap {
something(len);
}
inter += 1;
}
return inter;
}
#[inline(never)]
fn something(len: usize) {
std::hint::black_box(len as i128);
}
pub fn main() {
std::hint::black_box(run());
}
Using rustc nightly with -C opt-level=3 -C target-feature=+avx2 -C codegen-units=1 -Z mir-opt-level=3
in GodBolt
you can see that
if len == my_cap {
something(len);
}
Is optimized away.
example::main::h86544b4302037868:
mov qword ptr [rsp - 8], 10000
lea rax, [rsp - 8]
ret
run:
mov eax, 10000
ret
as the condition can never be reached.
however when changing the body inside the unreachable condition to be:
if len == my_cap {
something(len);
my_cap += 1;
}
code after change
const COUNT: usize = 10000;
#[no_mangle]
pub fn run() -> usize {
let mut inter = 0;
let mut my_cap = COUNT;
for _ in 0..COUNT {
let len = inter;
if len == my_cap {
something(len);
my_cap += 1;
}
inter += 1;
}
return inter;
}
#[inline(never)]
fn something(len: usize) {
std::hint::black_box(len as i128);
}
pub fn main() {
std::hint::black_box(run());
}
You can see that the if
is not optimized away
Generated assembly
example::main::h86544b4302037868:
push rbx
sub rsp, 16
mov eax, 10000
xor edi, edi
jmp .LBB0_1
.LBB0_3:
mov rdi, rbx
cmp rbx, 10000
je .LBB0_4
.LBB0_1:
lea rbx, [rdi + 1]
cmp rdi, rax
jne .LBB0_3
call example::something::h16dfb585d2622cee
mov rax, rbx
jmp .LBB0_3
.LBB0_4:
mov qword ptr [rsp + 8], 10000
lea rax, [rsp + 8]
add rsp, 16
pop rbx
ret
example::something::h16dfb585d2622cee:
mov qword ptr [rsp - 24], rdi
mov qword ptr [rsp - 16], 0
lea rax, [rsp - 24]
ret
run:
push rbx
mov eax, 10000
xor edi, edi
jmp .LBB2_1
.LBB2_3:
mov rdi, rbx
cmp rbx, 10000
je .LBB2_4
.LBB2_1:
lea rbx, [rdi + 1]
cmp rdi, rax
jne .LBB2_3
call example::something::h16dfb585d2622cee
mov rax, rbx
jmp .LBB2_3
.LBB2_4:
mov eax, 10000
pop rbx
ret
I tried testing in C++ with x86-64 clang 21.1.0 and -O3
and same result.
(I don't really know cpp, so I tried my best to replicate the rust code)
the code:
#include <cstddef>
#include <cstdio>
const size_t COUNT = 10000;
__attribute__((noinline))
void something(size_t len) {
printf("%d", len);
(void)len;
}
extern "C" size_t run() {
size_t inter = 0;
size_t my_cap = COUNT;
for (size_t i = 0; i < my_cap; ++i) {
size_t len = inter;
if (len == my_cap) {
something(len);
my_cap += 1;
}
inter += 1;
}
return inter;
}
int main() {
volatile size_t result = run();
(void)result;
return 0;
}
Produce the following assembly:
something(unsigned long):
mov rsi, rdi
lea rdi, [rip + .L.str]
xor eax, eax
jmp printf@PLT
run:
mov rax, -25
.LBB1_1:
add rax, 25
cmp rax, 9975
jb .LBB1_1
add rax, 25
ret
main:
mov qword ptr [rsp - 8], 10000
mov rax, qword ptr [rsp - 8]
xor eax, eax
ret
.L.str:
.asciz "%d"
.Ldebug_list_header_start0:
.short 5
.byte 8
.byte 0
.long 3
.Ldebug_list_header_end0:
Using x86-64 gcc 15.2 with -O3
however do optimize it:
.LC0:
.string "%d"
something(unsigned long):
mov rsi, rdi
xor eax, eax
mov edi, OFFSET FLAT:.LC0
jmp printf
run:
mov eax, 10000
ret
main:
mov QWORD PTR [rsp-8], 10000
mov rax, QWORD PTR [rsp-8]
xor eax, eax
ret