Skip to content

Commit f8972c8

Browse files
authored
[SCEVExp] Fix early exit in ComputeEndCheck. (#156910)
ComputeEndCheck incorrectly returned false for unsigned predicates on AddRecs that start at zero and have a positive step. Such an AddRec can still wrap if (Step * trunc ExitCount) wraps, or if truncating ExitCount strips leading 1 bits. Fixes #156849. PR: #156910
1 parent 85dbe18 commit f8972c8

File tree

6 files changed

+71
-36
lines changed

6 files changed

+71
-36
lines changed

llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2187,8 +2187,15 @@ Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
21872187
// negative. If Step is known to be positive or negative, only create
21882188
// either 1. or 2.
21892189
auto ComputeEndCheck = [&]() -> Value * {
2190-
// Checking <u 0 is always false.
2191-
if (!Signed && Start->isZero() && SE.isKnownPositive(Step))
2190+
// Checking <u 0 is always false, if (Step * trunc ExitCount) does not wrap.
2191+
// TODO: Predicates that can be proven true/false should be discarded when
2192+
// the predicates are created, not late during expansion.
2193+
if (!Signed && Start->isZero() && SE.isKnownPositive(Step) &&
2194+
DstBits < SrcBits &&
2195+
ExitCount == SE.getZeroExtendExpr(SE.getTruncateExpr(ExitCount, ARTy),
2196+
ExitCount->getType()) &&
2197+
SE.willNotOverflow(Instruction::Mul, Signed, Step,
2198+
SE.getTruncateExpr(ExitCount, ARTy)))
21922199
return ConstantInt::getFalse(Loc->getContext());
21932200

21942201
// Get the backedge taken count and truncate or extended to the AR type.

llvm/test/Transforms/LoopDistribute/scev-inserted-runtime-check.ll

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,13 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
1010
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
1111
; CHECK: for.body.lver.check:
1212
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
13+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
14+
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
15+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
16+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
1317
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
14-
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
18+
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
19+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
1520
; CHECK: for.body.ph.lver.orig:
1621
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
1722
; CHECK: for.body.lver.orig:
@@ -75,7 +80,7 @@ define void @f(ptr noalias %a, ptr noalias %b, ptr noalias %c, ptr noalias %d, p
7580
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
7681
; CHECK: for.end.loopexit:
7782
; CHECK-NEXT: br label [[FOR_END:%.*]]
78-
; CHECK: for.end.loopexit1:
83+
; CHECK: for.end.loopexit2:
7984
; CHECK-NEXT: br label [[FOR_END]]
8085
; CHECK: for.end:
8186
; CHECK-NEXT: ret void
@@ -135,8 +140,13 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
135140
; CHECK-NEXT: br label [[FOR_BODY_LVER_CHECK:%.*]]
136141
; CHECK: for.body.lver.check:
137142
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N:%.*]], -1
143+
; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 [[TMP0]] to i32
144+
; CHECK-NEXT: [[MUL1:%.*]] = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 2, i32 [[TMP2]])
145+
; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i32, i1 } [[MUL1]], 0
146+
; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i32, i1 } [[MUL1]], 1
138147
; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
139-
; CHECK-NEXT: br i1 [[TMP1]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
148+
; CHECK-NEXT: [[TMP3:%.*]] = or i1 [[MUL_OVERFLOW]], [[TMP1]]
149+
; CHECK-NEXT: br i1 [[TMP3]], label [[FOR_BODY_PH_LVER_ORIG:%.*]], label [[FOR_BODY_PH_LDIST1:%.*]]
140150
; CHECK: for.body.ph.lver.orig:
141151
; CHECK-NEXT: br label [[FOR_BODY_LVER_ORIG:%.*]]
142152
; CHECK: for.body.lver.orig:
@@ -200,7 +210,7 @@ define void @f_with_offset(ptr noalias %b, ptr noalias %c, ptr noalias %d, ptr n
200210
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT2:%.*]], label [[FOR_BODY]]
201211
; CHECK: for.end.loopexit:
202212
; CHECK-NEXT: br label [[FOR_END:%.*]]
203-
; CHECK: for.end.loopexit1:
213+
; CHECK: for.end.loopexit2:
204214
; CHECK-NEXT: br label [[FOR_END]]
205215
; CHECK: for.end:
206216
; CHECK-NEXT: ret void

llvm/test/Transforms/LoopVectorize/X86/cost-model.ll

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -224,7 +224,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
224224
; CHECK-NEXT: [[IND_END9:%.*]] = mul i64 [[N_VEC]], 32
225225
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP2]], [[N_VEC]]
226226
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4
227-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
227+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]]
228228
; CHECK: vec.epilog.ph:
229229
; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
230230
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP124]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0.000000e+00, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -269,7 +269,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
269269
; CHECK-NEXT: [[TMP155]] = fadd fast <4 x float> [[TMP154]], [[TMP153]]
270270
; CHECK-NEXT: [[INDEX_NEXT13]] = add nuw i64 [[INDEX10]], 4
271271
; CHECK-NEXT: [[TMP156:%.*]] = icmp eq i64 [[INDEX_NEXT13]], [[N_VEC8]]
272-
; CHECK-NEXT: br i1 [[TMP156]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
272+
; CHECK-NEXT: br i1 [[TMP156]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
273273
; CHECK: vec.epilog.middle.block:
274274
; CHECK-NEXT: [[TMP157:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP155]])
275275
; CHECK-NEXT: [[CMP_N14:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC8]]
@@ -289,7 +289,7 @@ define float @PR27826(ptr nocapture readonly %a, ptr nocapture readonly %b, i32
289289
; CHECK-NEXT: [[ADD4]] = fadd fast float [[ADD]], [[T2]]
290290
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 32
291291
; CHECK-NEXT: [[CMP1:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT]], [[T0]]
292-
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP4:![0-9]+]]
292+
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR]], label [[LOOPEXIT]], !llvm.loop [[LOOP5:![0-9]+]]
293293
; CHECK: loopexit:
294294
; CHECK-NEXT: [[ADD4_LCSSA:%.*]] = phi float [ [[ADD4]], [[FOR]] ], [ [[TMP124]], [[MIDDLE_BLOCK]] ], [ [[TMP157]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
295295
; CHECK-NEXT: br label [[FOR_END]]
@@ -369,21 +369,21 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
369369
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
370370
; CHECK: vector.body:
371371
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
372-
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META5:![0-9]+]]
372+
; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META6:![0-9]+]]
373373
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0
374374
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
375-
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META8:![0-9]+]]
375+
; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META9:![0-9]+]]
376376
; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0
377377
; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer
378378
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer
379379
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer
380380
; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]]
381381
; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8>
382382
; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1
383-
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META10:![0-9]+]], !noalias [[META12:![0-9]+]]
383+
; CHECK-NEXT: store i8 [[TMP19]], ptr [[DST]], align 1, !alias.scope [[META11:![0-9]+]], !noalias [[META13:![0-9]+]]
384384
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
385385
; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
386-
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
386+
; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
387387
; CHECK: middle.block:
388388
; CHECK-NEXT: br label [[SCALAR_PH]]
389389
; CHECK: scalar.ph:
@@ -406,7 +406,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 {
406406
; CHECK-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
407407
; CHECK-NEXT: [[IV_1_NEXT_WIDE]] = zext i32 [[IV_1_NEXT]] to i64
408408
; CHECK-NEXT: [[EC_2:%.*]] = icmp ult i64 [[IV_1_NEXT_WIDE]], [[B]]
409-
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP14:![0-9]+]]
409+
; CHECK-NEXT: br i1 [[EC_2]], label [[LOOP]], label [[EXIT]], !llvm.loop [[LOOP15:![0-9]+]]
410410
; CHECK: exit:
411411
; CHECK-NEXT: ret void
412412
;
@@ -485,7 +485,7 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
485485
; CHECK-NEXT: [[TMP27]] = or <2 x i1> [[VEC_PHI3]], [[TMP25]]
486486
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
487487
; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
488-
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
488+
; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
489489
; CHECK: middle.block:
490490
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i1> [[TMP27]], [[TMP26]]
491491
; CHECK-NEXT: [[TMP29:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[BIN_RDX]])
@@ -505,7 +505,7 @@ define i1 @any_of_cost(ptr %start, ptr %end) #0 {
505505
; CHECK-NEXT: [[ANY_OF_NEXT]] = select i1 [[CMP13_NOT_NOT]], i1 [[ANY_OF]], i1 false
506506
; CHECK-NEXT: [[PTR_IV_NEXT]] = getelementptr inbounds i8, ptr [[PTR_IV]], i64 40
507507
; CHECK-NEXT: [[CMP_NOT:%.*]] = icmp eq ptr [[PTR_IV]], [[END]]
508-
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP16:![0-9]+]]
508+
; CHECK-NEXT: br i1 [[CMP_NOT]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
509509
; CHECK: exit:
510510
; CHECK-NEXT: [[ANY_OF_NEXT_LCSSA:%.*]] = phi i1 [ [[ANY_OF_NEXT]], [[LOOP]] ]
511511
; CHECK-NEXT: ret i1 [[ANY_OF_NEXT_LCSSA]]
@@ -562,7 +562,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
562562
; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP11]])
563563
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
564564
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
565-
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
565+
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
566566
; CHECK: middle.block:
567567
; CHECK-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP8]], [[TMP7]]
568568
; CHECK-NEXT: [[BIN_RDX5:%.*]] = add <2 x i64> [[TMP9]], [[BIN_RDX]]
@@ -583,7 +583,7 @@ define i64 @cost_assume(ptr %end, i64 %N) {
583583
; CHECK-NEXT: tail call void @llvm.assume(i1 [[C]])
584584
; CHECK-NEXT: [[GEP:%.*]] = getelementptr nusw [9 x i8], ptr null, i64 [[IV_NEXT]]
585585
; CHECK-NEXT: [[EC:%.*]] = icmp eq ptr [[GEP]], [[END]]
586-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
586+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
587587
; CHECK: exit:
588588
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP12]], [[LOOP]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
589589
; CHECK-NEXT: ret i64 [[DOTLCSSA]]
@@ -627,7 +627,7 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
627627
; CHECK-NEXT: [[TMP12]] = and <4 x i32> [[VEC_PHI1]], [[TMP2]]
628628
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
629629
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 24
630-
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
630+
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
631631
; CHECK: middle.block:
632632
; CHECK-NEXT: [[BIN_RDX:%.*]] = and <4 x i32> [[TMP12]], [[TMP11]]
633633
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]])
@@ -651,7 +651,7 @@ define void @reduction_store(ptr noalias %src, ptr %dst, i1 %x) #2 {
651651
; CHECK-NEXT: store i32 [[RED_NEXT]], ptr [[DST]], align 4
652652
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
653653
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 29
654-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
654+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP21:![0-9]+]]
655655
; CHECK: exit:
656656
; CHECK-NEXT: ret void
657657
;
@@ -696,7 +696,7 @@ define i64 @live_in_known_1_via_scev() {
696696
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ <i64 3, i64 1, i64 1, i64 1>, [[VECTOR_PH]] ], [ [[VEC_PHI]], [[VECTOR_BODY]] ]
697697
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
698698
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8
699-
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
699+
; CHECK-NEXT: br i1 [[TMP0]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
700700
; CHECK: middle.block:
701701
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.mul.v4i64(<4 x i64> [[VEC_PHI]])
702702
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -708,7 +708,7 @@ define i64 @live_in_known_1_via_scev() {
708708
; CHECK-NEXT: [[RED_MUL]] = mul nsw i64 [[RED]], [[P_EXT]]
709709
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
710710
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
711-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
711+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP23:![0-9]+]]
712712
; CHECK: exit:
713713
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
714714
; CHECK-NEXT: ret i64 [[RES]]
@@ -753,7 +753,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
753753
; CHECK: vector.body:
754754
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 1), [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
755755
; CHECK-NEXT: [[TMP3]] = mul <2 x i64> [[TMP2]], [[VEC_PHI]]
756-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
756+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
757757
; CHECK: middle.block:
758758
; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vector.reduce.mul.v2i64(<2 x i64> [[TMP3]])
759759
; CHECK-NEXT: br label [[EXIT:%.*]]
@@ -768,7 +768,7 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) {
768768
; CHECK-NEXT: [[RED_MUL]] = mul i64 [[SHL]], [[RED]]
769769
; CHECK-NEXT: [[IV_NEXT_I_I_I]] = add i64 [[IV]], 1
770770
; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 1
771-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP27:![0-9]+]]
771+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
772772
; CHECK: exit:
773773
; CHECK-NEXT: [[RED_MUL_LCSSA:%.*]] = phi i64 [ [[RED_MUL]], [[LOOP]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
774774
; CHECK-NEXT: ret i64 [[RED_MUL_LCSSA]]
@@ -807,7 +807,7 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
807807
; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
808808
; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[TMP3]] to <16 x i1>
809809
; CHECK-NEXT: [[TMP7]] = zext <16 x i1> [[TMP5]] to <16 x i32>
810-
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
810+
; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
811811
; CHECK: middle.block:
812812
; CHECK-NEXT: [[TMP20:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
813813
; CHECK-NEXT: [[TMP21:%.*]] = zext i1 [[TMP20]] to i32
@@ -821,7 +821,7 @@ define i32 @narrowed_reduction(ptr %a, i1 %cmp) #0 {
821821
; CHECK-NEXT: [[OR]] = or i32 [[AND]], [[CONV]]
822822
; CHECK-NEXT: [[INC]] = add i32 [[IV]], 1
823823
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 16
824-
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP29:![0-9]+]]
824+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP1]], !llvm.loop [[LOOP27:![0-9]+]]
825825
; CHECK: exit:
826826
; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[LOOP1]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
827827
; CHECK-NEXT: ret i32 [[OR_LCSSA]]
@@ -853,6 +853,7 @@ define i32 @g(i64 %n) {
853853
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
854854
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
855855
; CHECK: vector.scevcheck:
856+
; CHECK-NEXT: [[TMP28:%.*]] = trunc i64 [[N]] to i32
856857
; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295
857858
; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
858859
; CHECK: vector.main.loop.iter.check:
@@ -893,7 +894,7 @@ define i32 @g(i64 %n) {
893894
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
894895
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
895896
; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
896-
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
897+
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
897898
; CHECK: middle.block:
898899
; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP16]], [[TMP15]]
899900
; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <4 x i32> [[TMP17]], [[BIN_RDX]]
@@ -904,7 +905,7 @@ define i32 @g(i64 %n) {
904905
; CHECK: vec.epilog.iter.check:
905906
; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i32 [[TMP1]], [[N_VEC]]
906907
; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING]], 4
907-
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
908+
; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]]
908909
; CHECK: vec.epilog.ph:
909910
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
910911
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
@@ -928,7 +929,7 @@ define i32 @g(i64 %n) {
928929
; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i32 [[INDEX9]], 4
929930
; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND10]], splat (i32 4)
930931
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT15]], [[N_VEC8]]
931-
; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
932+
; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
932933
; CHECK: vec.epilog.middle.block:
933934
; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP25]])
934935
; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]]
@@ -945,7 +946,7 @@ define i32 @g(i64 %n) {
945946
; CHECK-NEXT: [[SELECT_I:%.*]] = select i1 [[EXITCOND]], i32 0, i32 2
946947
; CHECK-NEXT: [[SELECT_NEXT]] = or i32 [[SELECT_I]], [[SELECT]]
947948
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
948-
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP32:![0-9]+]]
949+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP30:![0-9]+]]
949950
; CHECK: exit:
950951
; CHECK-NEXT: [[SELECT_NEXT_LCSSA:%.*]] = phi i32 [ [[SELECT_NEXT]], [[LOOP]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
951952
; CHECK-NEXT: ret i32 [[SELECT_NEXT_LCSSA]]

0 commit comments

Comments
 (0)