Skip to content

Commit 8d7dae0

Browse files
committed
i386: Add init pattern for V2HI vectors [PR100637]
2021-06-03  Uroš Bizjak  <ubizjak@gmail.com>

gcc/
	PR target/100637
	* config/i386/i386-expand.c (ix86_expand_vector_init_duplicate): Handle V2HI mode.
	(ix86_expand_vector_init_general): Ditto. Use SImode instead of word_mode for logic operations when GET_MODE_SIZE (mode) < UNITS_PER_WORD.
	(expand_vec_perm_even_odd_1): Assert that V2HI mode should be implemented by expand_vec_perm_1.
	(expand_vec_perm_broadcast_1): Assert that V2HI and V4HI modes should be implemented using standard shuffle patterns.
	(ix86_vectorize_vec_perm_const): Handle V2HImode. Add V4HI and V2HI modes to modes, implementable with shuffle for one operand.
	* config/i386/mmx.md (*punpckwd): New insn_and_split pattern.
	(*pshufw_1): New insn pattern.
	(*vec_dupv2hi): Ditto.
	(vec_initv2hihi): New expander.

gcc/testsuite/
	PR target/100637
	* gcc.dg/vect/slp-perm-9.c (dg-final): Adjust dumps for vect32 targets.
1 parent ee9548b commit 8d7dae0

File tree

3 files changed

+121
-14
lines changed

3 files changed

+121
-14
lines changed

gcc/config/i386/i386-expand.c

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13723,6 +13723,19 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
1372313723
}
1372413724
goto widen;
1372513725

13726+
case E_V2HImode:
13727+
if (TARGET_SSE2)
13728+
{
13729+
rtx x;
13730+
13731+
val = gen_lowpart (SImode, val);
13732+
x = gen_rtx_TRUNCATE (HImode, val);
13733+
x = gen_rtx_VEC_DUPLICATE (mode, x);
13734+
emit_insn (gen_rtx_SET (target, x));
13735+
return true;
13736+
}
13737+
return false;
13738+
1372613739
case E_V8QImode:
1372713740
if (!mmx_ok)
1372813741
return false;
@@ -14524,6 +14537,8 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
1452414537

1452514538
case E_V4HImode:
1452614539
case E_V8QImode:
14540+
14541+
case E_V2HImode:
1452714542
break;
1452814543

1452914544
default:
@@ -14532,12 +14547,14 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
1453214547

1453314548
{
1453414549
int i, j, n_elts, n_words, n_elt_per_word;
14535-
machine_mode inner_mode;
14550+
machine_mode tmp_mode, inner_mode;
1453614551
rtx words[4], shift;
1453714552

14553+
tmp_mode = (GET_MODE_SIZE (mode) < UNITS_PER_WORD) ? SImode : word_mode;
14554+
1453814555
inner_mode = GET_MODE_INNER (mode);
1453914556
n_elts = GET_MODE_NUNITS (mode);
14540-
n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
14557+
n_words = GET_MODE_SIZE (mode) / GET_MODE_SIZE (tmp_mode);
1454114558
n_elt_per_word = n_elts / n_words;
1454214559
shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
1454314560

@@ -14548,15 +14565,15 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
1454814565
for (j = 0; j < n_elt_per_word; ++j)
1454914566
{
1455014567
rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
14551-
elt = convert_modes (word_mode, inner_mode, elt, true);
14568+
elt = convert_modes (tmp_mode, inner_mode, elt, true);
1455214569

1455314570
if (j == 0)
1455414571
word = elt;
1455514572
else
1455614573
{
14557-
word = expand_simple_binop (word_mode, ASHIFT, word, shift,
14574+
word = expand_simple_binop (tmp_mode, ASHIFT, word, shift,
1455814575
word, 1, OPTAB_LIB_WIDEN);
14559-
word = expand_simple_binop (word_mode, IOR, word, elt,
14576+
word = expand_simple_binop (tmp_mode, IOR, word, elt,
1456014577
word, 1, OPTAB_LIB_WIDEN);
1456114578
}
1456214579
}
@@ -14570,14 +14587,14 @@ ix86_expand_vector_init_general (bool mmx_ok, machine_mode mode,
1457014587
{
1457114588
rtx tmp = gen_reg_rtx (mode);
1457214589
emit_clobber (tmp);
14573-
emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
14574-
emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
14590+
emit_move_insn (gen_lowpart (tmp_mode, tmp), words[0]);
14591+
emit_move_insn (gen_highpart (tmp_mode, tmp), words[1]);
1457514592
emit_move_insn (target, tmp);
1457614593
}
1457714594
else if (n_words == 4)
1457814595
{
1457914596
rtx tmp = gen_reg_rtx (V4SImode);
14580-
gcc_assert (word_mode == SImode);
14597+
gcc_assert (tmp_mode == SImode);
1458114598
vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
1458214599
ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
1458314600
emit_move_insn (target, gen_lowpart (mode, tmp));
@@ -19548,6 +19565,7 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
1954819565
case E_V2DImode:
1954919566
case E_V2SImode:
1955019567
case E_V4SImode:
19568+
case E_V2HImode:
1955119569
/* These are always directly implementable by expand_vec_perm_1. */
1955219570
gcc_unreachable ();
1955319571

@@ -19758,6 +19776,8 @@ expand_vec_perm_broadcast_1 (struct expand_vec_perm_d *d)
1975819776
case E_V2DImode:
1975919777
case E_V2SImode:
1976019778
case E_V4SImode:
19779+
case E_V2HImode:
19780+
case E_V4HImode:
1976119781
/* These are always implementable using standard shuffle patterns. */
1976219782
gcc_unreachable ();
1976319783

@@ -20267,6 +20287,10 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
2026720287
if (!TARGET_MMX_WITH_SSE)
2026820288
return false;
2026920289
break;
20290+
case E_V2HImode:
20291+
if (!TARGET_SSE2)
20292+
return false;
20293+
break;
2027020294
case E_V2DImode:
2027120295
case E_V2DFmode:
2027220296
if (!TARGET_SSE)
@@ -20298,10 +20322,11 @@ ix86_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
2029820322
/* Check whether the mask can be applied to the vector type. */
2029920323
d.one_operand_p = (which != 3);
2030020324

20301-
/* Implementable with shufps or pshufd. */
20325+
/* Implementable with shufps, pshufd or pshuflw. */
2030220326
if (d.one_operand_p
2030320327
&& (d.vmode == V4SFmode || d.vmode == V2SFmode
20304-
|| d.vmode == V4SImode || d.vmode == V2SImode))
20328+
|| d.vmode == V4SImode || d.vmode == V2SImode
20329+
|| d.vmode == V4HImode || d.vmode == V2HImode))
2030520330
return true;
2030620331

2030720332
/* Otherwise we have to go through the motions and see if we can

gcc/config/i386/mmx.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3292,6 +3292,88 @@
32923292
DONE;
32933293
})
32943294

3295+
;; Select two HImode elements out of the V4HI concatenation of two V2HI
;; registers (operands 1 and 2).  Operands 3 and 4 are the element
;; indices (0..3) into that concatenation.  The insn is output as "#"
;; and split once reload has completed: the preparation code emits an
;; interleave-low on V8HImode views of the operands and, unless that
;; alone already yields the requested pair, the split template applies
;; a V4HI vec_select to the low part of the result.
;; Enabled only for TARGET_SSE2.
(define_insn_and_split "*punpckwd"
3296+
[(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
3297+
(vec_select:V2HI
3298+
(vec_concat:V4HI
3299+
(match_operand:V2HI 1 "register_operand" "0,Yw")
3300+
(match_operand:V2HI 2 "register_operand" "x,Yw"))
3301+
(parallel [(match_operand 3 "const_0_to_3_operand")
3302+
(match_operand 4 "const_0_to_3_operand")])))]
3303+
"TARGET_SSE2"
3304+
"#"
3305+
"&& reload_completed"
3306+
[(set (match_dup 5)
3307+
(vec_select:V4HI
3308+
(match_dup 5)
3309+
(parallel [(match_dup 3) (match_dup 4)
3310+
(const_int 0) (const_int 0)])))]
3311+
{
3312+
/* Re-view the V2HI destination and sources as V8HImode so the
   V8HI interleave-low generator can be used on them.  */
rtx dest = lowpart_subreg (V8HImode, operands[0], V2HImode);
3313+
rtx op1 = lowpart_subreg (V8HImode, operands[1], V2HImode);
3314+
rtx op2 = lowpart_subreg (V8HImode, operands[2], V2HImode);
3315+
3316+
emit_insn (gen_vec_interleave_lowv8hi (dest, op1, op2));
3317+
3318+
/* Translate an index into the (op1, op2) concatenation into an index
   into the interleaved result.  NOTE(review): this presumes the
   interleave produces op1[0], op2[0], op1[1], op2[1] in the low four
   elements -- confirm against vec_interleave_lowv8hi.  */
static const int map[4] = { 0, 2, 1, 3 };
3319+
3320+
int sel0 = map[INTVAL (operands[3])];
3321+
int sel1 = map[INTVAL (operands[4])];
3322+
3323+
/* Elements 0 and 1 of the interleaved result are already the requested
   pair, so no further shuffle is emitted.  */
if (sel0 == 0 && sel1 == 1)
3324+
DONE;
3325+
3326+
/* Otherwise hand the remapped selectors to the split template.  */
operands[3] = GEN_INT (sel0);
3327+
operands[4] = GEN_INT (sel1);
3328+
3329+
/* The template shuffles the V4HI low part of the interleaved register
   in place (operand 5 is both source and destination).  */
operands[5] = lowpart_subreg (V4HImode, dest, V8HImode);
3330+
}
3331+
[(set_attr "isa" "noavx,avx")
3332+
(set_attr "type" "sselog")
3333+
(set_attr "mode" "TI")])
3334+
3335+
;; One-operand permutation of a V2HI register: operands 2 and 3 (each
;; 0 or 1) choose which element of operand 1 becomes result element 0
;; and 1 respectively.  Output as a single pshuflw whose immediate is
;; built from the two selectors.  Enabled only for TARGET_SSE2.
(define_insn "*pshufw_1"
3336+
[(set (match_operand:V2HI 0 "register_operand" "=Yw")
3337+
(vec_select:V2HI
3338+
(match_operand:V2HI 1 "register_operand" "Yw")
3339+
(parallel [(match_operand 2 "const_0_to_1_operand")
3340+
(match_operand 3 "const_0_to_1_operand")])))]
3341+
"TARGET_SSE2"
3342+
{
3343+
/* Assemble the immediate from four 2-bit fields: fields 0 and 1 carry
   the two selectors, fields 2 and 3 are fixed at 2 and 3 (presumably
   leaving the words outside the V2HI value in place -- confirm against
   the pshuflw encoding).  */
int mask = 0;
3344+
mask |= INTVAL (operands[2]) << 0;
3345+
mask |= INTVAL (operands[3]) << 2;
3346+
mask |= 2 << 4;
3347+
mask |= 3 << 6;
3348+
/* Operand 2 is reused to hold the combined immediate for the output
   template below.  */
operands[2] = GEN_INT (mask);
3349+
3350+
return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
3351+
}
3352+
[(set_attr "type" "sselog1")
3353+
(set_attr "length_immediate" "1")
3354+
(set_attr "mode" "TI")])
3355+
3356+
;; Broadcast into V2HI: duplicate the HImode truncation of the SImode
;; register operand 1 into both elements of operand 0.  Output as
;; pshuflw with immediate 0.  Enabled only for TARGET_SSE2.
(define_insn "*vec_dupv2hi"
3357+
[(set (match_operand:V2HI 0 "register_operand" "=Yw")
3358+
(vec_duplicate:V2HI
3359+
(truncate:HI
3360+
(match_operand:SI 1 "register_operand" "Yw"))))]
3361+
"TARGET_SSE2"
3362+
"%vpshuflw\t{$0, %1, %0|%0, %1, 0}"
3363+
[(set_attr "type" "sselog1")
3364+
(set_attr "length_immediate" "1")
3365+
(set_attr "mode" "TI")])
3366+
3367+
;; Expander that initializes the V2HI register operand 0 from operand 1.
;; All of the work is delegated to ix86_expand_vector_init, called with
;; false as its first argument (presumably the mmx_ok flag -- confirm
;; against its declaration).  Enabled only for TARGET_SSE2.
(define_expand "vec_initv2hihi"
3368+
[(match_operand:V2HI 0 "register_operand")
3369+
(match_operand 1)]
3370+
"TARGET_SSE2"
3371+
{
3372+
ix86_expand_vector_init (false, operands[0],
3373+
operands[1]);
3374+
DONE;
3375+
})
3376+
32953377
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
32963378
;;
32973379
;; Miscellaneous

gcc/testsuite/gcc.dg/vect/slp-perm-9.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -57,13 +57,13 @@ int main (int argc, const char* argv[])
5757
return 0;
5858
}
5959

60-
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { vect_perm_short || vect_load_lanes } } } } } */
61-
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_perm_short || vect_load_lanes } } } } */
60+
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" { target { ! { { vect_perm_short || vect32 } || vect_load_lanes } } } } } */
61+
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { { vect_perm_short || vect32 } || vect_load_lanes } } } } */
6262
/* We don't try permutes with a group size of 3 for variable-length
6363
vectors. */
6464
/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 1 "vect" { target { vect_perm_short && { { ! vect_perm3_short } && { ! vect_partial_vectors_usage_1 } } } xfail vect_variable_length } } } */
6565
/* Try to vectorize the epilogue using partial vectors. */
6666
/* { dg-final { scan-tree-dump-times "permutation requires at least three vectors" 2 "vect" { target { vect_perm_short && { { ! vect_perm3_short } && vect_partial_vectors_usage_1 } } xfail vect_variable_length } } } */
6767
/* { dg-final { scan-tree-dump-not "permutation requires at least three vectors" "vect" { target vect_perm3_short } } } */
68-
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { ! vect_perm3_short } || vect_load_lanes } } } } */
69-
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_perm3_short && { ! vect_load_lanes } } } } } */
68+
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 0 "vect" { target { { ! { vect_perm3_short || vect32 } } || vect_load_lanes } } } } */
69+
/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { { vect_perm3_short || vect32 } && { ! vect_load_lanes } } } } } */

0 commit comments

Comments
 (0)