Skip to content

Commit 5883e56

Browse files
committed
i386: Add insert and extract patterns for 4-byte vectors [PR100637]
The patch introduces insert and extract patterns for 4-byte vectors. It emits PINSR and PEXTR instructions only when they are available; otherwise it falls back to generic code that emulates these instructions via inserts, extracts, logic operations and shifts in integer registers. Please note that the generic fallback produces better code than the current approach of constructing a new vector in memory (which incurs a store forwarding stall), so also enable QImode 8-byte vector inserts only with TARGET_SSE4_1. 2021-06-03 Uroš Bizjak <[email protected]> gcc/ PR target/100637 * config/i386/i386-expand.c (ix86_expand_vector_set): Handle V2HI and V4QI modes. (ix86_expand_vector_extract): Ditto. * config/i386/mmx.md (*pinsrw): New insn pattern. (*pinsrb): Ditto. (*pextrw): Ditto. (*pextrw_zext): Ditto. (*pextrb): Ditto. (*pextrb_zext): Ditto. (vec_setv2hi): New expander. (vec_extractv2hihi): Ditto. (vec_setv4qi): Ditto. (vec_extractv4qiqi): Ditto. (vec_setv8qi): Enable only for TARGET_SSE4_1. (vec_extractv8qiqi): Ditto. gcc/testsuite/ PR target/100637 * gcc.target/i386/vperm-v2hi.c: New test. * gcc.target/i386/vperm-v4qi.c: Ditto.
1 parent 52e1306 commit 5883e56

File tree

4 files changed

+268
-2
lines changed

4 files changed

+268
-2
lines changed

gcc/config/i386/i386-expand.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14968,13 +14968,15 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
1496814968
return;
1496914969

1497014970
case E_V8HImode:
14971+
case E_V2HImode:
1497114972
use_vec_merge = TARGET_SSE2;
1497214973
break;
1497314974
case E_V4HImode:
1497414975
use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
1497514976
break;
1497614977

1497714978
case E_V16QImode:
14979+
case E_V4QImode:
1497814980
use_vec_merge = TARGET_SSE4_1;
1497914981
break;
1498014982

@@ -15274,6 +15276,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
1527415276
break;
1527515277

1527615278
case E_V8HImode:
15279+
case E_V2HImode:
1527715280
use_vec_extr = TARGET_SSE2;
1527815281
break;
1527915282
case E_V4HImode:
@@ -15294,6 +15297,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
1529415297
return;
1529515298
}
1529615299
break;
15300+
case E_V4QImode:
15301+
use_vec_extr = TARGET_SSE4_1;
15302+
break;
1529715303

1529815304
case E_V8SFmode:
1529915305
if (TARGET_AVX)

gcc/config/i386/mmx.md

Lines changed: 174 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3092,7 +3092,7 @@
30923092
[(match_operand:V8QI 0 "register_operand")
30933093
(match_operand:QI 1 "register_operand")
30943094
(match_operand 2 "const_int_operand")]
3095-
"TARGET_MMX || TARGET_MMX_WITH_SSE"
3095+
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
30963096
{
30973097
ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
30983098
INTVAL (operands[2]));
@@ -3103,7 +3103,7 @@
31033103
[(match_operand:QI 0 "register_operand")
31043104
(match_operand:V8QI 1 "register_operand")
31053105
(match_operand 2 "const_int_operand")]
3106-
"TARGET_MMX || TARGET_MMX_WITH_SSE"
3106+
"TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
31073107
{
31083108
ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
31093109
operands[1], INTVAL (operands[2]));
@@ -3120,6 +3120,178 @@
31203120
DONE;
31213121
})
31223122

3123+
;; Insert an HImode scalar (operand 2, register or memory) into one element
;; of a V2HImode vector (operand 1).  The RTL is a vec_merge of the
;; duplicated scalar with the original vector; operand 3 is the vec_merge
;; mask.  The insn condition requires TARGET_SSE2 and that exact_log2 of
;; the mask is a valid V2HImode element index, i.e. the mask has exactly
;; one bit set and it lies within the vector.
;;
;; The output code first rewrites operand 3 from the mask to that element
;; index so it can be printed as the instruction immediate.
;; Alternative 0 (isa "noavx") prints the two-operand pinsrw with the
;; destination tied to operand 1; alternative 1 (isa "avx") prints the
;; three-operand vpinsrw.  A non-memory source is printed with the %k
;; operand modifier (presumably the 32-bit register name -- confirm).
(define_insn "*pinsrw"
3124+
[(set (match_operand:V2HI 0 "register_operand" "=x,YW")
3125+
(vec_merge:V2HI
3126+
(vec_duplicate:V2HI
3127+
(match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
3128+
(match_operand:V2HI 1 "register_operand" "0,YW")
3129+
(match_operand:SI 3 "const_int_operand")))]
3130+
"TARGET_SSE2
3131+
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
3132+
< GET_MODE_NUNITS (V2HImode))"
3133+
{
3134+
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3135+
switch (which_alternative)
3136+
{
3137+
case 1:
3138+
if (MEM_P (operands[2]))
3139+
return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3140+
else
3141+
return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
3142+
case 0:
3143+
if (MEM_P (operands[2]))
3144+
return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
3145+
else
3146+
return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
3147+
default:
3148+
gcc_unreachable ();
3149+
}
3150+
}
3151+
[(set_attr "isa" "noavx,avx")
3152+
(set_attr "type" "sselog")
3153+
(set_attr "length_immediate" "1")
3154+
(set_attr "mode" "TI")])
3155+
3156+
;; Insert a QImode scalar (operand 2, register or memory) into one element
;; of a V4QImode vector (operand 1).  The RTL is a vec_merge of the
;; duplicated scalar with the original vector; operand 3 is the vec_merge
;; mask.  The insn condition requires TARGET_SSE4_1 and that exact_log2 of
;; the mask is a valid V4QImode element index, i.e. the mask has exactly
;; one bit set and it lies within the vector.
;;
;; The output code first rewrites operand 3 from the mask to that element
;; index so it can be printed as the instruction immediate.
;; Alternative 0 (isa "noavx", prefix "orig") prints the two-operand
;; pinsrb with the destination tied to operand 1; alternative 1
;; (isa "avx", prefix "vex") prints the three-operand vpinsrb.  A
;; non-memory source is printed with the %k operand modifier (presumably
;; the 32-bit register name -- confirm).
(define_insn "*pinsrb"
3157+
[(set (match_operand:V4QI 0 "register_operand" "=x,YW")
3158+
(vec_merge:V4QI
3159+
(vec_duplicate:V4QI
3160+
(match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
3161+
(match_operand:V4QI 1 "register_operand" "0,YW")
3162+
(match_operand:SI 3 "const_int_operand")))]
3163+
"TARGET_SSE4_1
3164+
&& ((unsigned) exact_log2 (INTVAL (operands[3]))
3165+
< GET_MODE_NUNITS (V4QImode))"
3166+
{
3167+
operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
3168+
switch (which_alternative)
3169+
{
3170+
case 1:
3171+
if (MEM_P (operands[2]))
3172+
return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3173+
else
3174+
return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
3175+
case 0:
3176+
if (MEM_P (operands[2]))
3177+
return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
3178+
else
3179+
return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
3180+
default:
3181+
gcc_unreachable ();
3182+
}
3183+
}
3184+
[(set_attr "isa" "noavx,avx")
3185+
(set_attr "type" "sselog")
3186+
(set_attr "prefix_data16" "1")
3187+
(set_attr "prefix_extra" "1")
3188+
(set_attr "length_immediate" "1")
3189+
(set_attr "prefix" "orig,vex")
3190+
(set_attr "mode" "TI")])
3191+
3192+
;; Extract one HImode element from a V2HImode register (operand 1).  The
;; element index is operand 2, which must satisfy const_0_to_1_operand.
;; Enabled for TARGET_SSE2.
;; Alternative 0 writes a general register (printed with %k0);
;; alternative 1 writes memory and is limited by the "isa" attribute to
;; "sse4".
;; NOTE(review): the %v template prefix together with prefix "maybe_vex"
;; presumably selects the VEX-encoded mnemonic when AVX is enabled --
;; confirm against the i386 output-template conventions.
(define_insn "*pextrw"
3193+
[(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m")
3194+
(vec_select:HI
3195+
(match_operand:V2HI 1 "register_operand" "YW,YW")
3196+
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n,n")])))]
3197+
"TARGET_SSE2"
3198+
"@
3199+
%vpextrw\t{%2, %1, %k0|%k0, %1, %2}
3200+
%vpextrw\t{%2, %1, %0|%0, %1, %2}"
3201+
[(set_attr "isa" "*,sse4")
3202+
(set_attr "type" "sselog1")
3203+
(set_attr "length_immediate" "1")
3204+
(set_attr "prefix" "maybe_vex")
3205+
(set_attr "mode" "TI")])
3206+
3207+
;; Extract one HImode element from a V2HImode register (operand 1) and
;; zero-extend it into a general register whose mode comes from the SWI48
;; mode iterator (defined outside this view).  The element index is
;; operand 2, which must satisfy const_0_to_1_operand.  Enabled for
;; TARGET_SSE2.  The destination is printed with %k0, using the same
;; pextrw template as the register alternative of the plain extract.
(define_insn "*pextrw_zext"
3208+
[(set (match_operand:SWI48 0 "register_operand" "=r")
3209+
(zero_extend:SWI48
3210+
(vec_select:HI
3211+
(match_operand:V2HI 1 "register_operand" "YW")
3212+
(parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")]))))]
3213+
"TARGET_SSE2"
3214+
"%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
3215+
[(set_attr "type" "sselog1")
3216+
(set_attr "length_immediate" "1")
3217+
(set_attr "prefix" "maybe_vex")
3218+
(set_attr "mode" "TI")])
3219+
3220+
;; Extract one QImode element from a V4QImode register (operand 1).  The
;; element index is operand 2, which must satisfy const_0_to_3_operand.
;; Enabled for TARGET_SSE4_1.
;; Alternative 0 writes a general register (printed with %k0);
;; alternative 1 writes memory.
;; NOTE(review): the %v template prefix together with prefix "maybe_vex"
;; presumably selects the VEX-encoded mnemonic when AVX is enabled --
;; confirm against the i386 output-template conventions.
(define_insn "*pextrb"
3221+
[(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")
3222+
(vec_select:QI
3223+
(match_operand:V4QI 1 "register_operand" "YW,YW")
3224+
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")])))]
3225+
"TARGET_SSE4_1"
3226+
"@
3227+
%vpextrb\t{%2, %1, %k0|%k0, %1, %2}
3228+
%vpextrb\t{%2, %1, %0|%0, %1, %2}"
3229+
[(set_attr "type" "sselog1")
3230+
(set_attr "prefix_data16" "1")
3231+
(set_attr "prefix_extra" "1")
3232+
(set_attr "length_immediate" "1")
3233+
(set_attr "prefix" "maybe_vex")
3234+
(set_attr "mode" "TI")])
3235+
3236+
;; Extract one QImode element from a V4QImode register (operand 1) and
;; zero-extend it into a general register whose mode comes from the SWI248
;; mode iterator (defined outside this view).  The element index is
;; operand 2, which must satisfy const_0_to_3_operand.  Enabled for
;; TARGET_SSE4_1.  The destination is printed with %k0, using the same
;; pextrb template as the register alternative of the plain extract.
(define_insn "*pextrb_zext"
3237+
[(set (match_operand:SWI248 0 "register_operand" "=r")
3238+
(zero_extend:SWI248
3239+
(vec_select:QI
3240+
(match_operand:V4QI 1 "register_operand" "YW")
3241+
(parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
3242+
"TARGET_SSE4_1"
3243+
"%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
3244+
[(set_attr "type" "sselog1")
3245+
(set_attr "prefix_data16" "1")
3246+
(set_attr "prefix_extra" "1")
3247+
(set_attr "length_immediate" "1")
3248+
(set_attr "prefix" "maybe_vex")
3249+
(set_attr "mode" "TI")])
3250+
3251+
;; Expander for setting one element of a V2HImode vector.
;; Operand 0 is the V2HImode vector register, operand 1 the HImode value
;; to store, operand 2 a constant element index.  Enabled for TARGET_SSE2.
;; All work is delegated to ix86_expand_vector_set, called as
;; (mmx_ok = false, target, val, elt); DONE then ends the expansion with
;; whatever insns that call emitted.
(define_expand "vec_setv2hi"
3252+
[(match_operand:V2HI 0 "register_operand")
3253+
(match_operand:HI 1 "register_operand")
3254+
(match_operand 2 "const_int_operand")]
3255+
"TARGET_SSE2"
3256+
{
3257+
ix86_expand_vector_set (false, operands[0], operands[1],
3258+
INTVAL (operands[2]));
3259+
DONE;
3260+
})
3261+
3262+
;; Expander for extracting one element of a V2HImode vector.
;; Operand 0 is the HImode destination register, operand 1 the V2HImode
;; source vector register, operand 2 a constant element index.  Enabled
;; for TARGET_SSE2.
;; All work is delegated to ix86_expand_vector_extract, called as
;; (mmx_ok = false, target, vec, elt); DONE then ends the expansion with
;; whatever insns that call emitted.
(define_expand "vec_extractv2hihi"
3263+
[(match_operand:HI 0 "register_operand")
3264+
(match_operand:V2HI 1 "register_operand")
3265+
(match_operand 2 "const_int_operand")]
3266+
"TARGET_SSE2"
3267+
{
3268+
ix86_expand_vector_extract (false, operands[0],
3269+
operands[1], INTVAL (operands[2]));
3270+
DONE;
3271+
})
3272+
3273+
;; Expander for setting one element of a V4QImode vector.
;; Operand 0 is the V4QImode vector register, operand 1 the QImode value
;; to store, operand 2 a constant element index.  Enabled only for
;; TARGET_SSE4_1.
;; All work is delegated to ix86_expand_vector_set, called as
;; (mmx_ok = false, target, val, elt); DONE then ends the expansion with
;; whatever insns that call emitted.
(define_expand "vec_setv4qi"
3274+
[(match_operand:V4QI 0 "register_operand")
3275+
(match_operand:QI 1 "register_operand")
3276+
(match_operand 2 "const_int_operand")]
3277+
"TARGET_SSE4_1"
3278+
{
3279+
ix86_expand_vector_set (false, operands[0], operands[1],
3280+
INTVAL (operands[2]));
3281+
DONE;
3282+
})
3283+
3284+
;; Expander for extracting one element of a V4QImode vector.
;; Operand 0 is the QImode destination register, operand 1 the V4QImode
;; source vector register, operand 2 a constant element index.  Enabled
;; only for TARGET_SSE4_1.
;; All work is delegated to ix86_expand_vector_extract, called as
;; (mmx_ok = false, target, vec, elt); DONE then ends the expansion with
;; whatever insns that call emitted.
(define_expand "vec_extractv4qiqi"
3285+
[(match_operand:QI 0 "register_operand")
3286+
(match_operand:V4QI 1 "register_operand")
3287+
(match_operand 2 "const_int_operand")]
3288+
"TARGET_SSE4_1"
3289+
{
3290+
ix86_expand_vector_extract (false, operands[0],
3291+
operands[1], INTVAL (operands[2]));
3292+
DONE;
3293+
})
3294+
31233295
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
31243296
;;
31253297
;; Miscellaneous
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
/* { dg-do run } */
2+
/* { dg-options "-O -msse2" } */
3+
/* { dg-require-effective-target sse2 } */
4+
/* Runtime test of __builtin_shuffle on 4-byte vectors of two shorts.
   Each TEST expansion shuffles two input vectors and compares the result
   against a reference built one element at a time.  */
5+
#include "isa-check.h"
6+
#include "sse-os-support.h"
7+
8+
typedef short S;
9+
typedef short V __attribute__((vector_size(4)));
10+
typedef short IV __attribute__((vector_size(4)));
11+
/* Union giving both element-wise (s) and whole-vector (v) access.  */
typedef union { S s[2]; V v; } U;
12+
13+
/* i[0] and i[1] are the shuffle inputs, b receives the shuffle result and
   c the element-wise reference (see TEST).  */
static U i[2], b, c;
14+
15+
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
/* Local assert replacement: calls __builtin_trap when T is false.  */
16+
#define assert(T) ((T) || (__builtin_trap (), 0))
17+
/* TEST (E0, E1): shuffle i[0].v and i[1].v with selector {E0, E1} into b,
   build the expected value in c by indexing i[0].s with the same
   selectors, then trap unless b and c compare equal.  The empty asm with
   a "memory" clobber sits between the stores and the comparison.
   NOTE(review): i[0].s[E] with E >= 2 indexes past the 2-element array s;
   presumably this is meant to reach the adjacent i[1] -- confirm.  */
18+
#define TEST(E0, E1) \
19+
b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
20+
c.s[0] = i[0].s[E0]; \
21+
c.s[1] = i[0].s[E1]; \
22+
__asm__("" : : : "memory"); \
23+
assert (memcmp (&b, &c, sizeof(c)) == 0);
24+
/* NOTE(review): presumably this include expands TEST for each selector
   combination and defines check (), which main calls -- confirm against
   the .inc file.  */
25+
#include "vperm-2-2.inc"
26+
27+
int main()
28+
{
29+
check_isa ();
30+
31+
/* Exit with status 0 without running the checks when sse_os_support ()
   reports false.  NOTE(review): exit is not declared in this file;
   presumably one of the included headers declares it -- confirm.  */
if (!sse_os_support ())
32+
exit (0);
33+
34+
i[0].s[0] = 0;
35+
i[0].s[1] = 1;
36+
/* NOTE(review): s has only 2 elements, so the next two stores index past
   i[0].s; presumably they are meant to initialize i[1].s[0] and
   i[1].s[1] -- confirm this matches the other vperm tests.  */
i[0].s[2] = 2;
37+
i[0].s[3] = 3;
38+
39+
check();
40+
return 0;
41+
}
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
/* { dg-do run } */
2+
/* { dg-options "-O -msse2" } */
3+
/* { dg-require-effective-target sse2 } */
4+
/* Runtime test of __builtin_shuffle on 4-byte vectors of four chars.
   Each TEST expansion shuffles two input vectors and compares the result
   against a reference built one element at a time.  */
5+
#include "isa-check.h"
6+
#include "sse-os-support.h"
7+
8+
typedef char S;
9+
typedef char V __attribute__((vector_size(4)));
10+
typedef char IV __attribute__((vector_size(4)));
11+
/* Union giving both element-wise (s) and whole-vector (v) access.  */
typedef union { S s[4]; V v; } U;
12+
13+
/* i[0] and i[1] are the shuffle inputs, b receives the shuffle result and
   c the element-wise reference (see TEST).  */
static U i[2], b, c;
14+
15+
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
/* Local assert replacement: calls __builtin_trap when T is false.  */
16+
#define assert(T) ((T) || (__builtin_trap (), 0))
17+
/* TEST (E0, E1, E2, E3): shuffle i[0].v and i[1].v with selector
   {E0, E1, E2, E3} into b, build the expected value in c by indexing
   i[0].s with the same selectors, then trap unless b and c compare equal.
   The empty asm with a "memory" clobber sits between the stores and the
   comparison.
   NOTE(review): i[0].s[E] with E >= 4 indexes past the 4-element array s;
   presumably this is meant to reach the adjacent i[1] -- confirm.  */
18+
#define TEST(E0, E1, E2, E3) \
19+
b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
20+
c.s[0] = i[0].s[E0]; \
21+
c.s[1] = i[0].s[E1]; \
22+
c.s[2] = i[0].s[E2]; \
23+
c.s[3] = i[0].s[E3]; \
24+
__asm__("" : : : "memory"); \
25+
assert (memcmp (&b, &c, sizeof(c)) == 0);
26+
/* NOTE(review): presumably this include expands TEST for each selector
   combination and defines check (), which main calls -- confirm against
   the .inc file.  */
27+
#include "vperm-4-2.inc"
28+
29+
int main()
30+
{
31+
check_isa ();
32+
33+
/* Exit with status 0 without running the checks when sse_os_support ()
   reports false.  NOTE(review): exit is not declared in this file;
   presumably one of the included headers declares it -- confirm.  */
if (!sse_os_support ())
34+
exit (0);
35+
36+
i[0].s[0] = 0;
37+
i[0].s[1] = 1;
38+
i[0].s[2] = 2;
39+
i[0].s[3] = 3;
40+
/* NOTE(review): s has only 4 elements, so the next four stores index past
   i[0].s; presumably they are meant to initialize i[1].s[0..3] -- confirm
   this matches the other vperm tests.  */
i[0].s[4] = 4;
41+
i[0].s[5] = 5;
42+
i[0].s[6] = 6;
43+
i[0].s[7] = 7;
44+
45+
check();
46+
return 0;
47+
}

0 commit comments

Comments
 (0)