@@ -157,3 +157,152 @@ block0(v0: f64x2, v1: f64x2, v2: f64x2):
157157; fmls v0.2d, v5.2d, v1.2d
158158; ret
159159
160+ function %f32x4_splat0(f32, f32x4, f32x4) -> f32x4 { ;; fma whose first multiplicand is a splat of a scalar f32 argument
161+ block0(v0: f32, v1: f32x4, v2: f32x4):
162+ v3 = splat.f32x4 v0 ;; broadcast scalar v0 into all four lanes
163+ v4 = fma v3, v1, v2 ;; v3 * v1 + v2; the expectation below is a by-element fmla reading lane s[0] of the copy of v0
164+ return v4
165+ }
166+
167+ ; VCode:
168+ ; block0:
169+ ; mov v5.16b, v0.16b
170+ ; mov v0.16b, v2.16b
171+ ; fmla v0.4s, v0.4s, v1.4s, v5.s[0]
172+ ; ret
173+ ;
174+ ; Disassembled:
175+ ; block0: ; offset 0x0
176+ ; mov v5.16b, v0.16b
177+ ; mov v0.16b, v2.16b
178+ ; fmla v0.4s, v1.4s, v5.s[0]
179+ ; ret
180+
181+ function %f32x4_splat1(f32x4, f32, f32x4) -> f32x4 { ;; fma of a negated vector with a splatted scalar as the second multiplicand
182+ block0(v0: f32x4, v1: f32, v2: f32x4):
183+ v3 = splat.f32x4 v1 ;; broadcast scalar v1 into all four lanes
184+ v4 = fneg v0 ;; negation is on the non-splat multiplicand
185+ v5 = fma v4, v3, v2 ;; (-v0) * v3 + v2; the expectation below is a by-element fmls reading v1.s[0]
186+ return v5
187+ }
188+
189+ ; VCode:
190+ ; block0:
191+ ; mov v5.16b, v0.16b
192+ ; mov v0.16b, v2.16b
193+ ; fmls v0.4s, v0.4s, v5.4s, v1.s[0]
194+ ; ret
195+ ;
196+ ; Disassembled:
197+ ; block0: ; offset 0x0
198+ ; mov v5.16b, v0.16b
199+ ; mov v0.16b, v2.16b
200+ ; fmls v0.4s, v5.4s, v1.s[0]
201+ ; ret
202+
203+ function %f32x4_splat2(f32x4, f32x4, f32x4) -> f32x4 { ;; fma whose first multiplicand is a lane splat written as a byte shuffle
204+ block0(v0: f32x4, v1: f32x4, v2: f32x4):
205+ v3 = bitcast.i8x16 little v0 ;; view v0 as bytes so it can feed shuffle
206+ v4 = shuffle v3, v3, 0x07060504_07060504_07060504_07060504 ;; mask repeats byte indices 4..7 in every 32-bit group (lane s[1] in the expectation below)
207+ v5 = bitcast.f32x4 little v4 ;; back to f32x4 for the fma
208+ v6 = fma v5, v1, v2 ;; v5 * v1 + v2; the expectation below is a by-element fmla with no separate shuffle instruction
209+ return v6
210+ }
211+
212+ ; VCode:
213+ ; block0:
214+ ; mov v5.16b, v0.16b
215+ ; mov v0.16b, v2.16b
216+ ; fmla v0.4s, v0.4s, v1.4s, v5.s[1]
217+ ; ret
218+ ;
219+ ; Disassembled:
220+ ; block0: ; offset 0x0
221+ ; mov v5.16b, v0.16b
222+ ; mov v0.16b, v2.16b
223+ ; fmla v0.4s, v1.4s, v5.s[1]
224+ ; ret
225+
226+ function %f32x4_splat3(f32x4, f32x4, f32x4) -> f32x4 { ;; fma whose second multiplicand is a negated lane splat written as a byte shuffle
227+ block0(v0: f32x4, v1: f32x4, v2: f32x4):
228+ v3 = bitcast.i8x16 little v1 ;; view v1 as bytes so it can feed shuffle
229+ v4 = shuffle v3, v3, 0x0f0e0d0c_0f0e0d0c_0f0e0d0c_0f0e0d0c ;; mask repeats byte indices 12..15 in every 32-bit group (lane s[3] in the expectation below)
230+ v5 = bitcast.f32x4 little v4 ;; back to f32x4
231+ v6 = fneg v5 ;; negation is applied to the splatted multiplicand here
232+ v7 = fma v0, v6, v2 ;; v0 * (-v5) + v2; the expectation below is a by-element fmls reading v1.s[3]
233+ return v7
234+ }
235+
236+ ; VCode:
237+ ; block0:
238+ ; mov v5.16b, v0.16b
239+ ; mov v0.16b, v2.16b
240+ ; fmls v0.4s, v0.4s, v5.4s, v1.s[3]
241+ ; ret
242+ ;
243+ ; Disassembled:
244+ ; block0: ; offset 0x0
245+ ; mov v5.16b, v0.16b
246+ ; mov v0.16b, v2.16b
247+ ; fmls v0.4s, v5.4s, v1.s[3]
248+ ; ret
249+
250+ function %f32x4_splat4(f32x4, f32x4, f32x4) -> f32x4 { ;; same shape as the other shuffle-based cases, but the expectation below is tbl plus a plain vector fmla
251+ block0(v0: f32x4, v1: f32x4, v2: f32x4):
252+ v3 = bitcast.i8x16 little v1 ;; view v1 as bytes so it can feed shuffle
253+ v4 = shuffle v3, v3, 0x1f1e1d1c_1f1e1d1c_1f1e1d1c_1f1e1d1c ;; mask repeats byte indices 0x1c..0x1f, i.e. indices >= 16 — NOTE(review): presumably why the by-element form is not selected; confirm against the lowering rules
254+ v5 = bitcast.f32x4 little v4 ;; back to f32x4
255+ v6 = fma v0, v5, v2 ;; v0 * v5 + v2; in the expectation the mask is built with movz/movk/dup and applied with tbl before fmla
256+ return v6
257+ }
258+
259+ ; VCode:
260+ ; block0:
261+ ; mov v31.16b, v1.16b
262+ ; movz w6, #7452
263+ ; movk w6, w6, #7966, LSL #16
264+ ; dup v17.4s, w6
265+ ; mov v30.16b, v31.16b
266+ ; tbl v19.16b, { v30.16b, v31.16b }, v17.16b
267+ ; mov v23.16b, v0.16b
268+ ; mov v0.16b, v2.16b
269+ ; fmla v0.4s, v0.4s, v23.4s, v19.4s
270+ ; ret
271+ ;
272+ ; Disassembled:
273+ ; block0: ; offset 0x0
274+ ; mov v31.16b, v1.16b
275+ ; mov w6, #0x1d1c
276+ ; movk w6, #0x1f1e, lsl #16
277+ ; dup v17.4s, w6
278+ ; mov v30.16b, v31.16b
279+ ; tbl v19.16b, {v30.16b, v31.16b}, v17.16b
280+ ; mov v23.16b, v0.16b
281+ ; mov v0.16b, v2.16b
282+ ; fmla v0.4s, v23.4s, v19.4s
283+ ; ret
284+
285+ function %f64x2_splat0(f64x2, f64x2, f64x2) -> f64x2 { ;; f64x2 variant: fma whose second multiplicand is a negated lane splat written as a byte shuffle
286+ block0(v0: f64x2, v1: f64x2, v2: f64x2):
287+ v3 = bitcast.i8x16 little v1 ;; view v1 as bytes so it can feed shuffle
288+ v4 = shuffle v3, v3, 0x0f0e0d0c0b0a0908_0f0e0d0c0b0a0908 ;; mask repeats byte indices 8..15 in both 64-bit halves (lane d[1] in the expectation below)
289+ v5 = bitcast.f64x2 little v4 ;; back to f64x2
290+ v6 = fneg v5 ;; negation is applied to the splatted multiplicand
291+ v7 = fma v0, v6, v2 ;; v0 * (-v5) + v2; the expectation below is a by-element fmls reading v1.d[1]
292+ return v7
293+ }
294+
295+ ; VCode:
296+ ; block0:
297+ ; mov v5.16b, v0.16b
298+ ; mov v0.16b, v2.16b
299+ ; fmls v0.2d, v0.2d, v5.2d, v1.d[1]
300+ ; ret
301+ ;
302+ ; Disassembled:
303+ ; block0: ; offset 0x0
304+ ; mov v5.16b, v0.16b
305+ ; mov v0.16b, v2.16b
306+ ; fmls v0.2d, v5.2d, v1.d[1]
307+ ; ret
308+
0 commit comments