@@ -221,6 +221,86 @@ define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {
   ret <8 x double> %a
 }

+; Simplify demanded bits (PR106413)
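+; Note: vpermilvar.ps* only reads the low 2 bits of each i32 mask element and
+; vpermilvar.pd* only reads bit 1 of each i64 mask element when selecting a
+; source element, so only those bits are demanded from the mask. In the tests
+; below the OR constants touch only the ignored bits.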
+
+define <4 x float> @bits_test_vpermilvar_ps(<4 x float> %InVec, <4 x i32> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_ps(
+; CHECK-NEXT: [[M:%.*]] = or <4 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4>
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[INVEC:%.*]], <4 x i32> [[M]])
+; CHECK-NEXT: ret <4 x float> [[S]]
+;
+  %m = or <4 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4>
+  %s = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %InVec, <4 x i32> %m)
+  ret <4 x float> %s
+}
+
+define <8 x float> @bits_test_vpermilvar_ps_256(<8 x float> %InVec, <8 x i32> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_ps_256(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[INVEC:%.*]], <8 x i32> [[M]])
+; CHECK-NEXT: ret <8 x float> [[S]]
+;
+  %m = or <8 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
+  %s = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %InVec, <8 x i32> %m)
+  ret <8 x float> %s
+}
+
+define <16 x float> @bits_test_vpermilvar_ps_512(<16 x float> %InVec, <16 x i32> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_ps_512(
+; CHECK-NEXT: [[M:%.*]] = or <16 x i32> [[INMASK:%.*]], <i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4, i32 0, i32 12, i32 -4, i32 -4>
+; CHECK-NEXT: [[S:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[INVEC:%.*]], <16 x i32> [[M]])
+; CHECK-NEXT: ret <16 x float> [[S]]
+;
+  %m = or <16 x i32> %InMask, <i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4, i32 0, i32 12, i32 4294967292, i32 -4>
+  %s = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %InVec, <16 x i32> %m)
+  ret <16 x float> %s
+}
+
+define <2 x double> @bits_test_vpermilvar_pd(<2 x double> %InVec, <2 x i64> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_pd(
+; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 4294967293>
+; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT: ret <2 x double> [[S]]
+;
+  %m = or <2 x i64> %InMask, <i64 0, i64 4294967293>
+  %s = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %InVec, <2 x i64> %m)
+  ret <2 x double> %s
+}
+
+define <4 x double> @bits_test_vpermilvar_pd_256(<4 x double> %InVec, <4 x i64> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_pd_256(
+; CHECK-NEXT: [[M:%.*]] = or <4 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3>
+; CHECK-NEXT: [[S:%.*]] = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[INVEC:%.*]], <4 x i64> [[M]])
+; CHECK-NEXT: ret <4 x double> [[S]]
+;
+  %m = or <4 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3>
+  %s = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %InVec, <4 x i64> %m)
+  ret <4 x double> %s
+}
+
+define <8 x double> @bits_test_vpermilvar_pd_512(<8 x double> %InVec, <8 x i64> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_pd_512(
+; CHECK-NEXT: [[M:%.*]] = or <8 x i64> [[INMASK:%.*]], <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
+; CHECK-NEXT: [[S:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[INVEC:%.*]], <8 x i64> [[M]])
+; CHECK-NEXT: ret <8 x double> [[S]]
+;
+  %m = or <8 x i64> %InMask, <i64 0, i64 1, i64 4294967293, i64 -3, i64 0, i64 1, i64 4294967293, i64 -3>
+  %s = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %InVec, <8 x i64> %m)
+  ret <8 x double> %s
+}
+
+; negative test - vpermilpd uses bit 1, not bit 0, as the index bit
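+; The OR below sets bit 1 of the second mask element, which changes the
+; selected element, so this mask must not be simplified away.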
+define <2 x double> @bits_test_vpermilvar_pd_negative(<2 x double> %InVec, <2 x i64> %InMask) {
+; CHECK-LABEL: @bits_test_vpermilvar_pd_negative(
+; CHECK-NEXT: [[M:%.*]] = or <2 x i64> [[INMASK:%.*]], <i64 0, i64 2>
+; CHECK-NEXT: [[S:%.*]] = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[INVEC:%.*]], <2 x i64> [[M]])
+; CHECK-NEXT: ret <2 x double> [[S]]
+;
+  %m = or <2 x i64> %InMask, <i64 0, i64 2>
+  %s = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %InVec, <2 x i64> %m)
+  ret <2 x double> %s
+}
+
 ; Simplify demanded elts

 define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {