// pulp/core_arch/x86/avx.rs

use super::*;
2
3impl Avx {
4	delegate!({
5		fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d;
6		fn _mm256_add_ps(a: __m256, b: __m256) -> __m256;
7		fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d;
8		fn _mm256_and_ps(a: __m256, b: __m256) -> __m256;
9		fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d;
10		fn _mm256_or_ps(a: __m256, b: __m256) -> __m256;
11		fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d;
12		fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256;
13		fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d;
14		fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256;
15		fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d;
16		fn _mm256_max_ps(a: __m256, b: __m256) -> __m256;
17		fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d;
18		fn _mm256_min_ps(a: __m256, b: __m256) -> __m256;
19		fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d;
20		fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256;
21		fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d;
22		fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256;
23		fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d;
24		fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256;
25		fn _mm256_div_ps(a: __m256, b: __m256) -> __m256;
26		fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d;
27		fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d;
28		fn _mm256_ceil_pd(a: __m256d) -> __m256d;
29		fn _mm256_floor_pd(a: __m256d) -> __m256d;
30		fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256;
31		fn _mm256_ceil_ps(a: __m256) -> __m256;
32		fn _mm256_floor_ps(a: __m256) -> __m256;
33		fn _mm256_sqrt_ps(a: __m256) -> __m256;
34		fn _mm256_sqrt_pd(a: __m256d) -> __m256d;
35		fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d;
36		fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
37		fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
38		fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256;
39		fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
40		fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d;
41		fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256;
42		fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d;
43		fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256;
44		fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d;
45		fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256;
46		fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d;
47		fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d;
48		fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128;
49		fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256;
50		fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d;
51		fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128;
52		fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d;
53		fn _mm256_cvtepi32_ps(a: __m256i) -> __m256;
54		fn _mm256_cvtpd_ps(a: __m256d) -> __m128;
55		fn _mm256_cvtps_epi32(a: __m256) -> __m256i;
56		fn _mm256_cvtps_pd(a: __m128) -> __m256d;
57		fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i;
58		fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i;
59		fn _mm256_cvttps_epi32(a: __m256) -> __m256i;
60		fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128;
61		fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d;
62		fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i;
63		fn _mm256_zeroall();
64		fn _mm256_zeroupper();
65		fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256;
66		fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128;
67		fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256;
68		fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128;
69		fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d;
70		fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d;
71		fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d;
72		fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d;
73		fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
74		fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d;
75		fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i;
76		fn _mm256_broadcast_ss(f: &f32) -> __m256;
77		fn _mm_broadcast_ss(f: &f32) -> __m128;
78		fn _mm256_broadcast_sd(f: &f64) -> __m256d;
79		fn _mm256_broadcast_ps(a: &__m128) -> __m256;
80		fn _mm256_broadcast_pd(a: &__m128d) -> __m256d;
81		fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256;
82		fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d;
83		fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i;
84		fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i;
85		fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i;
86		fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i;
87		unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d;
88		unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d);
89		unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256;
90		unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256);
91		unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d;
92		unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d);
93		unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256;
94		unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256);
95		unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i;
96		unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i);
97		unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i;
98		unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i);
99		unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d;
100		unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d);
101		unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d;
102		unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d);
103		unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256;
104		unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256);
105		unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128;
106		unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128);
107		fn _mm256_movehdup_ps(a: __m256) -> __m256;
108		fn _mm256_moveldup_ps(a: __m256) -> __m256;
109		fn _mm256_movedup_pd(a: __m256d) -> __m256d;
110		unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i;
111		unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i);
112		unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d);
113		unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256);
114		fn _mm256_rcp_ps(a: __m256) -> __m256;
115		fn _mm256_rsqrt_ps(a: __m256) -> __m256;
116		fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d;
117		fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256;
118		fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d;
119		fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256;
120		fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32;
121		fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32;
122		fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32;
123		fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32;
124		fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32;
125		fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32;
126		fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32;
127		fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32;
128		fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32;
129		fn _mm256_testz_ps(a: __m256, b: __m256) -> i32;
130		fn _mm256_testc_ps(a: __m256, b: __m256) -> i32;
131		fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32;
132		fn _mm_testz_ps(a: __m128, b: __m128) -> i32;
133		fn _mm_testc_ps(a: __m128, b: __m128) -> i32;
134		fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32;
135		fn _mm256_movemask_pd(a: __m256d) -> i32;
136		fn _mm256_movemask_ps(a: __m256) -> i32;
137		fn _mm256_setzero_pd() -> __m256d;
138		fn _mm256_setzero_ps() -> __m256;
139		fn _mm256_setzero_si256() -> __m256i;
140		fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d;
141		fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256;
142		fn _mm256_set_epi8(
143			e00: i8,
144			e01: i8,
145			e02: i8,
146			e03: i8,
147			e04: i8,
148			e05: i8,
149			e06: i8,
150			e07: i8,
151			e08: i8,
152			e09: i8,
153			e10: i8,
154			e11: i8,
155			e12: i8,
156			e13: i8,
157			e14: i8,
158			e15: i8,
159			e16: i8,
160			e17: i8,
161			e18: i8,
162			e19: i8,
163			e20: i8,
164			e21: i8,
165			e22: i8,
166			e23: i8,
167			e24: i8,
168			e25: i8,
169			e26: i8,
170			e27: i8,
171			e28: i8,
172			e29: i8,
173			e30: i8,
174			e31: i8,
175		) -> __m256i;
176		fn _mm256_set_epi16(
177			e00: i16,
178			e01: i16,
179			e02: i16,
180			e03: i16,
181			e04: i16,
182			e05: i16,
183			e06: i16,
184			e07: i16,
185			e08: i16,
186			e09: i16,
187			e10: i16,
188			e11: i16,
189			e12: i16,
190			e13: i16,
191			e14: i16,
192			e15: i16,
193		) -> __m256i;
194		fn _mm256_set_epi32(
195			e0: i32,
196			e1: i32,
197			e2: i32,
198			e3: i32,
199			e4: i32,
200			e5: i32,
201			e6: i32,
202			e7: i32,
203		) -> __m256i;
204		fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i;
205		fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d;
206		fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32)
207		-> __m256;
208		fn _mm256_setr_epi8(
209			e00: i8,
210			e01: i8,
211			e02: i8,
212			e03: i8,
213			e04: i8,
214			e05: i8,
215			e06: i8,
216			e07: i8,
217			e08: i8,
218			e09: i8,
219			e10: i8,
220			e11: i8,
221			e12: i8,
222			e13: i8,
223			e14: i8,
224			e15: i8,
225			e16: i8,
226			e17: i8,
227			e18: i8,
228			e19: i8,
229			e20: i8,
230			e21: i8,
231			e22: i8,
232			e23: i8,
233			e24: i8,
234			e25: i8,
235			e26: i8,
236			e27: i8,
237			e28: i8,
238			e29: i8,
239			e30: i8,
240			e31: i8,
241		) -> __m256i;
242		fn _mm256_setr_epi16(
243			e00: i16,
244			e01: i16,
245			e02: i16,
246			e03: i16,
247			e04: i16,
248			e05: i16,
249			e06: i16,
250			e07: i16,
251			e08: i16,
252			e09: i16,
253			e10: i16,
254			e11: i16,
255			e12: i16,
256			e13: i16,
257			e14: i16,
258			e15: i16,
259		) -> __m256i;
260		fn _mm256_setr_epi32(
261			e0: i32,
262			e1: i32,
263			e2: i32,
264			e3: i32,
265			e4: i32,
266			e5: i32,
267			e6: i32,
268			e7: i32,
269		) -> __m256i;
270		fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i;
271		fn _mm256_set1_pd(a: f64) -> __m256d;
272		fn _mm256_set1_ps(a: f32) -> __m256;
273		fn _mm256_set1_epi8(a: i8) -> __m256i;
274		fn _mm256_set1_epi16(a: i16) -> __m256i;
275		fn _mm256_set1_epi32(a: i32) -> __m256i;
276		fn _mm256_set1_epi64x(a: i64) -> __m256i;
277		fn _mm256_castpd_ps(a: __m256d) -> __m256;
278		fn _mm256_castps_pd(a: __m256) -> __m256d;
279		fn _mm256_castps_si256(a: __m256) -> __m256i;
280		fn _mm256_castsi256_ps(a: __m256i) -> __m256;
281		fn _mm256_castpd_si256(a: __m256d) -> __m256i;
282		fn _mm256_castsi256_pd(a: __m256i) -> __m256d;
283		fn _mm256_castps256_ps128(a: __m256) -> __m128;
284		fn _mm256_castpd256_pd128(a: __m256d) -> __m128d;
285		fn _mm256_castsi256_si128(a: __m256i) -> __m128i;
286		fn _mm256_castps128_ps256(a: __m128) -> __m256;
287		fn _mm256_castpd128_pd256(a: __m128d) -> __m256d;
288		fn _mm256_castsi128_si256(a: __m128i) -> __m256i;
289		fn _mm256_zextps128_ps256(a: __m128) -> __m256;
290		fn _mm256_zextsi128_si256(a: __m128i) -> __m256i;
291		fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d;
292		fn _mm256_undefined_ps() -> __m256;
293		fn _mm256_undefined_pd() -> __m256d;
294		fn _mm256_undefined_si256() -> __m256i;
295		fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256;
296		fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d;
297		fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i;
298		fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256;
299		fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d;
300		fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i;
301		unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256;
302		unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d;
303		unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i;
304		unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256);
305		unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d);
306		unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i);
307		fn _mm256_cvtss_f32(a: __m256) -> f32;
308	});
309}