1use super::*;
2
// Safe wrappers for the AVX intrinsics of `core::arch::x86_64` (plus the
// VEX-encoded 128-bit forms such as `_mm_cmp_pd` / `_mm_maskload_ps` that
// are also gated on the `avx` CPU feature).
//
// `delegate!` expands each listed signature into an inherent method on `Avx`
// that forwards to the intrinsic of the same name. Methods declared `unsafe`
// here (the raw-pointer load/store group) stay `unsafe`; the rest become
// safe to call through the wrapper.
//
// NOTE(review): the soundness of exposing these as safe methods rests on how
// `Avx` values are constructed (presumably runtime feature detection) and on
// the `delegate!` expansion — both are defined outside this file; confirm
// there before relying on this comment.
impl Avx {
    delegate!({
        // --- Packed arithmetic ---
        fn _mm256_add_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_add_ps(a: __m256, b: __m256) -> __m256;
        // --- Bitwise logic on float lanes ---
        fn _mm256_and_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_and_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_or_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_or_ps(a: __m256, b: __m256) -> __m256;
        // --- In-lane shuffles (immediate selects lanes from a and b) ---
        fn _mm256_shuffle_pd<const MASK: i32>(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_shuffle_ps<const MASK: i32>(a: __m256, b: __m256) -> __m256;
        // andnot computes `!a & b` (note the operand order).
        fn _mm256_andnot_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_andnot_ps(a: __m256, b: __m256) -> __m256;
        // --- Lanewise min/max ---
        fn _mm256_max_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_max_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_min_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_min_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_mul_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_mul_ps(a: __m256, b: __m256) -> __m256;
        // addsub: alternately subtracts (even lanes) and adds (odd lanes).
        fn _mm256_addsub_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_addsub_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_sub_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_sub_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_div_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_div_pd(a: __m256d, b: __m256d) -> __m256d;
        // --- Rounding (ROUNDING is an _MM_FROUND_* control immediate) ---
        fn _mm256_round_pd<const ROUNDING: i32>(a: __m256d) -> __m256d;
        fn _mm256_ceil_pd(a: __m256d) -> __m256d;
        fn _mm256_floor_pd(a: __m256d) -> __m256d;
        fn _mm256_round_ps<const ROUNDING: i32>(a: __m256) -> __m256;
        fn _mm256_ceil_ps(a: __m256) -> __m256;
        fn _mm256_floor_ps(a: __m256) -> __m256;
        fn _mm256_sqrt_ps(a: __m256) -> __m256;
        fn _mm256_sqrt_pd(a: __m256d) -> __m256d;
        // --- Blends: immediate-mask and variable (sign-bit of c) selects ---
        fn _mm256_blend_pd<const IMM4: i32>(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
        fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d;
        fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256;
        // Dot product with immediate lane-selection/broadcast mask.
        fn _mm256_dp_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
        // --- Horizontal add/sub of adjacent lane pairs ---
        fn _mm256_hadd_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_hadd_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_hsub_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_hsub_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_xor_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_xor_ps(a: __m256, b: __m256) -> __m256;
        // --- Comparisons (IMM5 is a _CMP_* predicate; result is an all-ones
        //     / all-zeros per-lane mask) ---
        fn _mm_cmp_pd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d;
        fn _mm256_cmp_pd<const IMM5: i32>(a: __m256d, b: __m256d) -> __m256d;
        fn _mm_cmp_ps<const IMM5: i32>(a: __m128, b: __m128) -> __m128;
        fn _mm256_cmp_ps<const IMM5: i32>(a: __m256, b: __m256) -> __m256;
        fn _mm_cmp_sd<const IMM5: i32>(a: __m128d, b: __m128d) -> __m128d;
        fn _mm_cmp_ss<const IMM5: i32>(a: __m128, b: __m128) -> __m128;
        // --- Conversions (the `tt` forms truncate toward zero) ---
        fn _mm256_cvtepi32_pd(a: __m128i) -> __m256d;
        fn _mm256_cvtepi32_ps(a: __m256i) -> __m256;
        fn _mm256_cvtpd_ps(a: __m256d) -> __m128;
        fn _mm256_cvtps_epi32(a: __m256) -> __m256i;
        fn _mm256_cvtps_pd(a: __m128) -> __m256d;
        fn _mm256_cvttpd_epi32(a: __m256d) -> __m128i;
        fn _mm256_cvtpd_epi32(a: __m256d) -> __m128i;
        fn _mm256_cvttps_epi32(a: __m256) -> __m256i;
        // --- Extract the low (IMM1 = 0) or high (IMM1 = 1) 128-bit half ---
        fn _mm256_extractf128_ps<const IMM1: i32>(a: __m256) -> __m128;
        fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d;
        fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i;
        // --- AVX/SSE transition helpers (zero the ymm state) ---
        fn _mm256_zeroall();
        fn _mm256_zeroupper();
        // --- Permutes: variable (control in a vector) and immediate forms ---
        fn _mm256_permutevar_ps(a: __m256, b: __m256i) -> __m256;
        fn _mm_permutevar_ps(a: __m128, b: __m128i) -> __m128;
        fn _mm256_permute_ps<const IMM8: i32>(a: __m256) -> __m256;
        fn _mm_permute_ps<const IMM8: i32>(a: __m128) -> __m128;
        fn _mm256_permutevar_pd(a: __m256d, b: __m256i) -> __m256d;
        fn _mm_permutevar_pd(a: __m128d, b: __m128i) -> __m128d;
        fn _mm256_permute_pd<const IMM4: i32>(a: __m256d) -> __m256d;
        fn _mm_permute_pd<const IMM2: i32>(a: __m128d) -> __m128d;
        // Select/zero whole 128-bit halves from the concatenation of a and b.
        fn _mm256_permute2f128_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256;
        fn _mm256_permute2f128_pd<const IMM8: i32>(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_permute2f128_si256<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i;
        // --- Broadcasts (safe references instead of raw pointers) ---
        fn _mm256_broadcast_ss(f: &f32) -> __m256;
        fn _mm_broadcast_ss(f: &f32) -> __m128;
        fn _mm256_broadcast_sd(f: &f64) -> __m256d;
        fn _mm256_broadcast_ps(a: &__m128) -> __m256;
        fn _mm256_broadcast_pd(a: &__m128d) -> __m256d;
        // --- Insert a 128-bit half / a single integer element ---
        fn _mm256_insertf128_ps<const IMM1: i32>(a: __m256, b: __m128) -> __m256;
        fn _mm256_insertf128_pd<const IMM1: i32>(a: __m256d, b: __m128d) -> __m256d;
        fn _mm256_insertf128_si256<const IMM1: i32>(a: __m256i, b: __m128i) -> __m256i;
        fn _mm256_insert_epi8<const INDEX: i32>(a: __m256i, i: i8) -> __m256i;
        fn _mm256_insert_epi16<const INDEX: i32>(a: __m256i, i: i16) -> __m256i;
        fn _mm256_insert_epi32<const INDEX: i32>(a: __m256i, i: i32) -> __m256i;
        // --- Loads/stores: kept `unsafe` (raw pointers; the non-`u` forms
        //     additionally require 32-byte alignment) ---
        unsafe fn _mm256_load_pd(mem_addr: *const f64) -> __m256d;
        unsafe fn _mm256_store_pd(mem_addr: *mut f64, a: __m256d);
        unsafe fn _mm256_load_ps(mem_addr: *const f32) -> __m256;
        unsafe fn _mm256_store_ps(mem_addr: *mut f32, a: __m256);
        unsafe fn _mm256_loadu_pd(mem_addr: *const f64) -> __m256d;
        unsafe fn _mm256_storeu_pd(mem_addr: *mut f64, a: __m256d);
        unsafe fn _mm256_loadu_ps(mem_addr: *const f32) -> __m256;
        unsafe fn _mm256_storeu_ps(mem_addr: *mut f32, a: __m256);
        unsafe fn _mm256_load_si256(mem_addr: *const __m256i) -> __m256i;
        unsafe fn _mm256_store_si256(mem_addr: *mut __m256i, a: __m256i);
        unsafe fn _mm256_loadu_si256(mem_addr: *const __m256i) -> __m256i;
        unsafe fn _mm256_storeu_si256(mem_addr: *mut __m256i, a: __m256i);
        // --- Masked loads/stores (only lanes whose mask sign bit is set
        //     are touched) ---
        unsafe fn _mm256_maskload_pd(mem_addr: *const f64, mask: __m256i) -> __m256d;
        unsafe fn _mm256_maskstore_pd(mem_addr: *mut f64, mask: __m256i, a: __m256d);
        unsafe fn _mm_maskload_pd(mem_addr: *const f64, mask: __m128i) -> __m128d;
        unsafe fn _mm_maskstore_pd(mem_addr: *mut f64, mask: __m128i, a: __m128d);
        unsafe fn _mm256_maskload_ps(mem_addr: *const f32, mask: __m256i) -> __m256;
        unsafe fn _mm256_maskstore_ps(mem_addr: *mut f32, mask: __m256i, a: __m256);
        unsafe fn _mm_maskload_ps(mem_addr: *const f32, mask: __m128i) -> __m128;
        unsafe fn _mm_maskstore_ps(mem_addr: *mut f32, mask: __m128i, a: __m128);
        // --- Duplicate odd/even/low lanes ---
        fn _mm256_movehdup_ps(a: __m256) -> __m256;
        fn _mm256_moveldup_ps(a: __m256) -> __m256;
        fn _mm256_movedup_pd(a: __m256d) -> __m256d;
        // Unaligned integer load tolerant of cache-line splits.
        unsafe fn _mm256_lddqu_si256(mem_addr: *const __m256i) -> __m256i;
        // --- Non-temporal (streaming) stores ---
        unsafe fn _mm256_stream_si256(mem_addr: *mut __m256i, a: __m256i);
        unsafe fn _mm256_stream_pd(mem_addr: *mut f64, a: __m256d);
        unsafe fn _mm256_stream_ps(mem_addr: *mut f32, a: __m256);
        // --- Fast approximate reciprocal / reciprocal sqrt ---
        fn _mm256_rcp_ps(a: __m256) -> __m256;
        fn _mm256_rsqrt_ps(a: __m256) -> __m256;
        // --- Interleave high/low lanes of each 128-bit half ---
        fn _mm256_unpackhi_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_unpackhi_ps(a: __m256, b: __m256) -> __m256;
        fn _mm256_unpacklo_pd(a: __m256d, b: __m256d) -> __m256d;
        fn _mm256_unpacklo_ps(a: __m256, b: __m256) -> __m256;
        // --- VTESTP*/PTEST predicates: ZF/CF-style bit tests returning 0/1 ---
        fn _mm256_testz_si256(a: __m256i, b: __m256i) -> i32;
        fn _mm256_testc_si256(a: __m256i, b: __m256i) -> i32;
        fn _mm256_testnzc_si256(a: __m256i, b: __m256i) -> i32;
        fn _mm256_testz_pd(a: __m256d, b: __m256d) -> i32;
        fn _mm256_testc_pd(a: __m256d, b: __m256d) -> i32;
        fn _mm256_testnzc_pd(a: __m256d, b: __m256d) -> i32;
        fn _mm_testz_pd(a: __m128d, b: __m128d) -> i32;
        fn _mm_testc_pd(a: __m128d, b: __m128d) -> i32;
        fn _mm_testnzc_pd(a: __m128d, b: __m128d) -> i32;
        fn _mm256_testz_ps(a: __m256, b: __m256) -> i32;
        fn _mm256_testc_ps(a: __m256, b: __m256) -> i32;
        fn _mm256_testnzc_ps(a: __m256, b: __m256) -> i32;
        fn _mm_testz_ps(a: __m128, b: __m128) -> i32;
        fn _mm_testc_ps(a: __m128, b: __m128) -> i32;
        fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32;
        // --- Gather the sign bit of each lane into an integer bitmask ---
        fn _mm256_movemask_pd(a: __m256d) -> i32;
        fn _mm256_movemask_ps(a: __m256) -> i32;
        // --- Constructors: zero, explicit lanes (set = high-to-low,
        //     setr = low-to-high), and splat (set1) ---
        fn _mm256_setzero_pd() -> __m256d;
        fn _mm256_setzero_ps() -> __m256;
        fn _mm256_setzero_si256() -> __m256i;
        fn _mm256_set_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d;
        fn _mm256_set_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32) -> __m256;
        fn _mm256_set_epi8(
            e00: i8,
            e01: i8,
            e02: i8,
            e03: i8,
            e04: i8,
            e05: i8,
            e06: i8,
            e07: i8,
            e08: i8,
            e09: i8,
            e10: i8,
            e11: i8,
            e12: i8,
            e13: i8,
            e14: i8,
            e15: i8,
            e16: i8,
            e17: i8,
            e18: i8,
            e19: i8,
            e20: i8,
            e21: i8,
            e22: i8,
            e23: i8,
            e24: i8,
            e25: i8,
            e26: i8,
            e27: i8,
            e28: i8,
            e29: i8,
            e30: i8,
            e31: i8,
        ) -> __m256i;
        fn _mm256_set_epi16(
            e00: i16,
            e01: i16,
            e02: i16,
            e03: i16,
            e04: i16,
            e05: i16,
            e06: i16,
            e07: i16,
            e08: i16,
            e09: i16,
            e10: i16,
            e11: i16,
            e12: i16,
            e13: i16,
            e14: i16,
            e15: i16,
        ) -> __m256i;
        fn _mm256_set_epi32(
            e0: i32,
            e1: i32,
            e2: i32,
            e3: i32,
            e4: i32,
            e5: i32,
            e6: i32,
            e7: i32,
        ) -> __m256i;
        fn _mm256_set_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i;
        fn _mm256_setr_pd(a: f64, b: f64, c: f64, d: f64) -> __m256d;
        fn _mm256_setr_ps(a: f32, b: f32, c: f32, d: f32, e: f32, f: f32, g: f32, h: f32)
            -> __m256;
        fn _mm256_setr_epi8(
            e00: i8,
            e01: i8,
            e02: i8,
            e03: i8,
            e04: i8,
            e05: i8,
            e06: i8,
            e07: i8,
            e08: i8,
            e09: i8,
            e10: i8,
            e11: i8,
            e12: i8,
            e13: i8,
            e14: i8,
            e15: i8,
            e16: i8,
            e17: i8,
            e18: i8,
            e19: i8,
            e20: i8,
            e21: i8,
            e22: i8,
            e23: i8,
            e24: i8,
            e25: i8,
            e26: i8,
            e27: i8,
            e28: i8,
            e29: i8,
            e30: i8,
            e31: i8,
        ) -> __m256i;
        fn _mm256_setr_epi16(
            e00: i16,
            e01: i16,
            e02: i16,
            e03: i16,
            e04: i16,
            e05: i16,
            e06: i16,
            e07: i16,
            e08: i16,
            e09: i16,
            e10: i16,
            e11: i16,
            e12: i16,
            e13: i16,
            e14: i16,
            e15: i16,
        ) -> __m256i;
        fn _mm256_setr_epi32(
            e0: i32,
            e1: i32,
            e2: i32,
            e3: i32,
            e4: i32,
            e5: i32,
            e6: i32,
            e7: i32,
        ) -> __m256i;
        fn _mm256_setr_epi64x(a: i64, b: i64, c: i64, d: i64) -> __m256i;
        fn _mm256_set1_pd(a: f64) -> __m256d;
        fn _mm256_set1_ps(a: f32) -> __m256;
        fn _mm256_set1_epi8(a: i8) -> __m256i;
        fn _mm256_set1_epi16(a: i16) -> __m256i;
        fn _mm256_set1_epi32(a: i32) -> __m256i;
        fn _mm256_set1_epi64x(a: i64) -> __m256i;
        // --- Bit-preserving casts (no instructions generated) ---
        fn _mm256_castpd_ps(a: __m256d) -> __m256;
        fn _mm256_castps_pd(a: __m256) -> __m256d;
        fn _mm256_castps_si256(a: __m256) -> __m256i;
        fn _mm256_castsi256_ps(a: __m256i) -> __m256;
        fn _mm256_castpd_si256(a: __m256d) -> __m256i;
        fn _mm256_castsi256_pd(a: __m256i) -> __m256d;
        // Truncating casts keep the low 128 bits; widening casts from 128 to
        // 256 bits leave the upper half undefined (use zext* for zeroed).
        fn _mm256_castps256_ps128(a: __m256) -> __m128;
        fn _mm256_castpd256_pd128(a: __m256d) -> __m128d;
        fn _mm256_castsi256_si128(a: __m256i) -> __m128i;
        fn _mm256_castps128_ps256(a: __m128) -> __m256;
        fn _mm256_castpd128_pd256(a: __m128d) -> __m256d;
        fn _mm256_castsi128_si256(a: __m128i) -> __m256i;
        fn _mm256_zextps128_ps256(a: __m128) -> __m256;
        fn _mm256_zextsi128_si256(a: __m128i) -> __m256i;
        fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d;
        // --- Uninitialized-content vectors ---
        fn _mm256_undefined_ps() -> __m256;
        fn _mm256_undefined_pd() -> __m256d;
        fn _mm256_undefined_si256() -> __m256i;
        // --- Build a 256-bit value from two 128-bit halves
        //     (set_* takes hi first, setr_* takes lo first) ---
        fn _mm256_set_m128(hi: __m128, lo: __m128) -> __m256;
        fn _mm256_set_m128d(hi: __m128d, lo: __m128d) -> __m256d;
        fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i;
        fn _mm256_setr_m128(lo: __m128, hi: __m128) -> __m256;
        fn _mm256_setr_m128d(lo: __m128d, hi: __m128d) -> __m256d;
        fn _mm256_setr_m128i(lo: __m128i, hi: __m128i) -> __m256i;
        // --- Load/store the two halves from/to separate addresses ---
        unsafe fn _mm256_loadu2_m128(hiaddr: *const f32, loaddr: *const f32) -> __m256;
        unsafe fn _mm256_loadu2_m128d(hiaddr: *const f64, loaddr: *const f64) -> __m256d;
        unsafe fn _mm256_loadu2_m128i(hiaddr: *const __m128i, loaddr: *const __m128i) -> __m256i;
        unsafe fn _mm256_storeu2_m128(hiaddr: *mut f32, loaddr: *mut f32, a: __m256);
        unsafe fn _mm256_storeu2_m128d(hiaddr: *mut f64, loaddr: *mut f64, a: __m256d);
        unsafe fn _mm256_storeu2_m128i(hiaddr: *mut __m128i, loaddr: *mut __m128i, a: __m256i);
        // Extract the lowest f32 lane as a scalar.
        fn _mm256_cvtss_f32(a: __m256) -> f32;
    });
}