pulp/core_arch/x86/
sse2.rs

1use super::*;
2
// Inherent methods of `Sse2`, declared through the `delegate!` macro.
//
// Every entry below is a bare signature (no body) handed to `delegate!`, whose
// definition is not part of this file. NOTE(review): the names and signatures
// appear to mirror the SSE2 intrinsics of `core::arch`, so the macro presumably
// expands each entry into a method that forwards to the intrinsic of the same
// name -- confirm against the macro definition.
//
// Every entry marked `unsafe` takes a raw-pointer argument (`mem_addr`). The
// only pointer-taking entry that is NOT marked `unsafe` is `_mm_clflush`.
//
// Only plain `//` comments are used inside the macro invocation: `///` doc
// comments would be turned into `#[doc]` attribute tokens and handed to the
// macro, whereas plain comments are discarded before the macro sees its input.
3impl Sse2 {
4	delegate!({
		// Pause, cache-line flush and fence entries.
5		fn _mm_pause();
		// Declared as a safe fn although it takes a raw pointer; the clippy lint
		// that would flag this is silenced here.
		// NOTE(review): `core::arch` appears to expose `_mm_clflush` as an
		// `unsafe fn` -- confirm that a safe signature is intended.
6		#[allow(clippy::not_unsafe_ptr_arg_deref)]
7		fn _mm_clflush(p: *const u8);
8		fn _mm_lfence();
9		fn _mm_mfence();
		// Two-operand `__m128i` arithmetic entries:
		// add / adds, avg, madd, max / min, mulhi / mullo / mul, sad, sub / subs.
10		fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i;
11		fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i;
12		fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i;
13		fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i;
14		fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i;
15		fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i;
16		fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i;
17		fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i;
18		fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i;
19		fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i;
20		fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i;
21		fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i;
22		fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i;
23		fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i;
24		fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i;
25		fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i;
26		fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i;
27		fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i;
28		fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i;
29		fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i;
30		fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i;
31		fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i;
32		fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i;
33		fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i;
34		fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i;
35		fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i;
36		fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i;
37		fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i;
		// Shift entries on `__m128i`. The immediate forms take the amount as the
		// const generic `IMM8`; the remaining forms take it as a `count: __m128i`
		// argument.
38		fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
39		fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
40		fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
41		fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
42		fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i;
43		fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
44		fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i;
45		fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
46		fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i;
47		fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
48		fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i;
49		fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
50		fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i;
51		fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
52		fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
53		fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i;
54		fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
55		fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i;
56		fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
57		fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i;
		// Bitwise and / andnot / or / xor entries on `__m128i`.
58		fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i;
59		fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i;
60		fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i;
61		fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i;
		// Integer comparison entries (cmpeq / cmpgt / cmplt); each returns a
		// `__m128i`.
62		fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i;
63		fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i;
64		fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i;
65		fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i;
66		fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i;
67		fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i;
68		fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i;
69		fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i;
70		fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i;
		// Conversion entries between `i32`, `__m128i`, `__m128` and `__m128d`.
71		fn _mm_cvtepi32_pd(a: __m128i) -> __m128d;
72		fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d;
73		fn _mm_cvtepi32_ps(a: __m128i) -> __m128;
74		fn _mm_cvtps_epi32(a: __m128) -> __m128i;
75		fn _mm_cvtsi32_si128(a: i32) -> __m128i;
76		fn _mm_cvtsi128_si32(a: __m128i) -> i32;
		// `__m128i` constructors from scalar arguments: set / set1 / setr / setzero.
77		fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i;
78		fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
79		fn _mm_set_epi16(
80			e7: i16,
81			e6: i16,
82			e5: i16,
83			e4: i16,
84			e3: i16,
85			e2: i16,
86			e1: i16,
87			e0: i16,
88		) -> __m128i;
89		fn _mm_set_epi8(
90			e15: i8,
91			e14: i8,
92			e13: i8,
93			e12: i8,
94			e11: i8,
95			e10: i8,
96			e9: i8,
97			e8: i8,
98			e7: i8,
99			e6: i8,
100			e5: i8,
101			e4: i8,
102			e3: i8,
103			e2: i8,
104			e1: i8,
105			e0: i8,
106		) -> __m128i;
107		fn _mm_set1_epi64x(a: i64) -> __m128i;
108		fn _mm_set1_epi32(a: i32) -> __m128i;
109		fn _mm_set1_epi16(a: i16) -> __m128i;
110		fn _mm_set1_epi8(a: i8) -> __m128i;
111		fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
112		fn _mm_setr_epi16(
113			e7: i16,
114			e6: i16,
115			e5: i16,
116			e4: i16,
117			e3: i16,
118			e2: i16,
119			e1: i16,
120			e0: i16,
121		) -> __m128i;
122		fn _mm_setr_epi8(
123			e15: i8,
124			e14: i8,
125			e13: i8,
126			e12: i8,
127			e11: i8,
128			e10: i8,
129			e9: i8,
130			e8: i8,
131			e7: i8,
132			e6: i8,
133			e5: i8,
134			e4: i8,
135			e3: i8,
136			e2: i8,
137			e1: i8,
138			e0: i8,
139		) -> __m128i;
140		fn _mm_setzero_si128() -> __m128i;
		// Integer load / store / stream entries. All take a raw `mem_addr` pointer
		// and are therefore declared `unsafe`.
141		unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i;
142		unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i;
143		unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i;
144		unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8);
145		unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i);
146		unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i);
147		unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i);
148		unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i);
149		unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32);
		// Move, pack, extract / insert, movemask, shuffle and unpack entries on
		// `__m128i`. Extract / insert / shuffle take their selector as the const
		// generic `IMM8`.
150		fn _mm_move_epi64(a: __m128i) -> __m128i;
151		fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i;
152		fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i;
153		fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i;
154		fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32;
155		fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i;
156		fn _mm_movemask_epi8(a: __m128i) -> i32;
157		fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
158		fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
159		fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
160		fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i;
161		fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i;
162		fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i;
163		fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i;
164		fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i;
165		fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i;
166		fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i;
167		fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i;
		// `__m128d` arithmetic entries, each in an `_sd` and a `_pd` flavour.
		// Note that `_mm_sqrt_sd` is declared with two operands while
		// `_mm_sqrt_pd` is declared with one.
168		fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d;
169		fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d;
170		fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d;
171		fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d;
172		fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d;
173		fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d;
174		fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d;
175		fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d;
176		fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d;
177		fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d;
178		fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d;
179		fn _mm_sqrt_pd(a: __m128d) -> __m128d;
180		fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d;
181		fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d;
		// Bitwise and / andnot / or / xor entries on `__m128d`.
182		fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d;
183		fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d;
184		fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d;
185		fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d;
		// `cmp*_sd` comparison entries; each returns a `__m128d`.
186		fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d;
187		fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d;
188		fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d;
189		fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d;
190		fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d;
191		fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d;
192		fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d;
193		fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d;
194		fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d;
195		fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d;
196		fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d;
197		fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d;
		// `cmp*_pd` comparison entries; each returns a `__m128d`.
198		fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d;
199		fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d;
200		fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d;
201		fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d;
202		fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d;
203		fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d;
204		fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d;
205		fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d;
206		fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d;
207		fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d;
208		fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d;
209		fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d;
		// `comi*_sd` / `ucomi*_sd` comparison entries; unlike the `cmp*` entries
		// above, these return an `i32`.
210		fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32;
211		fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32;
212		fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32;
213		fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32;
214		fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32;
215		fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32;
216		fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32;
217		fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32;
218		fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32;
219		fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32;
220		fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32;
221		fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32;
		// Conversion entries involving `__m128d` / `__m128` / `f64` / `i32`
		// (`cvt*` and `cvtt*`).
222		fn _mm_cvtpd_ps(a: __m128d) -> __m128;
223		fn _mm_cvtps_pd(a: __m128) -> __m128d;
224		fn _mm_cvtpd_epi32(a: __m128d) -> __m128i;
225		fn _mm_cvtsd_si32(a: __m128d) -> i32;
226		fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128;
227		fn _mm_cvtsd_f64(a: __m128d) -> f64;
228		fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d;
229		fn _mm_cvttpd_epi32(a: __m128d) -> __m128i;
230		fn _mm_cvttsd_si32(a: __m128d) -> i32;
231		fn _mm_cvttps_epi32(a: __m128) -> __m128i;
		// `__m128d` constructors from `f64` arguments, plus movemask.
232		fn _mm_set_sd(a: f64) -> __m128d;
233		fn _mm_set1_pd(a: f64) -> __m128d;
234		fn _mm_set_pd1(a: f64) -> __m128d;
235		fn _mm_set_pd(a: f64, b: f64) -> __m128d;
236		fn _mm_setr_pd(a: f64, b: f64) -> __m128d;
237		fn _mm_setzero_pd() -> __m128d;
238		fn _mm_movemask_pd(a: __m128d) -> i32;
		// `__m128d` load / store / stream entries. All take a raw `mem_addr`
		// pointer and are therefore declared `unsafe`.
239		unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d;
240		unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d;
241		unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
242		unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
243		unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d);
244		unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d);
245		unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d);
246		unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d);
247		unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d);
248		unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d);
249		unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d);
250		unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d);
251		unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d);
252		unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d;
253		unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d;
254		unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d;
255		unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d;
		// Shuffle / move entries on `__m128d`; the shuffle selector is the const
		// generic `MASK`.
256		fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d;
257		fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d;
		// `cast*` entries between the three 128-bit vector types.
258		fn _mm_castpd_ps(a: __m128d) -> __m128;
259		fn _mm_castpd_si128(a: __m128d) -> __m128i;
260		fn _mm_castps_pd(a: __m128) -> __m128d;
261		fn _mm_castps_si128(a: __m128) -> __m128i;
262		fn _mm_castsi128_pd(a: __m128i) -> __m128d;
263		fn _mm_castsi128_ps(a: __m128i) -> __m128;
		// `undefined` constructors and `__m128d` unpack entries.
264		fn _mm_undefined_pd() -> __m128d;
265		fn _mm_undefined_si128() -> __m128i;
266		fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d;
267		fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d;
268	});
269}