use super::*;

3impl Sse2 {
4 delegate!({
5 fn _mm_pause();
6 #[allow(clippy::not_unsafe_ptr_arg_deref)]
7 fn _mm_clflush(p: *const u8);
8 fn _mm_lfence();
9 fn _mm_mfence();
10 fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i;
11 fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i;
12 fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i;
13 fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i;
14 fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i;
15 fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i;
16 fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i;
17 fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i;
18 fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i;
19 fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i;
20 fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i;
21 fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i;
22 fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i;
23 fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i;
24 fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i;
25 fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i;
26 fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i;
27 fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i;
28 fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i;
29 fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i;
30 fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i;
31 fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i;
32 fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i;
33 fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i;
34 fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i;
35 fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i;
36 fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i;
37 fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i;
38 fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
39 fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
40 fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
41 fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
42 fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i;
43 fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
44 fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i;
45 fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
46 fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i;
47 fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
48 fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i;
49 fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
50 fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i;
51 fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i;
52 fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
53 fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i;
54 fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
55 fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i;
56 fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i;
57 fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i;
58 fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i;
59 fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i;
60 fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i;
61 fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i;
62 fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i;
63 fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i;
64 fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i;
65 fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i;
66 fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i;
67 fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i;
68 fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i;
69 fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i;
70 fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i;
71 fn _mm_cvtepi32_pd(a: __m128i) -> __m128d;
72 fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d;
73 fn _mm_cvtepi32_ps(a: __m128i) -> __m128;
74 fn _mm_cvtps_epi32(a: __m128) -> __m128i;
75 fn _mm_cvtsi32_si128(a: i32) -> __m128i;
76 fn _mm_cvtsi128_si32(a: __m128i) -> i32;
77 fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i;
78 fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
79 fn _mm_set_epi16(
80 e7: i16,
81 e6: i16,
82 e5: i16,
83 e4: i16,
84 e3: i16,
85 e2: i16,
86 e1: i16,
87 e0: i16,
88 ) -> __m128i;
89 fn _mm_set_epi8(
90 e15: i8,
91 e14: i8,
92 e13: i8,
93 e12: i8,
94 e11: i8,
95 e10: i8,
96 e9: i8,
97 e8: i8,
98 e7: i8,
99 e6: i8,
100 e5: i8,
101 e4: i8,
102 e3: i8,
103 e2: i8,
104 e1: i8,
105 e0: i8,
106 ) -> __m128i;
107 fn _mm_set1_epi64x(a: i64) -> __m128i;
108 fn _mm_set1_epi32(a: i32) -> __m128i;
109 fn _mm_set1_epi16(a: i16) -> __m128i;
110 fn _mm_set1_epi8(a: i8) -> __m128i;
111 fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i;
112 fn _mm_setr_epi16(
113 e7: i16,
114 e6: i16,
115 e5: i16,
116 e4: i16,
117 e3: i16,
118 e2: i16,
119 e1: i16,
120 e0: i16,
121 ) -> __m128i;
122 fn _mm_setr_epi8(
123 e15: i8,
124 e14: i8,
125 e13: i8,
126 e12: i8,
127 e11: i8,
128 e10: i8,
129 e9: i8,
130 e8: i8,
131 e7: i8,
132 e6: i8,
133 e5: i8,
134 e4: i8,
135 e3: i8,
136 e2: i8,
137 e1: i8,
138 e0: i8,
139 ) -> __m128i;
140 fn _mm_setzero_si128() -> __m128i;
141 unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i;
142 unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i;
143 unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i;
144 unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8);
145 unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i);
146 unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i);
147 unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i);
148 unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i);
149 unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32);
150 fn _mm_move_epi64(a: __m128i) -> __m128i;
151 fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i;
152 fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i;
153 fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i;
154 fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32;
155 fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i;
156 fn _mm_movemask_epi8(a: __m128i) -> i32;
157 fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i;
158 fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
159 fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i;
160 fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i;
161 fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i;
162 fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i;
163 fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i;
164 fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i;
165 fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i;
166 fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i;
167 fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i;
168 fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d;
169 fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d;
170 fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d;
171 fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d;
172 fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d;
173 fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d;
174 fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d;
175 fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d;
176 fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d;
177 fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d;
178 fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d;
179 fn _mm_sqrt_pd(a: __m128d) -> __m128d;
180 fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d;
181 fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d;
182 fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d;
183 fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d;
184 fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d;
185 fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d;
186 fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d;
187 fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d;
188 fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d;
189 fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d;
190 fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d;
191 fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d;
192 fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d;
193 fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d;
194 fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d;
195 fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d;
196 fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d;
197 fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d;
198 fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d;
199 fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d;
200 fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d;
201 fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d;
202 fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d;
203 fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d;
204 fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d;
205 fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d;
206 fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d;
207 fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d;
208 fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d;
209 fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d;
210 fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32;
211 fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32;
212 fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32;
213 fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32;
214 fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32;
215 fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32;
216 fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32;
217 fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32;
218 fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32;
219 fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32;
220 fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32;
221 fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32;
222 fn _mm_cvtpd_ps(a: __m128d) -> __m128;
223 fn _mm_cvtps_pd(a: __m128) -> __m128d;
224 fn _mm_cvtpd_epi32(a: __m128d) -> __m128i;
225 fn _mm_cvtsd_si32(a: __m128d) -> i32;
226 fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128;
227 fn _mm_cvtsd_f64(a: __m128d) -> f64;
228 fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d;
229 fn _mm_cvttpd_epi32(a: __m128d) -> __m128i;
230 fn _mm_cvttsd_si32(a: __m128d) -> i32;
231 fn _mm_cvttps_epi32(a: __m128) -> __m128i;
232 fn _mm_set_sd(a: f64) -> __m128d;
233 fn _mm_set1_pd(a: f64) -> __m128d;
234 fn _mm_set_pd1(a: f64) -> __m128d;
235 fn _mm_set_pd(a: f64, b: f64) -> __m128d;
236 fn _mm_setr_pd(a: f64, b: f64) -> __m128d;
237 fn _mm_setzero_pd() -> __m128d;
238 fn _mm_movemask_pd(a: __m128d) -> i32;
239 unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d;
240 unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d;
241 unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
242 unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d;
243 unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d);
244 unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d);
245 unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d);
246 unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d);
247 unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d);
248 unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d);
249 unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d);
250 unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d);
251 unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d);
252 unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d;
253 unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d;
254 unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d;
255 unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d;
256 fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d;
257 fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d;
258 fn _mm_castpd_ps(a: __m128d) -> __m128;
259 fn _mm_castpd_si128(a: __m128d) -> __m128i;
260 fn _mm_castps_pd(a: __m128) -> __m128d;
261 fn _mm_castps_si128(a: __m128) -> __m128i;
262 fn _mm_castsi128_pd(a: __m128i) -> __m128d;
263 fn _mm_castsi128_ps(a: __m128i) -> __m128;
264 fn _mm_undefined_pd() -> __m128d;
265 fn _mm_undefined_si128() -> __m128i;
266 fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d;
267 fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d;
268 });
269}