1use super::*;
2
simd_type!({
    // Capability token for the x86-64-v2 feature level. NOTE(review): each
    // field presumably holds a zero-sized proof token for the named target
    // feature (generated by `simd_type!`/`f!`) — confirm against the macro's
    // definition in the parent module.
    #[allow(missing_docs)]
    pub struct V2 {
        pub sse: f!("sse"),
        pub sse2: f!("sse2"),
        pub fxsr: f!("fxsr"),
        pub sse3: f!("sse3"),
        pub ssse3: f!("ssse3"),
        pub sse4_1: f!("sse4.1"),
        pub sse4_2: f!("sse4.2"),
        pub popcnt: f!("popcnt"),
    }
});
18
// Marker impl for the crate's `Seal` trait (imported via `super::*`), the
// usual sealed-trait pattern keeping downstream crates from implementing
// the sealed extension traits for `V2`.
impl Seal for V2 {}
20
21impl V2 {
    /// Lanewise absolute value of `f32` lanes, computed by clearing the sign bit.
    #[inline(always)]
    pub fn abs_f32x4(self, a: f32x4) -> f32x4 {
        // (1 << 31) - 1 == 0x7FFF_FFFF: all bits set except the IEEE-754 sign bit.
        self.and_f32x4(a, cast!(self.splat_u32x4((1 << 31) - 1)))
    }

    /// Lanewise absolute value of `f64` lanes, computed by clearing the sign bit.
    #[inline(always)]
    pub fn abs_f64x2(self, a: f64x2) -> f64x2 {
        // (1 << 63) - 1 == 0x7FFF_FFFF_FFFF_FFFF: everything but the sign bit.
        self.and_f64x2(a, cast!(self.splat_u64x2((1 << 63) - 1)))
    }

    /// Lanewise addition of `f32` lanes.
    #[inline(always)]
    pub fn add_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_add_ps(cast!(a), cast!(b)))
    }

    /// Lanewise addition of `f64` lanes.
    #[inline(always)]
    pub fn add_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_add_pd(cast!(a), cast!(b)))
    }
45
    // Bitwise AND. Every integer/mask variant below calls the same
    // `_mm_and_si128`; the per-type wrappers exist only for type safety.

    /// Bitwise AND of the bit patterns of two `f32x4` vectors.
    #[inline(always)]
    pub fn and_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_and_ps(cast!(a), cast!(b)))
    }

    /// Bitwise AND of the bit patterns of two `f64x2` vectors.
    #[inline(always)]
    pub fn and_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_and_pd(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `i16x8` vectors.
    #[inline(always)]
    pub fn and_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `i32x4` vectors.
    #[inline(always)]
    pub fn and_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `i64x2` vectors.
    #[inline(always)]
    pub fn and_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `i8x16` vectors.
    #[inline(always)]
    pub fn and_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `m16x8` masks.
    #[inline(always)]
    pub fn and_m16x8(self, a: m16x8, b: m16x8) -> m16x8 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `m32x4` masks.
    #[inline(always)]
    pub fn and_m32x4(self, a: m32x4, b: m32x4) -> m32x4 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `m64x2` masks.
    #[inline(always)]
    pub fn and_m64x2(self, a: m64x2, b: m64x2) -> m64x2 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `m8x16` masks.
    #[inline(always)]
    pub fn and_m8x16(self, a: m8x16, b: m8x16) -> m8x16 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `u16x8` vectors.
    #[inline(always)]
    pub fn and_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `u32x4` vectors.
    #[inline(always)]
    pub fn and_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `u64x2` vectors.
    #[inline(always)]
    pub fn and_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }

    /// Bitwise AND of two `u8x16` vectors.
    #[inline(always)]
    pub fn and_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_and_si128(cast!(a), cast!(b)))
    }
129
    // AND-NOT. Per the `andn*` intrinsic semantics this computes `(!a) & b` —
    // the FIRST operand is the one that gets complemented.

    /// Returns `!a & b` over the bit patterns of two `f32x4` vectors.
    #[inline(always)]
    pub fn andnot_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_andnot_ps(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` over the bit patterns of two `f64x2` vectors.
    #[inline(always)]
    pub fn andnot_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_andnot_pd(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `i16x8`.
    #[inline(always)]
    pub fn andnot_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `i32x4`.
    #[inline(always)]
    pub fn andnot_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `i64x2`.
    #[inline(always)]
    pub fn andnot_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `i8x16`.
    #[inline(always)]
    pub fn andnot_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `m16x8` masks.
    #[inline(always)]
    pub fn andnot_m16x8(self, a: m16x8, b: m16x8) -> m16x8 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `m32x4` masks.
    #[inline(always)]
    pub fn andnot_m32x4(self, a: m32x4, b: m32x4) -> m32x4 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `m64x2` masks.
    #[inline(always)]
    pub fn andnot_m64x2(self, a: m64x2, b: m64x2) -> m64x2 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `m8x16` masks.
    #[inline(always)]
    pub fn andnot_m8x16(self, a: m8x16, b: m8x16) -> m8x16 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `u16x8`.
    #[inline(always)]
    pub fn andnot_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `u32x4`.
    #[inline(always)]
    pub fn andnot_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `u64x2`.
    #[inline(always)]
    pub fn andnot_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }

    /// Returns `!a & b` for `u8x16`.
    #[inline(always)]
    pub fn andnot_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_andnot_si128(cast!(a), cast!(b)))
    }
213
    /// Applies `sign`'s sign to `a` per lane (`psignw`): negates `a` where
    /// `sign < 0`, zeroes where `sign == 0`, passes through where `sign > 0`.
    /// Note the intrinsic takes `(a, sign)` — arguments are swapped here on purpose.
    #[inline(always)]
    pub fn apply_sign_i16x8(self, sign: i16x8, a: i16x8) -> i16x8 {
        cast!(self.ssse3._mm_sign_epi16(cast!(a), cast!(sign)))
    }

    /// Applies `sign`'s sign to `a` per `i32` lane; see [`Self::apply_sign_i16x8`].
    #[inline(always)]
    pub fn apply_sign_i32x4(self, sign: i32x4, a: i32x4) -> i32x4 {
        cast!(self.ssse3._mm_sign_epi32(cast!(a), cast!(sign)))
    }

    /// Applies `sign`'s sign to `a` per `i8` lane; see [`Self::apply_sign_i16x8`].
    #[inline(always)]
    pub fn apply_sign_i8x16(self, sign: i8x16, a: i8x16) -> i8x16 {
        cast!(self.ssse3._mm_sign_epi8(cast!(a), cast!(sign)))
    }

    /// Fast approximate reciprocal (`rcpps`, roughly 12 bits of precision).
    #[inline(always)]
    pub fn approx_reciprocal_f32x4(self, a: f32x4) -> f32x4 {
        cast!(self.sse._mm_rcp_ps(cast!(a)))
    }

    /// Fast approximate reciprocal square root (`rsqrtps`, ~12 bits of precision).
    #[inline(always)]
    pub fn approx_reciprocal_sqrt_f32x4(self, a: f32x4) -> f32x4 {
        cast!(self.sse._mm_rsqrt_ps(cast!(a)))
    }

    /// Lanewise rounded average `(a + b + 1) >> 1` of unsigned 16-bit lanes.
    #[inline(always)]
    pub fn average_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_avg_epu16(cast!(a), cast!(b)))
    }

    /// Lanewise rounded average `(a + b + 1) >> 1` of unsigned 8-bit lanes.
    #[inline(always)]
    pub fn average_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_avg_epu8(cast!(a), cast!(b)))
    }

    /// Rounds each `f32` lane toward positive infinity.
    #[inline(always)]
    pub fn ceil_f32x4(self, a: f32x4) -> f32x4 {
        cast!(self.sse4_1._mm_ceil_ps(cast!(a)))
    }

    /// Rounds each `f64` lane toward positive infinity.
    #[inline(always)]
    pub fn ceil_f64x2(self, a: f64x2) -> f64x2 {
        cast!(self.sse4_1._mm_ceil_pd(cast!(a)))
    }
276
    // Lanewise equality, returning an all-ones/all-zeros mask per lane.
    // Signed and unsigned integer variants share the same intrinsic since
    // bitwise equality is sign-agnostic.

    /// `a == b` per `f32` lane (false for NaN operands).
    #[inline(always)]
    pub fn cmp_eq_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpeq_ps(cast!(a), cast!(b)))
    }

    /// `a == b` per `f64` lane (false for NaN operands).
    #[inline(always)]
    pub fn cmp_eq_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpeq_pd(cast!(a), cast!(b)))
    }

    /// `a == b` per `i16` lane.
    #[inline(always)]
    pub fn cmp_eq_i16x8(self, a: i16x8, b: i16x8) -> m16x8 {
        cast!(self.sse2._mm_cmpeq_epi16(cast!(a), cast!(b)))
    }

    /// `a == b` per `i32` lane.
    #[inline(always)]
    pub fn cmp_eq_i32x4(self, a: i32x4, b: i32x4) -> m32x4 {
        cast!(self.sse2._mm_cmpeq_epi32(cast!(a), cast!(b)))
    }

    /// `a == b` per `i64` lane (`pcmpeqq`, needs SSE4.1).
    #[inline(always)]
    pub fn cmp_eq_i64x2(self, a: i64x2, b: i64x2) -> m64x2 {
        cast!(self.sse4_1._mm_cmpeq_epi64(cast!(a), cast!(b)))
    }

    /// `a == b` per `i8` lane.
    #[inline(always)]
    pub fn cmp_eq_i8x16(self, a: i8x16, b: i8x16) -> m8x16 {
        cast!(self.sse2._mm_cmpeq_epi8(cast!(a), cast!(b)))
    }

    /// `a == b` per `u16` lane.
    #[inline(always)]
    pub fn cmp_eq_u16x8(self, a: u16x8, b: u16x8) -> m16x8 {
        cast!(self.sse2._mm_cmpeq_epi16(cast!(a), cast!(b)))
    }

    /// `a == b` per `u32` lane.
    #[inline(always)]
    pub fn cmp_eq_u32x4(self, a: u32x4, b: u32x4) -> m32x4 {
        cast!(self.sse2._mm_cmpeq_epi32(cast!(a), cast!(b)))
    }

    /// `a == b` per `u64` lane.
    #[inline(always)]
    pub fn cmp_eq_u64x2(self, a: u64x2, b: u64x2) -> m64x2 {
        cast!(self.sse4_1._mm_cmpeq_epi64(cast!(a), cast!(b)))
    }

    /// `a == b` per `u8` lane.
    #[inline(always)]
    pub fn cmp_eq_u8x16(self, a: u8x16, b: u8x16) -> m8x16 {
        cast!(self.sse2._mm_cmpeq_epi8(cast!(a), cast!(b)))
    }
336
    // Lanewise `>=`. There is no integer `>=` instruction before AVX-512, so
    // the integer variants are derived as `!(a < b)`, which is equivalent for
    // integers (total order, no NaN).

    /// `a >= b` per `f32` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_ge_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpge_ps(cast!(a), cast!(b)))
    }

    /// `a >= b` per `f64` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_ge_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpge_pd(cast!(a), cast!(b)))
    }

    /// `a >= b` per `i16` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_i16x8(self, a: i16x8, b: i16x8) -> m16x8 {
        self.not_m16x8(self.cmp_lt_i16x8(a, b))
    }

    /// `a >= b` per `i32` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_i32x4(self, a: i32x4, b: i32x4) -> m32x4 {
        self.not_m32x4(self.cmp_lt_i32x4(a, b))
    }

    /// `a >= b` per `i64` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_i64x2(self, a: i64x2, b: i64x2) -> m64x2 {
        self.not_m64x2(self.cmp_lt_i64x2(a, b))
    }

    /// `a >= b` per `i8` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_i8x16(self, a: i8x16, b: i8x16) -> m8x16 {
        self.not_m8x16(self.cmp_lt_i8x16(a, b))
    }

    /// `a >= b` per `u16` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_u16x8(self, a: u16x8, b: u16x8) -> m16x8 {
        self.not_m16x8(self.cmp_lt_u16x8(a, b))
    }

    /// `a >= b` per `u32` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_u32x4(self, a: u32x4, b: u32x4) -> m32x4 {
        self.not_m32x4(self.cmp_lt_u32x4(a, b))
    }

    /// `a >= b` per `u64` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_u64x2(self, a: u64x2, b: u64x2) -> m64x2 {
        self.not_m64x2(self.cmp_lt_u64x2(a, b))
    }

    /// `a >= b` per `u8` lane, as `!(a < b)`.
    #[inline(always)]
    pub fn cmp_ge_u8x16(self, a: u8x16, b: u8x16) -> m8x16 {
        self.not_m8x16(self.cmp_lt_u8x16(a, b))
    }
396
    // Lanewise `>`. SSE only provides SIGNED integer compares, so the
    // unsigned variants XOR both operands with the sign-bit constant, which
    // maps the unsigned range onto the signed range order-preservingly.

    /// `a > b` per `f32` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_gt_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpgt_ps(cast!(a), cast!(b)))
    }

    /// `a > b` per `f64` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_gt_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpgt_pd(cast!(a), cast!(b)))
    }

    /// `a > b` per signed `i16` lane.
    #[inline(always)]
    pub fn cmp_gt_i16x8(self, a: i16x8, b: i16x8) -> m16x8 {
        cast!(self.sse2._mm_cmpgt_epi16(cast!(a), cast!(b)))
    }

    /// `a > b` per signed `i32` lane.
    #[inline(always)]
    pub fn cmp_gt_i32x4(self, a: i32x4, b: i32x4) -> m32x4 {
        cast!(self.sse2._mm_cmpgt_epi32(cast!(a), cast!(b)))
    }

    /// `a > b` per signed `i64` lane (`pcmpgtq`, needs SSE4.2).
    #[inline(always)]
    pub fn cmp_gt_i64x2(self, a: i64x2, b: i64x2) -> m64x2 {
        cast!(self.sse4_2._mm_cmpgt_epi64(cast!(a), cast!(b)))
    }

    /// `a > b` per signed `i8` lane.
    #[inline(always)]
    pub fn cmp_gt_i8x16(self, a: i8x16, b: i8x16) -> m8x16 {
        cast!(self.sse2._mm_cmpgt_epi8(cast!(a), cast!(b)))
    }

    /// `a > b` per unsigned `u16` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_gt_u16x8(self, a: u16x8, b: u16x8) -> m16x8 {
        let k = self.splat_u16x8(0x8000);
        self.cmp_gt_i16x8(cast!(self.xor_u16x8(a, k)), cast!(self.xor_u16x8(b, k)))
    }

    /// `a > b` per unsigned `u32` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_gt_u32x4(self, a: u32x4, b: u32x4) -> m32x4 {
        let k = self.splat_u32x4(0x80000000);
        self.cmp_gt_i32x4(cast!(self.xor_u32x4(a, k)), cast!(self.xor_u32x4(b, k)))
    }

    /// `a > b` per unsigned `u64` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_gt_u64x2(self, a: u64x2, b: u64x2) -> m64x2 {
        let k = self.splat_u64x2(0x8000000000000000);
        self.cmp_gt_i64x2(cast!(self.xor_u64x2(a, k)), cast!(self.xor_u64x2(b, k)))
    }

    /// `a > b` per unsigned `u8` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_gt_u8x16(self, a: u8x16, b: u8x16) -> m8x16 {
        let k = self.splat_u8x16(0x80);
        self.cmp_gt_i8x16(cast!(self.xor_u8x16(a, k)), cast!(self.xor_u8x16(b, k)))
    }
460
    // Lanewise `<=`. Integer variants are derived as `!(a > b)` (valid for
    // integers, which have a total order).

    /// `a <= b` per `f32` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_le_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmple_ps(cast!(a), cast!(b)))
    }

    /// `a <= b` per `f64` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_le_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmple_pd(cast!(a), cast!(b)))
    }

    /// `a <= b` per `i16` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_i16x8(self, a: i16x8, b: i16x8) -> m16x8 {
        self.not_m16x8(self.cmp_gt_i16x8(a, b))
    }

    /// `a <= b` per `i32` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_i32x4(self, a: i32x4, b: i32x4) -> m32x4 {
        self.not_m32x4(self.cmp_gt_i32x4(a, b))
    }

    /// `a <= b` per `i64` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_i64x2(self, a: i64x2, b: i64x2) -> m64x2 {
        self.not_m64x2(self.cmp_gt_i64x2(a, b))
    }

    /// `a <= b` per `i8` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_i8x16(self, a: i8x16, b: i8x16) -> m8x16 {
        self.not_m8x16(self.cmp_gt_i8x16(a, b))
    }

    /// `a <= b` per `u16` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_u16x8(self, a: u16x8, b: u16x8) -> m16x8 {
        self.not_m16x8(self.cmp_gt_u16x8(a, b))
    }

    /// `a <= b` per `u32` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_u32x4(self, a: u32x4, b: u32x4) -> m32x4 {
        self.not_m32x4(self.cmp_gt_u32x4(a, b))
    }

    /// `a <= b` per `u64` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_u64x2(self, a: u64x2, b: u64x2) -> m64x2 {
        self.not_m64x2(self.cmp_gt_u64x2(a, b))
    }

    /// `a <= b` per `u8` lane, as `!(a > b)`.
    #[inline(always)]
    pub fn cmp_le_u8x16(self, a: u8x16, b: u8x16) -> m8x16 {
        self.not_m8x16(self.cmp_gt_u8x16(a, b))
    }
520
    // Lanewise `<`. The `i64` variant has no `pcmplt` instruction, so it uses
    // `pcmpgtq` with the operands swapped; unsigned variants use the same
    // sign-bias trick as `cmp_gt_u*`.

    /// `a < b` per `f32` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_lt_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmplt_ps(cast!(a), cast!(b)))
    }

    /// `a < b` per `f64` lane (false when either operand is NaN).
    #[inline(always)]
    pub fn cmp_lt_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmplt_pd(cast!(a), cast!(b)))
    }

    /// `a < b` per signed `i16` lane.
    #[inline(always)]
    pub fn cmp_lt_i16x8(self, a: i16x8, b: i16x8) -> m16x8 {
        cast!(self.sse2._mm_cmplt_epi16(cast!(a), cast!(b)))
    }

    /// `a < b` per signed `i32` lane.
    #[inline(always)]
    pub fn cmp_lt_i32x4(self, a: i32x4, b: i32x4) -> m32x4 {
        cast!(self.sse2._mm_cmplt_epi32(cast!(a), cast!(b)))
    }

    /// `a < b` per signed `i64` lane: computed as `b > a`.
    #[inline(always)]
    pub fn cmp_lt_i64x2(self, a: i64x2, b: i64x2) -> m64x2 {
        cast!(self.sse4_2._mm_cmpgt_epi64(cast!(b), cast!(a)))
    }

    /// `a < b` per signed `i8` lane.
    #[inline(always)]
    pub fn cmp_lt_i8x16(self, a: i8x16, b: i8x16) -> m8x16 {
        cast!(self.sse2._mm_cmplt_epi8(cast!(a), cast!(b)))
    }

    /// `a < b` per unsigned `u16` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_lt_u16x8(self, a: u16x8, b: u16x8) -> m16x8 {
        let k = self.splat_u16x8(0x8000);
        self.cmp_lt_i16x8(cast!(self.xor_u16x8(a, k)), cast!(self.xor_u16x8(b, k)))
    }

    /// `a < b` per unsigned `u32` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_lt_u32x4(self, a: u32x4, b: u32x4) -> m32x4 {
        let k = self.splat_u32x4(0x80000000);
        self.cmp_lt_i32x4(cast!(self.xor_u32x4(a, k)), cast!(self.xor_u32x4(b, k)))
    }

    /// `a < b` per unsigned `u64` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_lt_u64x2(self, a: u64x2, b: u64x2) -> m64x2 {
        let k = self.splat_u64x2(0x8000000000000000);
        self.cmp_lt_i64x2(cast!(self.xor_u64x2(a, k)), cast!(self.xor_u64x2(b, k)))
    }

    /// `a < b` per unsigned `u8` lane (sign-bias then signed compare).
    #[inline(always)]
    pub fn cmp_lt_u8x16(self, a: u8x16, b: u8x16) -> m8x16 {
        let k = self.splat_u8x16(0x80);
        self.cmp_lt_i8x16(cast!(self.xor_u8x16(a, k)), cast!(self.xor_u8x16(b, k)))
    }
584
    // Negated float compares. These are NOT the same as the opposite compare:
    // e.g. `cmp_not_ge` is true when `a < b` OR when the operands are
    // unordered (NaN), whereas `cmp_lt` is false on NaN. Use these when NaN
    // lanes should be selected.

    /// `!(a == b)` per `f32` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_eq_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpneq_ps(cast!(a), cast!(b)))
    }

    /// `!(a == b)` per `f64` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_eq_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpneq_pd(cast!(a), cast!(b)))
    }

    /// `!(a >= b)` per `f32` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_ge_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpnge_ps(cast!(a), cast!(b)))
    }

    /// `!(a >= b)` per `f64` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_ge_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpnge_pd(cast!(a), cast!(b)))
    }

    /// `!(a > b)` per `f32` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_gt_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpngt_ps(cast!(a), cast!(b)))
    }

    /// `!(a > b)` per `f64` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_gt_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpngt_pd(cast!(a), cast!(b)))
    }

    /// `!(a <= b)` per `f32` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_le_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpnle_ps(cast!(a), cast!(b)))
    }

    /// `!(a <= b)` per `f64` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_le_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpnle_pd(cast!(a), cast!(b)))
    }

    /// `!(a < b)` per `f32` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_lt_f32x4(self, a: f32x4, b: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpnlt_ps(cast!(a), cast!(b)))
    }

    /// `!(a < b)` per `f64` lane; true when either operand is NaN.
    #[inline(always)]
    pub fn cmp_not_lt_f64x2(self, a: f64x2, b: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpnlt_pd(cast!(a), cast!(b)))
    }
644
    // Lane conversions. Conventions visible in the intrinsics used below:
    // - `cvtt*` float->int conversions TRUNCATE toward zero;
    // - widening conversions read only the LOW lanes of the source and
    //   sign-extend (`cvtepi*`) or zero-extend (`cvtepu*`);
    // - same-width signed<->unsigned "conversions" are plain bitcasts.

    /// Widens the low 2 `f32` lanes to `f64`.
    #[inline(always)]
    pub fn convert_f32x4_to_f64x2(self, a: f32x4) -> f64x2 {
        cast!(self.sse2._mm_cvtps_pd(cast!(a)))
    }

    /// Converts `f32` lanes to `i32` with truncation toward zero.
    #[inline(always)]
    pub fn convert_f32x4_to_i32x4(self, a: f32x4) -> i32x4 {
        cast!(self.sse2._mm_cvttps_epi32(cast!(a)))
    }

    /// Narrows `f64` lanes to `f32` (results in the low 2 lanes, upper zeroed).
    #[inline(always)]
    pub fn convert_f64x2_to_f32x4(self, a: f64x2) -> f32x4 {
        cast!(self.sse2._mm_cvtpd_ps(cast!(a)))
    }

    /// Converts `f64` lanes to `i32` with truncation (low 2 lanes, upper zeroed).
    #[inline(always)]
    pub fn convert_f64x2_to_i32x4(self, a: f64x2) -> i32x4 {
        cast!(self.sse2._mm_cvttpd_epi32(cast!(a)))
    }

    /// Sign-extends the low 4 `i16` lanes to `i32`.
    #[inline(always)]
    pub fn convert_i16x8_to_i32x4(self, a: i16x8) -> i32x4 {
        cast!(self.sse4_1._mm_cvtepi16_epi32(cast!(a)))
    }

    /// Sign-extends the low 2 `i16` lanes to `i64`.
    #[inline(always)]
    pub fn convert_i16x8_to_i64x2(self, a: i16x8) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepi16_epi64(cast!(a)))
    }

    /// Reinterprets `i16` lanes as `u16` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_i16x8_to_u16x8(self, a: i16x8) -> u16x8 {
        cast!(a)
    }

    /// Sign-extends the low 4 `i16` lanes, then reinterprets as `u32`.
    #[inline(always)]
    pub fn convert_i16x8_to_u32x4(self, a: i16x8) -> u32x4 {
        cast!(self.sse4_1._mm_cvtepi16_epi32(cast!(a)))
    }

    /// Sign-extends the low 2 `i16` lanes, then reinterprets as `u64`.
    #[inline(always)]
    pub fn convert_i16x8_to_u64x2(self, a: i16x8) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepi16_epi64(cast!(a)))
    }

    /// Converts `i32` lanes to `f32` (round to nearest).
    #[inline(always)]
    pub fn convert_i32x4_to_f32x4(self, a: i32x4) -> f32x4 {
        cast!(self.sse2._mm_cvtepi32_ps(cast!(a)))
    }

    /// Converts the low 2 `i32` lanes to `f64` (exact).
    #[inline(always)]
    pub fn convert_i32x4_to_f64x2(self, a: i32x4) -> f64x2 {
        cast!(self.sse2._mm_cvtepi32_pd(cast!(a)))
    }

    /// Sign-extends the low 2 `i32` lanes to `i64`.
    #[inline(always)]
    pub fn convert_i32x4_to_i64x2(self, a: i32x4) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepi32_epi64(cast!(a)))
    }

    /// Reinterprets `i32` lanes as `u32` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_i32x4_to_u32x4(self, a: i32x4) -> u32x4 {
        cast!(a)
    }

    /// Sign-extends the low 2 `i32` lanes, then reinterprets as `u64`.
    #[inline(always)]
    pub fn convert_i32x4_to_u64x2(self, a: i32x4) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepi32_epi64(cast!(a)))
    }

    /// Sign-extends the low 8 `i8` lanes to `i16`.
    #[inline(always)]
    pub fn convert_i8x16_to_i16x8(self, a: i8x16) -> i16x8 {
        cast!(self.sse4_1._mm_cvtepi8_epi16(cast!(a)))
    }

    /// Sign-extends the low 4 `i8` lanes to `i32`.
    #[inline(always)]
    pub fn convert_i8x16_to_i32x4(self, a: i8x16) -> i32x4 {
        cast!(self.sse4_1._mm_cvtepi8_epi32(cast!(a)))
    }

    /// Sign-extends the low 2 `i8` lanes to `i64`.
    #[inline(always)]
    pub fn convert_i8x16_to_i64x2(self, a: i8x16) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepi8_epi64(cast!(a)))
    }

    /// Sign-extends the low 8 `i8` lanes, then reinterprets as `u16`.
    #[inline(always)]
    pub fn convert_i8x16_to_u16x8(self, a: i8x16) -> u16x8 {
        cast!(self.sse4_1._mm_cvtepi8_epi16(cast!(a)))
    }

    /// Sign-extends the low 4 `i8` lanes, then reinterprets as `u32`.
    #[inline(always)]
    pub fn convert_i8x16_to_u32x4(self, a: i8x16) -> u32x4 {
        cast!(self.sse4_1._mm_cvtepi8_epi32(cast!(a)))
    }

    /// Sign-extends the low 2 `i8` lanes, then reinterprets as `u64`.
    #[inline(always)]
    pub fn convert_i8x16_to_u64x2(self, a: i8x16) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepi8_epi64(cast!(a)))
    }

    /// Reinterprets `i8` lanes as `u8` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_i8x16_to_u8x16(self, a: i8x16) -> u8x16 {
        cast!(a)
    }

    /// Reinterprets `u16` lanes as `i16` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_u16x8_to_i16x8(self, a: u16x8) -> i16x8 {
        cast!(a)
    }

    /// Zero-extends the low 4 `u16` lanes to `i32`.
    #[inline(always)]
    pub fn convert_u16x8_to_i32x4(self, a: u16x8) -> i32x4 {
        cast!(self.sse4_1._mm_cvtepu16_epi32(cast!(a)))
    }

    /// Zero-extends the low 2 `u16` lanes to `i64`.
    #[inline(always)]
    pub fn convert_u16x8_to_i64x2(self, a: u16x8) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepu16_epi64(cast!(a)))
    }

    /// Zero-extends the low 4 `u16` lanes to `u32`.
    #[inline(always)]
    pub fn convert_u16x8_to_u32x4(self, a: u16x8) -> u32x4 {
        cast!(self.sse4_1._mm_cvtepu16_epi32(cast!(a)))
    }

    /// Zero-extends the low 2 `u16` lanes to `u64`.
    #[inline(always)]
    pub fn convert_u16x8_to_u64x2(self, a: u16x8) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepu16_epi64(cast!(a)))
    }

    /// Reinterprets `u32` lanes as `i32` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_u32x4_to_i32x4(self, a: u32x4) -> i32x4 {
        cast!(a)
    }

    /// Zero-extends the low 2 `u32` lanes to `i64`.
    #[inline(always)]
    pub fn convert_u32x4_to_i64x2(self, a: u32x4) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepu32_epi64(cast!(a)))
    }

    /// Zero-extends the low 2 `u32` lanes to `u64`.
    #[inline(always)]
    pub fn convert_u32x4_to_u64x2(self, a: u32x4) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepu32_epi64(cast!(a)))
    }

    /// Zero-extends the low 8 `u8` lanes to `i16`.
    #[inline(always)]
    pub fn convert_u8x16_to_i16x8(self, a: u8x16) -> i16x8 {
        cast!(self.sse4_1._mm_cvtepu8_epi16(cast!(a)))
    }

    /// Zero-extends the low 4 `u8` lanes to `i32`.
    #[inline(always)]
    pub fn convert_u8x16_to_i32x4(self, a: u8x16) -> i32x4 {
        cast!(self.sse4_1._mm_cvtepu8_epi32(cast!(a)))
    }

    /// Zero-extends the low 2 `u8` lanes to `i64`.
    #[inline(always)]
    pub fn convert_u8x16_to_i64x2(self, a: u8x16) -> i64x2 {
        cast!(self.sse4_1._mm_cvtepu8_epi64(cast!(a)))
    }

    /// Reinterprets `u8` lanes as `i8` (bitcast, no instruction).
    #[inline(always)]
    pub fn convert_u8x16_to_i8x16(self, a: u8x16) -> i8x16 {
        cast!(a)
    }

    /// Zero-extends the low 8 `u8` lanes to `u16`.
    #[inline(always)]
    pub fn convert_u8x16_to_u16x8(self, a: u8x16) -> u16x8 {
        cast!(self.sse4_1._mm_cvtepu8_epi16(cast!(a)))
    }

    /// Zero-extends the low 4 `u8` lanes to `u32`.
    #[inline(always)]
    pub fn convert_u8x16_to_u32x4(self, a: u8x16) -> u32x4 {
        cast!(self.sse4_1._mm_cvtepu8_epi32(cast!(a)))
    }

    /// Zero-extends the low 2 `u8` lanes to `u64`.
    #[inline(always)]
    pub fn convert_u8x16_to_u64x2(self, a: u8x16) -> u64x2 {
        cast!(self.sse4_1._mm_cvtepu8_epi64(cast!(a)))
    }
860
    /// Lanewise division of `f32` lanes.
    #[inline(always)]
    pub fn div_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_div_ps(cast!(a), cast!(b)))
    }

    /// Lanewise division of `f64` lanes.
    #[inline(always)]
    pub fn div_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_div_pd(cast!(a), cast!(b)))
    }

    /// Rounds each `f32` lane toward negative infinity.
    #[inline(always)]
    pub fn floor_f32x4(self, a: f32x4) -> f32x4 {
        cast!(self.sse4_1._mm_floor_ps(cast!(a)))
    }

    /// Rounds each `f64` lane toward negative infinity.
    #[inline(always)]
    pub fn floor_f64x2(self, a: f64x2) -> f64x2 {
        cast!(self.sse4_1._mm_floor_pd(cast!(a)))
    }
884
    // Horizontal (pairwise) operations: adjacent lane pairs of `a` are
    // combined into the low half of the result, adjacent pairs of `b` into
    // the high half.

    /// Pairwise add of adjacent `f32` lanes; `a`'s sums in the low half, `b`'s in the high.
    #[inline(always)]
    pub fn horizontal_add_pack_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse3._mm_hadd_ps(cast!(a), cast!(b)))
    }

    /// Pairwise add of adjacent `f64` lanes: `[a0+a1, b0+b1]`.
    #[inline(always)]
    pub fn horizontal_add_pack_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse3._mm_hadd_pd(cast!(a), cast!(b)))
    }

    /// Pairwise wrapping add of adjacent `i16` lanes.
    #[inline(always)]
    pub fn horizontal_add_pack_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.ssse3._mm_hadd_epi16(cast!(a), cast!(b)))
    }

    /// Pairwise wrapping add of adjacent `i32` lanes.
    #[inline(always)]
    pub fn horizontal_add_pack_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.ssse3._mm_hadd_epi32(cast!(a), cast!(b)))
    }

    /// Pairwise SATURATING add of adjacent `i16` lanes.
    #[inline(always)]
    pub fn horizontal_saturating_add_pack_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.ssse3._mm_hadds_epi16(cast!(a), cast!(b)))
    }

    /// Pairwise SATURATING subtract of adjacent `i16` lanes.
    #[inline(always)]
    pub fn horizontal_saturating_sub_pack_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.ssse3._mm_hsubs_epi16(cast!(a), cast!(b)))
    }

    /// Pairwise subtract (`even - odd`) of adjacent `f32` lanes.
    #[inline(always)]
    pub fn horizontal_sub_pack_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse3._mm_hsub_ps(cast!(a), cast!(b)))
    }

    /// Pairwise subtract of adjacent `f64` lanes: `[a0-a1, b0-b1]`.
    #[inline(always)]
    pub fn horizontal_sub_pack_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse3._mm_hsub_pd(cast!(a), cast!(b)))
    }

    /// Pairwise wrapping subtract of adjacent `i16` lanes.
    #[inline(always)]
    pub fn horizontal_sub_pack_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.ssse3._mm_hsub_epi16(cast!(a), cast!(b)))
    }

    /// Pairwise wrapping subtract of adjacent `i32` lanes.
    #[inline(always)]
    pub fn horizontal_sub_pack_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.ssse3._mm_hsub_epi32(cast!(a), cast!(b)))
    }
964
    /// Per-lane NaN test: `cmpunord(a, a)` is true exactly when the lane is NaN.
    #[inline(always)]
    pub fn is_nan_f32x4(self, a: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpunord_ps(cast!(a), cast!(a)))
    }

    /// Per-lane NaN test for `f64` lanes.
    #[inline(always)]
    pub fn is_nan_f64x2(self, a: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpunord_pd(cast!(a), cast!(a)))
    }

    /// Per-lane not-NaN test: `cmpord(a, a)` is true exactly when the lane is not NaN.
    #[inline(always)]
    pub fn is_not_nan_f32x4(self, a: f32x4) -> m32x4 {
        cast!(self.sse._mm_cmpord_ps(cast!(a), cast!(a)))
    }

    /// Per-lane not-NaN test for `f64` lanes.
    #[inline(always)]
    pub fn is_not_nan_f64x2(self, a: f64x2) -> m64x2 {
        cast!(self.sse2._mm_cmpord_pd(cast!(a), cast!(a)))
    }
988
    // Lanewise max/min. Float variants follow `maxps`/`minps` semantics: when
    // the operands are +0.0/-0.0 or either is NaN, the SECOND operand is
    // returned — these are not fully IEEE-754 `maximum`/`minimum`.

    /// Lanewise maximum of `f32` lanes (`maxps` NaN/zero semantics).
    #[inline(always)]
    pub fn max_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_max_ps(cast!(a), cast!(b)))
    }

    /// Lanewise maximum of `f64` lanes (`maxpd` NaN/zero semantics).
    #[inline(always)]
    pub fn max_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_max_pd(cast!(a), cast!(b)))
    }

    /// Lanewise signed maximum of `i16` lanes.
    #[inline(always)]
    pub fn max_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_max_epi16(cast!(a), cast!(b)))
    }

    /// Lanewise signed maximum of `i32` lanes (SSE4.1).
    #[inline(always)]
    pub fn max_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse4_1._mm_max_epi32(cast!(a), cast!(b)))
    }

    /// Lanewise signed maximum of `i8` lanes (SSE4.1).
    #[inline(always)]
    pub fn max_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse4_1._mm_max_epi8(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned maximum of `u16` lanes (SSE4.1).
    #[inline(always)]
    pub fn max_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse4_1._mm_max_epu16(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned maximum of `u32` lanes (SSE4.1).
    #[inline(always)]
    pub fn max_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse4_1._mm_max_epu32(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned maximum of `u8` lanes.
    #[inline(always)]
    pub fn max_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_max_epu8(cast!(a), cast!(b)))
    }

    /// Lanewise minimum of `f32` lanes (`minps` NaN/zero semantics).
    #[inline(always)]
    pub fn min_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_min_ps(cast!(a), cast!(b)))
    }

    /// Lanewise minimum of `f64` lanes (`minpd` NaN/zero semantics).
    #[inline(always)]
    pub fn min_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_min_pd(cast!(a), cast!(b)))
    }

    /// Lanewise signed minimum of `i16` lanes.
    #[inline(always)]
    pub fn min_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_min_epi16(cast!(a), cast!(b)))
    }

    /// Lanewise signed minimum of `i32` lanes (SSE4.1).
    #[inline(always)]
    pub fn min_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse4_1._mm_min_epi32(cast!(a), cast!(b)))
    }

    /// Lanewise signed minimum of `i8` lanes (SSE4.1).
    #[inline(always)]
    pub fn min_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse4_1._mm_min_epi8(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned minimum of `u16` lanes (SSE4.1).
    #[inline(always)]
    pub fn min_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse4_1._mm_min_epu16(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned minimum of `u32` lanes (SSE4.1).
    #[inline(always)]
    pub fn min_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse4_1._mm_min_epu32(cast!(a), cast!(b)))
    }

    /// Lanewise unsigned minimum of `u8` lanes.
    #[inline(always)]
    pub fn min_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_min_epu8(cast!(a), cast!(b)))
    }
1084
    /// Lanewise multiplication of `f32` lanes.
    #[inline(always)]
    pub fn mul_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_mul_ps(cast!(a), cast!(b)))
    }

    /// Lanewise multiplication of `f64` lanes.
    #[inline(always)]
    pub fn mul_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_mul_pd(cast!(a), cast!(b)))
    }

    /// Multiplies byte pairs and adds adjacent products with saturation into `i16`.
    /// NOTE: per `_mm_maddubs_epi16`, `a`'s bytes are interpreted as UNSIGNED
    /// and `b`'s as signed, even though both parameters are typed `i8x16`.
    #[inline(always)]
    pub fn multiply_saturating_add_adjacent_i8x16(self, a: i8x16, b: i8x16) -> i16x8 {
        cast!(self.ssse3._mm_maddubs_epi16(cast!(a), cast!(b)))
    }

    /// Multiplies `i16` pairs and adds adjacent 32-bit products (`pmaddwd`).
    #[inline(always)]
    pub fn multiply_wrapping_add_adjacent_i16x8(self, a: i16x8, b: i16x8) -> i32x4 {
        cast!(self.sse2._mm_madd_epi16(cast!(a), cast!(b)))
    }

    /// Sums of absolute differences of 4-byte groups (`mpsadbw`); `OFFSETS`
    /// selects the starting blocks in `a` and `b` per the intrinsic's encoding.
    #[inline(always)]
    pub fn multisum_of_absolute_differences_u8x16<const OFFSETS: i32>(
        self,
        a: u8x16,
        b: u8x16,
    ) -> u16x8 {
        cast!(self.sse4_1._mm_mpsadbw_epu8::<OFFSETS>(cast!(a), cast!(b)))
    }
1124
    // Bitwise NOT, implemented as XOR with an all-ones vector (x86 has no
    // vector NOT instruction).

    /// Bitwise NOT of `i16` lanes.
    #[inline(always)]
    pub fn not_i16x8(self, a: i16x8) -> i16x8 {
        self.xor_i16x8(a, self.splat_i16x8(!0))
    }

    /// Bitwise NOT of `i32` lanes.
    #[inline(always)]
    pub fn not_i32x4(self, a: i32x4) -> i32x4 {
        self.xor_i32x4(a, self.splat_i32x4(!0))
    }

    /// Bitwise NOT of `i64` lanes.
    #[inline(always)]
    pub fn not_i64x2(self, a: i64x2) -> i64x2 {
        self.xor_i64x2(a, self.splat_i64x2(!0))
    }

    /// Bitwise NOT of `i8` lanes.
    #[inline(always)]
    pub fn not_i8x16(self, a: i8x16) -> i8x16 {
        self.xor_i8x16(a, self.splat_i8x16(!0))
    }

    /// Lanewise complement of an `m16x8` mask.
    #[inline(always)]
    pub fn not_m16x8(self, a: m16x8) -> m16x8 {
        self.xor_m16x8(a, self.splat_m16x8(m16::new(true)))
    }

    /// Lanewise complement of an `m32x4` mask.
    #[inline(always)]
    pub fn not_m32x4(self, a: m32x4) -> m32x4 {
        self.xor_m32x4(a, self.splat_m32x4(m32::new(true)))
    }

    /// Lanewise complement of an `m64x2` mask.
    #[inline(always)]
    pub fn not_m64x2(self, a: m64x2) -> m64x2 {
        self.xor_m64x2(a, self.splat_m64x2(m64::new(true)))
    }

    /// Lanewise complement of an `m8x16` mask.
    #[inline(always)]
    pub fn not_m8x16(self, a: m8x16) -> m8x16 {
        self.xor_m8x16(a, self.splat_m8x16(m8::new(true)))
    }

    /// Bitwise NOT of `u16` lanes.
    #[inline(always)]
    pub fn not_u16x8(self, a: u16x8) -> u16x8 {
        self.xor_u16x8(a, self.splat_u16x8(!0))
    }

    /// Bitwise NOT of `u32` lanes.
    #[inline(always)]
    pub fn not_u32x4(self, a: u32x4) -> u32x4 {
        self.xor_u32x4(a, self.splat_u32x4(!0))
    }

    /// Bitwise NOT of `u64` lanes.
    #[inline(always)]
    pub fn not_u64x2(self, a: u64x2) -> u64x2 {
        self.xor_u64x2(a, self.splat_u64x2(!0))
    }

    /// Bitwise NOT of `u8` lanes.
    #[inline(always)]
    pub fn not_u8x16(self, a: u8x16) -> u8x16 {
        self.xor_u8x16(a, self.splat_u8x16(!0))
    }
1196
    // Bitwise OR. All integer/mask variants call the same `_mm_or_si128`;
    // the per-type wrappers exist only for type safety.

    /// Bitwise OR of the bit patterns of two `f32x4` vectors.
    #[inline(always)]
    pub fn or_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_or_ps(cast!(a), cast!(b)))
    }

    /// Bitwise OR of the bit patterns of two `f64x2` vectors.
    #[inline(always)]
    pub fn or_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_or_pd(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `i16x8` vectors.
    #[inline(always)]
    pub fn or_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `i32x4` vectors.
    #[inline(always)]
    pub fn or_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `i64x2` vectors.
    #[inline(always)]
    pub fn or_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `i8x16` vectors.
    #[inline(always)]
    pub fn or_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `m16x8` masks.
    #[inline(always)]
    pub fn or_m16x8(self, a: m16x8, b: m16x8) -> m16x8 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `m32x4` masks.
    #[inline(always)]
    pub fn or_m32x4(self, a: m32x4, b: m32x4) -> m32x4 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `m64x2` masks.
    #[inline(always)]
    pub fn or_m64x2(self, a: m64x2, b: m64x2) -> m64x2 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `m8x16` masks.
    #[inline(always)]
    pub fn or_m8x16(self, a: m8x16, b: m8x16) -> m8x16 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `u16x8` vectors.
    #[inline(always)]
    pub fn or_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `u32x4` vectors.
    #[inline(always)]
    pub fn or_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `u64x2` vectors.
    #[inline(always)]
    pub fn or_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }

    /// Bitwise OR of two `u8x16` vectors.
    #[inline(always)]
    pub fn or_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_or_si128(cast!(a), cast!(b)))
    }
1280
    /// Narrows `i16` lanes to `i8` with signed saturation; `a` fills the low
    /// 8 result lanes, `b` the high 8.
    #[inline(always)]
    pub fn pack_with_signed_saturation_i16x8(self, a: i16x8, b: i16x8) -> i8x16 {
        cast!(self.sse2._mm_packs_epi16(cast!(a), cast!(b)))
    }

    /// Narrows `i32` lanes to `i16` with signed saturation; `a` low, `b` high.
    #[inline(always)]
    pub fn pack_with_signed_saturation_i32x4(self, a: i32x4, b: i32x4) -> i16x8 {
        cast!(self.sse2._mm_packs_epi32(cast!(a), cast!(b)))
    }

    /// Narrows `i16` lanes to `u8` with unsigned saturation (negatives clamp to 0).
    #[inline(always)]
    pub fn pack_with_unsigned_saturation_i16x8(self, a: i16x8, b: i16x8) -> u8x16 {
        cast!(self.sse2._mm_packus_epi16(cast!(a), cast!(b)))
    }

    /// Narrows `i32` lanes to `u16` with unsigned saturation (SSE4.1 `packusdw`).
    #[inline(always)]
    pub fn pack_with_unsigned_saturation_i32x4(self, a: i32x4, b: i32x4) -> u16x8 {
        cast!(self.sse4_1._mm_packus_epi32(cast!(a), cast!(b)))
    }
1312
    /// Reduces the two `c32` values packed in `a` to one, taking the `maxps`
    /// maximum of the real parts and of the imaginary parts independently.
    /// NOTE(review): component-wise max — presumably matching the semantics of
    /// the wider-vector reduce_max variants elsewhere in the crate; confirm.
    #[inline(always)]
    pub fn reduce_max_c32x2(self, a: f32x4) -> c32 {
        let a: __m128 = cast!(a);
        // High pair [re1, im1] moved down next to the low pair.
        let hi = self.sse._mm_movehl_ps(a, a);

        let r0 = self.sse._mm_max_ps(a, hi);

        // Extract the low 64 bits (one re/im pair) as the result.
        cast!(self.sse2._mm_cvtsd_f64(cast!(r0)))
    }

    /// Single-element reduction: the lone `c64` is returned unchanged.
    #[inline(always)]
    pub fn reduce_max_c64x1(self, a: f64x2) -> c64 {
        cast!(a)
    }
1330
1331 #[inline(always)]
1332 pub fn reduce_max_f32x4(self, a: f32x4) -> f32 {
1333 let a: __m128 = cast!(a);
1334 let hi = self.sse._mm_movehl_ps(a, a);
1335 let r0 = self.sse._mm_max_ps(a, hi);
1336 let r0_shuffled = self.sse._mm_shuffle_ps::<0b0001>(r0, r0);
1337 let r = self.sse._mm_max_ss(r0, r0_shuffled);
1338 self.sse._mm_cvtss_f32(r)
1339 }
1340
1341 #[inline(always)]
1342 pub fn reduce_max_f64x2(self, a: f64x2) -> f64 {
1343 let a: __m128d = cast!(a);
1344 let hi = cast!(self.sse._mm_movehl_ps(cast!(a), cast!(a)));
1345 let r = self.sse2._mm_max_sd(a, hi);
1346 self.sse2._mm_cvtsd_f64(r)
1347 }
1348
    /// Reduces the two `c32` values packed in `a` to one, taking the `minps`
    /// minimum of the real parts and of the imaginary parts independently.
    /// NOTE(review): component-wise min — presumably matching the semantics of
    /// the wider-vector reduce_min variants elsewhere in the crate; confirm.
    #[inline(always)]
    pub fn reduce_min_c32x2(self, a: f32x4) -> c32 {
        let a: __m128 = cast!(a);
        // High pair [re1, im1] moved down next to the low pair.
        let hi = self.sse._mm_movehl_ps(a, a);

        let r0 = self.sse._mm_min_ps(a, hi);

        // Extract the low 64 bits (one re/im pair) as the result.
        cast!(self.sse2._mm_cvtsd_f64(cast!(r0)))
    }

    /// Single-element reduction: the lone `c64` is returned unchanged.
    #[inline(always)]
    pub fn reduce_min_c64x1(self, a: f64x2) -> c64 {
        cast!(a)
    }
1366
1367 #[inline(always)]
1368 pub fn reduce_min_f32x4(self, a: f32x4) -> f32 {
1369 let a: __m128 = cast!(a);
1370 let hi = self.sse._mm_movehl_ps(a, a);
1371 let r0 = self.sse._mm_min_ps(a, hi);
1372 let r0_shuffled = self.sse._mm_shuffle_ps::<0b0001>(r0, r0);
1373 let r = self.sse._mm_min_ss(r0, r0_shuffled);
1374 self.sse._mm_cvtss_f32(r)
1375 }
1376
1377 #[inline(always)]
1378 pub fn reduce_min_f64x2(self, a: f64x2) -> f64 {
1379 let a: __m128d = cast!(a);
1380 let hi = cast!(self.sse._mm_movehl_ps(cast!(a), cast!(a)));
1381 let r = self.sse2._mm_min_sd(a, hi);
1382 self.sse2._mm_cvtsd_f64(r)
1383 }
1384
1385 #[inline(always)]
1386 pub fn reduce_product_f32x4(self, a: f32x4) -> f32 {
1387 let a: __m128 = cast!(a);
1388 let hi = self.sse._mm_movehl_ps(a, a);
1389 let r0 = self.sse._mm_mul_ps(a, hi);
1390 let r0_shuffled = self.sse._mm_shuffle_ps::<0b0001>(r0, r0);
1391 let r = self.sse._mm_mul_ss(r0, r0_shuffled);
1392 self.sse._mm_cvtss_f32(r)
1393 }
1394
1395 #[inline(always)]
1396 pub fn reduce_product_f64x2(self, a: f64x2) -> f64 {
1397 let a: __m128d = cast!(a);
1398 let hi = cast!(self.sse._mm_movehl_ps(cast!(a), cast!(a)));
1399 let r = self.sse2._mm_mul_sd(a, hi);
1400 self.sse2._mm_cvtsd_f64(r)
1401 }
1402
1403 #[inline(always)]
1404 pub fn reduce_sum_c32x2(self, a: f32x4) -> c32 {
1405 let a: __m128 = cast!(a);
1407 let hi = self.sse._mm_movehl_ps(a, a);
1409
1410 let r0 = self.sse._mm_add_ps(a, hi);
1412
1413 cast!(self.sse2._mm_cvtsd_f64(cast!(r0)))
1414 }
1415
1416 #[inline(always)]
1417 pub fn reduce_sum_c64x1(self, a: f64x2) -> c64 {
1418 cast!(a)
1419 }
1420
1421 #[inline(always)]
1422 pub fn reduce_sum_f32x4(self, a: f32x4) -> f32 {
1423 let a: __m128 = cast!(a);
1425 let hi = self.sse._mm_movehl_ps(a, a);
1427
1428 let r0 = self.sse._mm_add_ps(a, hi);
1430 let r0_shuffled = self.sse._mm_shuffle_ps::<0b0001>(r0, r0);
1432
1433 let r = self.sse._mm_add_ss(r0, r0_shuffled);
1434
1435 self.sse._mm_cvtss_f32(r)
1436 }
1437
1438 #[inline(always)]
1439 pub fn reduce_sum_f64x2(self, a: f64x2) -> f64 {
1440 let a: __m128d = cast!(a);
1441 let hi = cast!(self.sse._mm_movehl_ps(cast!(a), cast!(a)));
1442 let r = self.sse2._mm_add_sd(a, hi);
1443 self.sse2._mm_cvtsd_f64(r)
1444 }
1445
    /// Rounds each `f32` lane of `a` to the nearest integer (ties to even),
    /// without raising floating-point exceptions.
    #[inline(always)]
    pub fn round_f32x4(self, a: f32x4) -> f32x4 {
        const ROUNDING: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        cast!(self.sse4_1._mm_round_ps::<ROUNDING>(cast!(a)))
    }

    /// Rounds each `f64` lane of `a` to the nearest integer (ties to even),
    /// without raising floating-point exceptions.
    #[inline(always)]
    pub fn round_f64x2(self, a: f64x2) -> f64x2 {
        const ROUNDING: i32 = _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC;
        cast!(self.sse4_1._mm_round_pd::<ROUNDING>(cast!(a)))
    }
1461
    /// Adds the lanes of `a` and `b`, saturating at the `i16` bounds instead of wrapping.
    #[inline(always)]
    pub fn saturating_add_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_adds_epi16(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, saturating at the `i8` bounds instead of wrapping.
    #[inline(always)]
    pub fn saturating_add_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_adds_epi8(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, saturating at `u16::MAX` instead of wrapping.
    #[inline(always)]
    pub fn saturating_add_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_adds_epu16(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, saturating at `u8::MAX` instead of wrapping.
    #[inline(always)]
    pub fn saturating_add_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_adds_epu8(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, saturating at the `i16` bounds instead of wrapping.
    #[inline(always)]
    pub fn saturating_sub_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_subs_epi16(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, saturating at the `i8` bounds instead of wrapping.
    #[inline(always)]
    pub fn saturating_sub_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_subs_epi8(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, saturating at `0` instead of wrapping.
    #[inline(always)]
    pub fn saturating_sub_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_subs_epu16(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, saturating at `0` instead of wrapping.
    #[inline(always)]
    pub fn saturating_sub_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_subs_epu8(cast!(a), cast!(b)))
    }
1509
    /// Compile-time lane select: lane `i` of the result is `if_true` where bit `i`
    /// of `MASK4` is set, `if_false` otherwise. Delegates to [`Self::select_const_u32x4`].
    #[inline(always)]
    pub fn select_const_f32x4<const MASK4: i32>(self, if_true: f32x4, if_false: f32x4) -> f32x4 {
        cast!(self.select_const_u32x4::<MASK4>(cast!(if_true), cast!(if_false)))
    }

    /// Compile-time lane select over 2 lanes controlled by the low 2 bits of `MASK2`.
    /// Delegates to [`Self::select_const_u64x2`].
    #[inline(always)]
    pub fn select_const_f64x2<const MASK2: i32>(self, if_true: f64x2, if_false: f64x2) -> f64x2 {
        cast!(self.select_const_u64x2::<MASK2>(cast!(if_true), cast!(if_false)))
    }

    /// Compile-time lane select; see [`Self::select_const_u32x4`].
    #[inline(always)]
    pub fn select_const_i32x4<const MASK4: i32>(self, if_true: i32x4, if_false: i32x4) -> i32x4 {
        cast!(self.select_const_u32x4::<MASK4>(cast!(if_true), cast!(if_false)))
    }

    /// Compile-time lane select; see [`Self::select_const_u64x2`].
    #[inline(always)]
    pub fn select_const_i64x2<const MASK2: i32>(self, if_true: i64x2, if_false: i64x2) -> i64x2 {
        cast!(self.select_const_u64x2::<MASK2>(cast!(if_true), cast!(if_false)))
    }

    /// Compile-time lane select: `_mm_blend_ps` takes its second operand where a
    /// mask bit is set, so `if_true` is passed second to match "bit set => if_true".
    #[inline(always)]
    pub fn select_const_u32x4<const MASK4: i32>(self, if_true: u32x4, if_false: u32x4) -> u32x4 {
        cast!(
            self.sse4_1
                ._mm_blend_ps::<MASK4>(cast!(if_false), cast!(if_true)),
        )
    }

    /// Compile-time lane select over 2 lanes: a set bit in `MASK2` picks `if_true`.
    #[inline(always)]
    pub fn select_const_u64x2<const MASK2: i32>(self, if_true: u64x2, if_false: u64x2) -> u64x2 {
        cast!(
            self.sse4_1
                ._mm_blend_pd::<MASK2>(cast!(if_false), cast!(if_true)),
        )
    }

    /// Runtime lane select: picks `if_true` where the corresponding lane of `mask`
    /// has its sign bit set (`_mm_blendv_ps` keys on the top bit of each lane).
    #[inline(always)]
    pub fn select_f32x4(self, mask: m32x4, if_true: f32x4, if_false: f32x4) -> f32x4 {
        cast!(
            self.sse4_1
                ._mm_blendv_ps(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }

    /// Runtime lane select keyed on the sign bit of each `mask` lane.
    #[inline(always)]
    pub fn select_f64x2(self, mask: m64x2, if_true: f64x2, if_false: f64x2) -> f64x2 {
        cast!(
            self.sse4_1
                ._mm_blendv_pd(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }

    /// Runtime lane select; see [`Self::select_u16x8`].
    #[inline(always)]
    pub fn select_i16x8(self, mask: m16x8, if_true: i16x8, if_false: i16x8) -> i16x8 {
        cast!(self.select_u16x8(mask, cast!(if_true), cast!(if_false)))
    }

    /// Runtime lane select; see [`Self::select_u32x4`].
    #[inline(always)]
    pub fn select_i32x4(self, mask: m32x4, if_true: i32x4, if_false: i32x4) -> i32x4 {
        cast!(self.select_u32x4(mask, cast!(if_true), cast!(if_false)))
    }

    /// Runtime lane select; see [`Self::select_u64x2`].
    #[inline(always)]
    pub fn select_i64x2(self, mask: m64x2, if_true: i64x2, if_false: i64x2) -> i64x2 {
        cast!(self.select_u64x2(mask, cast!(if_true), cast!(if_false)))
    }

    /// Runtime lane select; see [`Self::select_u8x16`].
    #[inline(always)]
    pub fn select_i8x16(self, mask: m8x16, if_true: i8x16, if_false: i8x16) -> i8x16 {
        cast!(self.select_u8x16(mask, cast!(if_true), cast!(if_false)))
    }

    /// Runtime lane select. `_mm_blendv_epi8` selects per *byte* from the mask's
    /// top bit; this is lane-correct only when each mask lane is all-ones or
    /// all-zeros — assumed to be the `m16` invariant (TODO confirm at the type).
    #[inline(always)]
    pub fn select_u16x8(self, mask: m16x8, if_true: u16x8, if_false: u16x8) -> u16x8 {
        cast!(
            self.sse4_1
                ._mm_blendv_epi8(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }

    /// Runtime lane select via per-byte `blendv`; assumes all-ones/all-zeros mask lanes.
    #[inline(always)]
    pub fn select_u32x4(self, mask: m32x4, if_true: u32x4, if_false: u32x4) -> u32x4 {
        cast!(
            self.sse4_1
                ._mm_blendv_epi8(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }

    /// Runtime lane select via per-byte `blendv`; assumes all-ones/all-zeros mask lanes.
    #[inline(always)]
    pub fn select_u64x2(self, mask: m64x2, if_true: u64x2, if_false: u64x2) -> u64x2 {
        cast!(
            self.sse4_1
                ._mm_blendv_epi8(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }

    /// Runtime byte select keyed on the top bit of each `mask` byte.
    #[inline(always)]
    pub fn select_u8x16(self, mask: m8x16, if_true: u8x16, if_false: u8x16) -> u8x16 {
        cast!(
            self.sse4_1
                ._mm_blendv_epi8(cast!(if_false), cast!(if_true), cast!(mask)),
        )
    }
1645
    /// Shifts each `i16` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_i16x8<const AMOUNT: i32>(self, a: i16x8) -> i16x8 {
        cast!(self.sse2._mm_slli_epi16::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `i32` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_i32x4<const AMOUNT: i32>(self, a: i32x4) -> i32x4 {
        cast!(self.sse2._mm_slli_epi32::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `i64` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_i64x2<const AMOUNT: i32>(self, a: i64x2) -> i64x2 {
        cast!(self.sse2._mm_slli_epi64::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `u16` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_u16x8<const AMOUNT: i32>(self, a: u16x8) -> u16x8 {
        cast!(self.sse2._mm_slli_epi16::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `u32` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_u32x4<const AMOUNT: i32>(self, a: u32x4) -> u32x4 {
        cast!(self.sse2._mm_slli_epi32::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `u64` lane of `a` left by the immediate `AMOUNT` bits, shifting in zeros.
    #[inline(always)]
    pub fn shl_const_u64x2<const AMOUNT: i32>(self, a: u64x2) -> u64x2 {
        cast!(self.sse2._mm_slli_epi64::<AMOUNT>(cast!(a)))
    }

    /// Shifts each `i16` lane of `a` left by the count held in the low 64 bits of
    /// `amount` (all lanes use the same count, per `_mm_sll_epi16` semantics).
    #[inline(always)]
    pub fn shl_i16x8(self, a: i16x8, amount: u64x2) -> i16x8 {
        cast!(self.sse2._mm_sll_epi16(cast!(a), cast!(amount)))
    }

    /// Shifts each `i32` lane of `a` left by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shl_i32x4(self, a: i32x4, amount: u64x2) -> i32x4 {
        cast!(self.sse2._mm_sll_epi32(cast!(a), cast!(amount)))
    }

    /// Shifts each `i64` lane of `a` left by the count in the low 64 bits of `amount`.
    // NOTE(review): the return type is `u64x2`, unlike every sibling `shl_*` method
    // (which return their input type) and unlike `shl_const_i64x2` (-> i64x2).
    // Looks like it should be `i64x2`, but changing it would break callers —
    // confirm intent before touching the signature.
    #[inline(always)]
    pub fn shl_i64x2(self, a: i64x2, amount: u64x2) -> u64x2 {
        cast!(self.sse2._mm_sll_epi64(cast!(a), cast!(amount)))
    }

    /// Shifts each `u16` lane of `a` left by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shl_u16x8(self, a: u16x8, amount: u64x2) -> u16x8 {
        cast!(self.sse2._mm_sll_epi16(cast!(a), cast!(amount)))
    }

    /// Shifts each `u32` lane of `a` left by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shl_u32x4(self, a: u32x4, amount: u64x2) -> u32x4 {
        cast!(self.sse2._mm_sll_epi32(cast!(a), cast!(amount)))
    }

    /// Shifts each `u64` lane of `a` left by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shl_u64x2(self, a: u64x2, amount: u64x2) -> u64x2 {
        cast!(self.sse2._mm_sll_epi64(cast!(a), cast!(amount)))
    }
1735
    /// Arithmetic right shift of each `i16` lane by the immediate `AMOUNT` bits
    /// (`srai`: the sign bit is shifted in).
    #[inline(always)]
    pub fn shr_const_i16x8<const AMOUNT: i32>(self, a: i16x8) -> i16x8 {
        cast!(self.sse2._mm_srai_epi16::<AMOUNT>(cast!(a)))
    }

    /// Arithmetic right shift of each `i32` lane by the immediate `AMOUNT` bits.
    #[inline(always)]
    pub fn shr_const_i32x4<const AMOUNT: i32>(self, a: i32x4) -> i32x4 {
        cast!(self.sse2._mm_srai_epi32::<AMOUNT>(cast!(a)))
    }

    /// Logical right shift of each `u16` lane by the immediate `AMOUNT` bits
    /// (`srli`: zeros are shifted in).
    #[inline(always)]
    pub fn shr_const_u16x8<const AMOUNT: i32>(self, a: u16x8) -> u16x8 {
        cast!(self.sse2._mm_srli_epi16::<AMOUNT>(cast!(a)))
    }

    /// Logical right shift of each `u32` lane by the immediate `AMOUNT` bits.
    #[inline(always)]
    pub fn shr_const_u32x4<const AMOUNT: i32>(self, a: u32x4) -> u32x4 {
        cast!(self.sse2._mm_srli_epi32::<AMOUNT>(cast!(a)))
    }

    /// Logical right shift of each `u64` lane by the immediate `AMOUNT` bits.
    #[inline(always)]
    pub fn shr_const_u64x2<const AMOUNT: i32>(self, a: u64x2) -> u64x2 {
        cast!(self.sse2._mm_srli_epi64::<AMOUNT>(cast!(a)))
    }

    /// Arithmetic right shift of each `i16` lane by the count held in the low
    /// 64 bits of `amount` (all lanes use the same count).
    #[inline(always)]
    pub fn shr_i16x8(self, a: i16x8, amount: u64x2) -> i16x8 {
        cast!(self.sse2._mm_sra_epi16(cast!(a), cast!(amount)))
    }

    /// Arithmetic right shift of each `i32` lane by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shr_i32x4(self, a: i32x4, amount: u64x2) -> i32x4 {
        cast!(self.sse2._mm_sra_epi32(cast!(a), cast!(amount)))
    }

    /// Logical right shift of each `u16` lane by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shr_u16x8(self, a: u16x8, amount: u64x2) -> u16x8 {
        cast!(self.sse2._mm_srl_epi16(cast!(a), cast!(amount)))
    }

    /// Logical right shift of each `u32` lane by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shr_u32x4(self, a: u32x4, amount: u64x2) -> u32x4 {
        cast!(self.sse2._mm_srl_epi32(cast!(a), cast!(amount)))
    }

    /// Logical right shift of each `u64` lane by the count in the low 64 bits of `amount`.
    #[inline(always)]
    pub fn shr_u64x2(self, a: u64x2, amount: u64x2) -> u64x2 {
        cast!(self.sse2._mm_srl_epi64(cast!(a), cast!(amount)))
    }
1814
    /// Broadcasts `value` to all four `f32` lanes.
    #[inline(always)]
    pub fn splat_f32x4(self, value: f32) -> f32x4 {
        cast!(self.sse._mm_set1_ps(value))
    }

    /// Broadcasts `value` to both `f64` lanes.
    #[inline(always)]
    pub fn splat_f64x2(self, value: f64) -> f64x2 {
        cast!(self.sse2._mm_set1_pd(value))
    }

    /// Broadcasts `value` to all eight `i16` lanes.
    #[inline(always)]
    pub fn splat_i16x8(self, value: i16) -> i16x8 {
        cast!(self.sse2._mm_set1_epi16(value))
    }

    /// Broadcasts `value` to all four `i32` lanes.
    #[inline(always)]
    pub fn splat_i32x4(self, value: i32) -> i32x4 {
        cast!(self.sse2._mm_set1_epi32(value))
    }

    /// Broadcasts `value` to both `i64` lanes.
    #[inline(always)]
    pub fn splat_i64x2(self, value: i64) -> i64x2 {
        cast!(self.sse2._mm_set1_epi64x(value))
    }

    /// Broadcasts `value` to all sixteen `i8` lanes.
    #[inline(always)]
    pub fn splat_i8x16(self, value: i8) -> i8x16 {
        cast!(self.sse2._mm_set1_epi8(value))
    }

    /// Broadcasts the mask `value` (via its underlying bits) to all eight lanes.
    #[inline(always)]
    pub fn splat_m16x8(self, value: m16) -> m16x8 {
        cast!(self.sse2._mm_set1_epi16(value.0 as i16))
    }

    /// Broadcasts the mask `value` to all four lanes.
    #[inline(always)]
    pub fn splat_m32x4(self, value: m32) -> m32x4 {
        cast!(self.sse2._mm_set1_epi32(value.0 as i32))
    }

    /// Broadcasts the mask `value` to both lanes.
    #[inline(always)]
    pub fn splat_m64x2(self, value: m64) -> m64x2 {
        cast!(self.sse2._mm_set1_epi64x(value.0 as i64))
    }

    /// Broadcasts the mask `value` to all sixteen lanes.
    #[inline(always)]
    pub fn splat_m8x16(self, value: m8) -> m8x16 {
        cast!(self.sse2._mm_set1_epi8(value.0 as i8))
    }

    /// Broadcasts `value` to all eight `u16` lanes.
    #[inline(always)]
    pub fn splat_u16x8(self, value: u16) -> u16x8 {
        cast!(self.sse2._mm_set1_epi16(value as i16))
    }

    /// Broadcasts `value` to all four `u32` lanes.
    #[inline(always)]
    pub fn splat_u32x4(self, value: u32) -> u32x4 {
        cast!(self.sse2._mm_set1_epi32(value as i32))
    }

    /// Broadcasts `value` to both `u64` lanes.
    #[inline(always)]
    pub fn splat_u64x2(self, value: u64) -> u64x2 {
        cast!(self.sse2._mm_set1_epi64x(value as i64))
    }

    /// Broadcasts `value` to all sixteen `u8` lanes.
    #[inline(always)]
    pub fn splat_u8x16(self, value: u8) -> u8x16 {
        cast!(self.sse2._mm_set1_epi8(value as i8))
    }
1898
    /// Computes the square root of each `f32` lane of `a`.
    #[inline(always)]
    pub fn sqrt_f32x4(self, a: f32x4) -> f32x4 {
        cast!(self.sse._mm_sqrt_ps(cast!(a)))
    }

    /// Computes the square root of each `f64` lane of `a`.
    #[inline(always)]
    pub fn sqrt_f64x2(self, a: f64x2) -> f64x2 {
        cast!(self.sse2._mm_sqrt_pd(cast!(a)))
    }
1910
    /// Subtracts each lane of `b` from the corresponding lane of `a`.
    #[inline(always)]
    pub fn sub_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_sub_ps(cast!(a), cast!(b)))
    }

    /// Subtracts each lane of `b` from the corresponding lane of `a`.
    #[inline(always)]
    pub fn sub_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_sub_pd(cast!(a), cast!(b)))
    }

    /// Alternating subtract/add (`_mm_addsub_ps`): even lanes are `a - b`,
    /// odd lanes are `a + b`.
    #[inline(always)]
    pub fn subadd_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse3._mm_addsub_ps(cast!(a), cast!(b)))
    }

    /// Alternating subtract/add: lane 0 is `a - b`, lane 1 is `a + b`.
    #[inline(always)]
    pub fn subadd_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse3._mm_addsub_pd(cast!(a), cast!(b)))
    }
1934
    /// Computes `|a[i] - b[i]|` per byte and sums each 8-byte half
    /// (`_mm_sad_epu8`), yielding the two sums zero-extended into `u64` lanes.
    #[inline(always)]
    pub fn sum_of_absolute_differences_u8x16(self, a: u8x16, b: u8x16) -> u64x2 {
        cast!(self.sse2._mm_sad_epu8(cast!(a), cast!(b)))
    }
1942
    /// Rounds each `f32` lane of `a` toward zero (truncation), without raising
    /// floating-point exceptions.
    #[inline(always)]
    pub fn truncate_f32x4(self, a: f32x4) -> f32x4 {
        const ROUNDING: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
        cast!(self.sse4_1._mm_round_ps::<ROUNDING>(cast!(a)))
    }

    /// Rounds each `f64` lane of `a` toward zero (truncation), without raising
    /// floating-point exceptions.
    #[inline(always)]
    pub fn truncate_f64x2(self, a: f64x2) -> f64x2 {
        const ROUNDING: i32 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC;
        cast!(self.sse4_1._mm_round_pd::<ROUNDING>(cast!(a)))
    }
1956
    /// Returns the absolute value of each `i16` lane as an unsigned lane
    /// (so `i16::MIN` maps to `32768` rather than overflowing).
    #[inline(always)]
    pub fn unsigned_abs_i16x8(self, a: i16x8) -> u16x8 {
        cast!(self.ssse3._mm_abs_epi16(cast!(a)))
    }

    /// Returns the absolute value of each `i32` lane as an unsigned lane.
    #[inline(always)]
    pub fn unsigned_abs_i32x4(self, a: i32x4) -> u32x4 {
        cast!(self.ssse3._mm_abs_epi32(cast!(a)))
    }

    /// Returns the absolute value of each `i8` lane as an unsigned lane.
    #[inline(always)]
    pub fn unsigned_abs_i8x16(self, a: i8x16) -> u8x16 {
        cast!(self.ssse3._mm_abs_epi8(cast!(a)))
    }
1974
    /// Multiplies each `i16` lane of `a` and `b`, returning the
    /// `(low half, high half)` 16-bit parts of the full 32-bit products.
    #[inline(always)]
    pub fn widening_mul_i16x8(self, a: i16x8, b: i16x8) -> (i16x8, i16x8) {
        (
            cast!(self.sse2._mm_mullo_epi16(cast!(a), cast!(b))),
            cast!(self.sse2._mm_mulhi_epi16(cast!(a), cast!(b))),
        )
    }

    /// Multiplies each `i32` lane of `a` and `b`, returning the
    /// `(low half, high half)` 32-bit parts of the full 64-bit products.
    #[inline(always)]
    pub fn widening_mul_i32x4(self, a: i32x4, b: i32x4) -> (i32x4, i32x4) {
        let a = cast!(a);
        let b = cast!(b);
        let sse = self.sse2;

        // `_mm_mul_epi32` multiplies the low 32 bits of each 64-bit lane, i.e.
        // the even i32 lanes (0 and 2), producing two 64-bit products.
        let ab_evens = self.sse4_1._mm_mul_epi32(a, b);
        // Shift the odd lanes (1 and 3) down into the even positions so the
        // same intrinsic multiplies them.
        let ab_odds = self
            .sse4_1
            ._mm_mul_epi32(sse._mm_srli_epi64::<32>(a), sse._mm_srli_epi64::<32>(b));

        // Interleave the low 32 bits of each product: blend mask 0b1010 keeps
        // the even products in 32-bit lanes 0,2 and takes the (left-shifted)
        // odd products in lanes 1,3. `blend_ps` is used purely as a bit blend.
        let ab_lo = self.sse4_1._mm_blend_ps::<0b1010>(
            cast!(ab_evens),
            cast!(sse._mm_slli_epi64::<32>(ab_odds)),
        );
        // Same interleave for the high 32 bits of each product.
        let ab_hi = self.sse4_1._mm_blend_ps::<0b1010>(
            cast!(sse._mm_srli_epi64::<32>(ab_evens)),
            cast!(ab_odds),
        );

        (cast!(ab_lo), cast!(ab_hi))
    }

    /// Multiplies each `u16` lane of `a` and `b`, returning the
    /// `(low half, high half)` 16-bit parts of the full 32-bit products.
    #[inline(always)]
    pub fn widening_mul_u16x8(self, a: u16x8, b: u16x8) -> (u16x8, u16x8) {
        (
            cast!(self.sse2._mm_mullo_epi16(cast!(a), cast!(b))),
            cast!(self.sse2._mm_mulhi_epu16(cast!(a), cast!(b))),
        )
    }

    /// Multiplies each `u32` lane of `a` and `b`, returning the
    /// `(low half, high half)` 32-bit parts of the full 64-bit products.
    /// Same even/odd-lane scheme as [`Self::widening_mul_i32x4`], using the
    /// unsigned `_mm_mul_epu32`.
    #[inline(always)]
    pub fn widening_mul_u32x4(self, a: u32x4, b: u32x4) -> (u32x4, u32x4) {
        let a = cast!(a);
        let b = cast!(b);
        let sse = self.sse2;

        // 64-bit products of the even u32 lanes (0 and 2).
        let ab_evens = sse._mm_mul_epu32(a, b);
        // Odd lanes (1 and 3) shifted down and multiplied the same way.
        let ab_odds = sse._mm_mul_epu32(sse._mm_srli_epi64::<32>(a), sse._mm_srli_epi64::<32>(b));

        // Interleave low 32 bits of each product (mask 0b1010: odd lanes come
        // from the second operand).
        let ab_lo = self.sse4_1._mm_blend_ps::<0b1010>(
            cast!(ab_evens),
            cast!(sse._mm_slli_epi64::<32>(ab_odds)),
        );
        // Interleave high 32 bits of each product.
        let ab_hi = self.sse4_1._mm_blend_ps::<0b1010>(
            cast!(sse._mm_srli_epi64::<32>(ab_evens)),
            cast!(ab_odds),
        );

        (cast!(ab_lo), cast!(ab_hi))
    }
2054
    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_add_epi16(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_add_epi32(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_add_epi64(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_add_epi8(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow (two's-complement
    /// addition is sign-agnostic, so the signed intrinsic is reused).
    #[inline(always)]
    pub fn wrapping_add_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_add_epi16(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_add_epi32(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_add_epi64(cast!(a), cast!(b)))
    }

    /// Adds the lanes of `a` and `b`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_add_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_add_epi8(cast!(a), cast!(b)))
    }
2102
    /// Multiplies the lanes of `a` and `b`, keeping the low 16 bits of each
    /// product (wrapping multiplication).
    #[inline(always)]
    pub fn wrapping_mul_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_mullo_epi16(cast!(a), cast!(b)))
    }

    /// Multiplies the lanes of `a` and `b`, keeping the low 32 bits of each
    /// product. Requires SSE4.1 (`mullo_epi32`).
    #[inline(always)]
    pub fn wrapping_mul_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse4_1._mm_mullo_epi32(cast!(a), cast!(b)))
    }

    /// Multiplies the lanes of `a` and `b`, keeping the low 16 bits of each
    /// product (the low half is the same for signed and unsigned operands).
    #[inline(always)]
    pub fn wrapping_mul_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_mullo_epi16(cast!(a), cast!(b)))
    }

    /// Multiplies the lanes of `a` and `b`, keeping the low 32 bits of each product.
    #[inline(always)]
    pub fn wrapping_mul_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse4_1._mm_mullo_epi32(cast!(a), cast!(b)))
    }
2126
    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_sub_epi16(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_sub_epi32(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_sub_epi64(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_sub_epi8(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow
    /// (two's-complement subtraction is sign-agnostic).
    #[inline(always)]
    pub fn wrapping_sub_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_sub_epi16(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_sub_epi32(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_sub_epi64(cast!(a), cast!(b)))
    }

    /// Subtracts the lanes of `b` from `a`, wrapping on overflow.
    #[inline(always)]
    pub fn wrapping_sub_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_sub_epi8(cast!(a), cast!(b)))
    }
2174
    /// Returns the bitwise XOR of `a` and `b` (operating on the f32 bit patterns).
    #[inline(always)]
    pub fn xor_f32x4(self, a: f32x4, b: f32x4) -> f32x4 {
        cast!(self.sse._mm_xor_ps(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b` (operating on the f64 bit patterns).
    #[inline(always)]
    pub fn xor_f64x2(self, a: f64x2, b: f64x2) -> f64x2 {
        cast!(self.sse2._mm_xor_pd(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_i16x8(self, a: i16x8, b: i16x8) -> i16x8 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_i32x4(self, a: i32x4, b: i32x4) -> i32x4 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_i64x2(self, a: i64x2, b: i64x2) -> i64x2 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_i8x16(self, a: i8x16, b: i8x16) -> i8x16 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of the mask vectors `a` and `b`.
    #[inline(always)]
    pub fn xor_m16x8(self, a: m16x8, b: m16x8) -> m16x8 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of the mask vectors `a` and `b`.
    #[inline(always)]
    pub fn xor_m32x4(self, a: m32x4, b: m32x4) -> m32x4 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of the mask vectors `a` and `b`.
    #[inline(always)]
    pub fn xor_m64x2(self, a: m64x2, b: m64x2) -> m64x2 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of the mask vectors `a` and `b`.
    #[inline(always)]
    pub fn xor_m8x16(self, a: m8x16, b: m8x16) -> m8x16 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_u16x8(self, a: u16x8, b: u16x8) -> u16x8 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_u32x4(self, a: u32x4, b: u32x4) -> u32x4 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_u64x2(self, a: u64x2, b: u64x2) -> u64x2 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }

    /// Returns the bitwise XOR of `a` and `b`.
    #[inline(always)]
    pub fn xor_u8x16(self, a: u8x16, b: u8x16) -> u8x16 {
        cast!(self.sse2._mm_xor_si128(cast!(a), cast!(b)))
    }
2258}