1use super::*;
2use crate::core_arch::x86::Avx2;
3
4#[cfg(target_arch = "x86")]
5use core::arch::x86::*;
6#[cfg(target_arch = "x86_64")]
7use core::arch::x86_64::*;
8
9mod v2;
10mod v3;
11
12#[cfg(feature = "nightly")]
13#[cfg_attr(docsrs, doc(cfg(feature = "nightly")))]
14mod v4;
15
16pub use v2::*;
17pub use v3::*;
18
19#[cfg(feature = "nightly")]
20#[cfg_attr(docsrs, doc(cfg(feature = "nightly")))]
21pub use v4::*;
22
/// Loads 8 `u32`s by trampolining into the external assembly routine `f`.
///
/// `f` is *not* called with the normal C ABI despite its type: the register
/// convention is pointer in `rax`/`eax`, return address in `rcx`/`ecx`,
/// result delivered in `ymm0`, and `ymm1` clobbered. `f` is expected to jump
/// back through `rcx`/`ecx` instead of executing `ret` (presumably these
/// routines come from the generated `x86_64_asm.rs` included below — TODO
/// confirm).
///
/// # Safety
/// `ptr` must be valid for whatever reads `f` performs, and `f` must honor
/// the register convention described above.
#[target_feature(enable = "avx,avx2")]
#[inline]
unsafe fn avx_ld_u32s(ptr: *const u32, f: unsafe extern "C" fn()) -> u32x8 {
    let ret: __m256;
    #[cfg(target_arch = "x86_64")]
    core::arch::asm! {
        // Materialize the address of local label `2:` in rcx, then tail-jump
        // into `f`; `f` resumes execution here by jumping through rcx.
        "lea rcx, [rip + 2f]",
        "jmp {f}",
        "2:",
        f = in(reg) f,
        in("rax") ptr,
        out("rcx") _,
        out("ymm0") ret,
        out("ymm1") _,
    };

    #[cfg(target_arch = "x86")]
    core::arch::asm! {
        // NOTE(review): 32-bit x86 has no EIP-relative addressing mode, so
        // `[eip + 2f]` does not look assemblable — confirm this cfg branch is
        // actually ever built/tested.
        "lea ecx, [eip + 2f]",
        "jmp {f}",
        "2:",
        f = in(reg) f,
        in("eax") ptr,
        out("ecx") _,
        out("ymm0") ret,
        out("ymm1") _,
    };

    cast!(ret)
}
53
/// Stores 8 `u32`s by trampolining into the external assembly routine `f`.
///
/// Counterpart of `avx_ld_u32s`: hands `value` to `f` in `ymm0`, with the
/// destination pointer in `rax`/`eax` and the return address in `rcx`/`ecx`.
/// `f` returns by jumping through `rcx`/`ecx`, not via `ret` (custom, non-C
/// convention — presumably implemented by the generated `x86_64_asm.rs`;
/// TODO confirm).
///
/// # Safety
/// `ptr` must be valid for whatever writes `f` performs, and `f` must honor
/// the register convention described above.
#[target_feature(enable = "avx,avx2")]
#[inline]
unsafe fn avx_st_u32s(ptr: *mut u32, value: u32x8, f: unsafe extern "C" fn()) {
    #[cfg(target_arch = "x86_64")]
    core::arch::asm! {
        // Return address for `f` goes in rcx; `f` jumps back to label `2:`.
        "lea rcx, [rip + 2f]",
        "jmp {f}",
        "2:",
        f = in(reg) f,

        in("rax") ptr,
        out("rcx") _,
        inout("ymm0") cast::<_, __m256>(value) => _,
        out("ymm1") _,
    };

    #[cfg(target_arch = "x86")]
    core::arch::asm! {
        // NOTE(review): 32-bit x86 has no EIP-relative addressing mode, so
        // `[eip + 2f]` does not look assemblable — confirm this branch builds.
        "lea ecx, [eip + 2f]",
        "jmp {f}",
        "2:",
        f = in(reg) f,

        in("eax") ptr,
        out("ecx") _,
        inout("ymm0") cast::<_, __m256>(value) => _,
        out("ymm1") _,
    };
}
83
/// Instruction-set level selected at runtime, used to dispatch SIMD kernels
/// to the most capable implementation the current CPU supports.
#[derive(Debug, Clone, Copy)]
#[non_exhaustive]
#[repr(u8)]
pub enum Arch {
    /// Plain scalar fallback; always available.
    Scalar = 0,

    /// x86-v3 feature level (carries the `V3` capability token).
    #[cfg(feature = "x86-v3")]
    #[cfg_attr(docsrs, doc(cfg(feature = "x86-v3")))]
    V3(V3) = 1,

    /// x86-v4 feature level (carries the `V4` token); nightly-only feature.
    #[cfg(feature = "nightly-x86-v4")]
    #[cfg_attr(docsrs, doc(cfg(feature = "nightly-x86-v4")))]
    V4(V4) = 2,
}
99
impl Arch {
    /// Detects the best available instruction set at runtime.
    ///
    /// Tries the most capable level first (V4, then V3 — each behind its
    /// feature gate) and falls back to [`Arch::Scalar`] when neither is
    /// compiled in or supported by the CPU.
    #[inline]
    pub fn new() -> Self {
        #[cfg(feature = "nightly-x86-v4")]
        if let Some(simd) = V4::try_new() {
            return Self::V4(simd);
        }
        #[cfg(feature = "x86-v3")]
        if let Some(simd) = V3::try_new() {
            return Self::V3(simd);
        }
        Self::Scalar
    }

    /// Runs `op` with the capability token matching the detected level.
    ///
    /// `#[inline(always)]` so the dispatch itself disappears and `op` can be
    /// compiled directly inside the target-feature context.
    #[inline(always)]
    pub fn dispatch<Op: WithSimd>(self, op: Op) -> Op::Output {
        match self {
            #[cfg(feature = "nightly-x86-v4")]
            Arch::V4(simd) => Simd::vectorize(simd, op),
            #[cfg(feature = "x86-v3")]
            Arch::V3(simd) => Simd::vectorize(simd, op),

            Arch::Scalar => Simd::vectorize(Scalar, op),
        }
    }
}
128
129impl Default for Arch {
130 #[inline]
131 fn default() -> Self {
132 Self::new()
133 }
134}
135
// Splice in code generated by the build script into `OUT_DIR` (presumably the
// assembly shims used by `avx_ld_u32s`/`avx_st_u32s` — TODO confirm).
include!(concat!(env!("OUT_DIR"), "/x86_64_asm.rs"));
137
138#[cfg(test)]
139mod tests {
140 extern crate alloc;
141
142 use super::*;
143 use alloc::vec;
144 use alloc::vec::Vec;
145 use assert_approx_eq::assert_approx_eq;
146 use core::iter::zip;
147 use rand::random;
148
    // `dbg!`-style debugging helper that prints values in uppercase hex
    // (`{:#X?}`) instead of plain `Debug`, which is easier to read for bit
    // patterns and SIMD lanes. Passes the value through, like `std::dbg!`.
    #[allow(unused_macros)]
    macro_rules! dbgx {
        () => {
            ::std::eprintln!("[{}:{}]", ::std::file!(), ::std::line!())
        };
        ($val:expr $(,)?) => {
            match $val {
                tmp => {
                    ::std::eprintln!("[{}:{}] {} = {:#X?}",
                        ::std::file!(), ::std::line!(), ::std::stringify!($val), &tmp);
                    tmp
                }
            }
        };
        ($($val:expr),+ $(,)?) => {
            ($(dbgx!($val)),+,)
        };
    }
167
168 #[test]
169 fn times_two() {
170 let n = 1312;
171 let mut v = (0..n).map(|i| i as f64).collect::<Vec<_>>();
172 let arch = Arch::new();
173
174 struct TimesThree<'a>(&'a mut [f64]);
175 impl WithSimd for TimesThree<'_> {
176 type Output = ();
177
178 #[inline(always)]
179 fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
180 let v = self.0;
181 let (head, tail) = S::as_mut_simd_f64s(v);
182
183 let three = simd.splat_f64s(3.0);
184 for x in head {
185 *x = simd.mul_f64s(three, *x);
186 }
187
188 for x in tail {
189 *x *= 3.0;
190 }
191 }
192 }
193
194 arch.dispatch(|| {
195 for x in &mut v {
196 *x *= 2.0;
197 }
198 });
199
200 arch.dispatch(TimesThree(&mut v));
201
202 for (i, x) in v.into_iter().enumerate() {
203 assert_eq!(x, 6.0 * i as f64);
204 }
205 }
206
    #[test]
    fn cplx_ops() {
        // Checks SIMD complex multiply / conjugate-multiply / multiply-add
        // against scalar `c32` arithmetic on random inputs.
        let n = 16;
        let a = (0..n)
            .map(|_| c32 {
                re: random(),
                im: random(),
            })
            .collect::<Vec<_>>();
        let b = (0..n)
            .map(|_| c32 {
                re: random(),
                im: random(),
            })
            .collect::<Vec<_>>();
        let c = (0..n)
            .map(|_| c32 {
                re: random(),
                im: random(),
            })
            .collect::<Vec<_>>();

        // Scalar reference results: a*b, conj(a)*b, a*b+c, conj(a)*b+c.
        let axb_target = zip(&a, &b).map(|(a, b)| a * b).collect::<Vec<_>>();
        let conjaxb_target = zip(&a, &b).map(|(a, b)| a.conj() * b).collect::<Vec<_>>();
        let axbpc_target = zip(zip(&a, &b), &c)
            .map(|((a, b), c)| a * b + c)
            .collect::<Vec<_>>();
        let conjaxbpc_target = zip(zip(&a, &b), &c)
            .map(|((a, b), c)| a.conj() * b + c)
            .collect::<Vec<_>>();

        if let Some(simd) = V3::try_new() {
            let mut axb = vec![c32::new(0.0, 0.0); n];
            let mut conjaxb = vec![c32::new(0.0, 0.0); n];
            let mut axbpc = vec![c32::new(0.0, 0.0); n];
            let mut conjaxbpc = vec![c32::new(0.0, 0.0); n];

            {
                // View the scalar slices as SIMD registers, keeping only the
                // head — n is presumably chosen so the slices divide evenly
                // into registers (the asserts below cover all n elements).
                let a = V3::as_simd_c32s(&a).0;
                let b = V3::as_simd_c32s(&b).0;
                let c = V3::as_simd_c32s(&c).0;
                let axb = V3::as_mut_simd_c32s(&mut axb).0;
                let conjaxb = V3::as_mut_simd_c32s(&mut conjaxb).0;
                let axbpc = V3::as_mut_simd_c32s(&mut axbpc).0;
                let conjaxbpc = V3::as_mut_simd_c32s(&mut conjaxbpc).0;

                for (axb, (a, b)) in zip(axb, zip(a, b)) {
                    *axb = simd.mul_e_c32s(*a, *b);
                }
                for (conjaxb, (a, b)) in zip(conjaxb, zip(a, b)) {
                    *conjaxb = simd.conj_mul_e_c32s(*a, *b);
                }
                for (axbpc, ((a, b), c)) in zip(axbpc, zip(zip(a, b), c)) {
                    *axbpc = simd.mul_add_e_c32s(*a, *b, *c);
                }
                for (conjaxbpc, ((a, b), c)) in zip(conjaxbpc, zip(zip(a, b), c)) {
                    *conjaxbpc = simd.conj_mul_add_e_c32s(*a, *b, *c);
                }
            }

            // Approximate comparison: SIMD rounding (e.g. fused operations)
            // may differ slightly from the scalar reference.
            for (target, actual) in zip(&axb_target, &axb) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&conjaxb_target, &conjaxb) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&axbpc_target, &axbpc) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&conjaxbpc_target, &conjaxbpc) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
        }

        // Same checks against the AVX-512 implementation when available.
        #[cfg(feature = "nightly")]
        if let Some(simd) = V4::try_new() {
            let mut axb = vec![c32::new(0.0, 0.0); n];
            let mut conjaxb = vec![c32::new(0.0, 0.0); n];
            let mut axbpc = vec![c32::new(0.0, 0.0); n];
            let mut conjaxbpc = vec![c32::new(0.0, 0.0); n];

            {
                let a = V4::as_simd_c32s(&a).0;
                let b = V4::as_simd_c32s(&b).0;
                let c = V4::as_simd_c32s(&c).0;
                let axb = V4::as_mut_simd_c32s(&mut axb).0;
                let conjaxb = V4::as_mut_simd_c32s(&mut conjaxb).0;
                let axbpc = V4::as_mut_simd_c32s(&mut axbpc).0;
                let conjaxbpc = V4::as_mut_simd_c32s(&mut conjaxbpc).0;

                for (axb, (a, b)) in zip(axb, zip(a, b)) {
                    *axb = simd.mul_e_c32s(*a, *b);
                }
                for (conjaxb, (a, b)) in zip(conjaxb, zip(a, b)) {
                    *conjaxb = simd.conj_mul_e_c32s(*a, *b);
                }
                for (axbpc, ((a, b), c)) in zip(axbpc, zip(zip(a, b), c)) {
                    *axbpc = simd.mul_add_e_c32s(*a, *b, *c);
                }
                for (conjaxbpc, ((a, b), c)) in zip(conjaxbpc, zip(zip(a, b), c)) {
                    *conjaxbpc = simd.conj_mul_add_e_c32s(*a, *b, *c);
                }
            }

            for (target, actual) in zip(&axb_target, &axb) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&conjaxb_target, &conjaxb) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&axbpc_target, &axbpc) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
            for (target, actual) in zip(&conjaxbpc_target, &conjaxbpc) {
                assert_approx_eq!(target.re, actual.re);
                assert_approx_eq!(target.im, actual.im);
            }
        }
    }
333
334 #[test]
335 fn test_to_ref() {
336 let simd_ref = unsafe { V2::new_unchecked() }.to_ref();
337 let _ = *simd_ref;
338 }
339
340 #[test]
341 fn test_widening_mul_u32x4() {
342 if let Some(simd) = V2::try_new() {
343 const N: usize = 4;
344 let a = u32x4(2298413717, 568259975, 2905436181, 175547995);
345 let b = u32x4(2022374205, 1446824162, 3165580604, 3011091403);
346 let a_array: [u32; N] = cast!(a);
347 let b_array: [u32; N] = cast!(b);
348 let mut lo_array = [0u32; N];
349 let mut hi_array = [0u32; N];
350
351 for i in 0..N {
352 let prod = a_array[i] as u64 * b_array[i] as u64;
353 let lo = prod as u32;
354 let hi = (prod >> 32) as u32;
355 lo_array[i] = lo;
356 hi_array[i] = hi;
357 }
358
359 let (lo, hi) = simd.widening_mul_u32x4(a, b);
360 assert_eq!(lo, cast!(lo_array));
361 assert_eq!(hi, cast!(hi_array));
362 }
363 if let Some(simd) = V3::try_new() {
364 const N: usize = 8;
365 let a = u32x8(
366 2298413717, 568259975, 2905436181, 175547995, 2298413717, 568259975, 2905436181,
367 175547995,
368 );
369 let b = u32x8(
370 2022374205, 1446824162, 3165580604, 3011091403, 2022374205, 1446824162, 3165580604,
371 3011091403,
372 );
373 let a_array: [u32; N] = cast!(a);
374 let b_array: [u32; N] = cast!(b);
375 let mut lo_array = [0u32; N];
376 let mut hi_array = [0u32; N];
377
378 for i in 0..N {
379 let prod = a_array[i] as u64 * b_array[i] as u64;
380 let lo = prod as u32;
381 let hi = (prod >> 32) as u32;
382 lo_array[i] = lo;
383 hi_array[i] = hi;
384 }
385
386 let (lo, hi) = simd.widening_mul_u32x8(a, b);
387 assert_eq!(lo, cast!(lo_array));
388 assert_eq!(hi, cast!(hi_array));
389 }
390 }
391
    #[test]
    fn test_widening_mul_i32() {
        // Signed widening multiply: each lane's 64-bit product, split into
        // low/high 32-bit halves, must match the SIMD result.
        if let Some(simd) = V2::try_new() {
            const N: usize = 4;
            // Bit patterns written as u32 and reinterpreted as i32, so most
            // lanes are negative once cast — exercising the signed path.
            let a = cast!(u32x4(2298413717, 568259975, 2905436181, 175547995));
            let b = cast!(u32x4(2022374205, 1446824162, 3165580604, 3011091403));

            let a_array: [i32; N] = cast!(a);
            let b_array: [i32; N] = cast!(b);
            let mut lo_array = [0i32; N];
            let mut hi_array = [0i32; N];

            for i in 0..N {
                let prod = a_array[i] as i64 * b_array[i] as i64;
                let lo = prod as i32;
                let hi = (prod >> 32) as i32;
                lo_array[i] = lo;
                hi_array[i] = hi;
            }

            let (lo, hi) = simd.widening_mul_i32x4(a, b);
            assert_eq!(lo, cast!(lo_array));
            assert_eq!(hi, cast!(hi_array));
        }
        // Same check for the 8-lane V3 variant.
        if let Some(simd) = V3::try_new() {
            const N: usize = 8;
            let a = cast!(u32x8(
                2298413717, 568259975, 2905436181, 175547995, 2298413717, 568259975, 2905436181,
                175547995,
            ));
            let b = cast!(u32x8(
                2022374205, 1446824162, 3165580604, 3011091403, 2022374205, 1446824162, 3165580604,
                3011091403,
            ));

            let a_array: [i32; N] = cast!(a);
            let b_array: [i32; N] = cast!(b);
            let mut lo_array = [0i32; N];
            let mut hi_array = [0i32; N];

            for i in 0..N {
                let prod = a_array[i] as i64 * b_array[i] as i64;
                let lo = prod as i32;
                let hi = (prod >> 32) as i32;
                lo_array[i] = lo;
                hi_array[i] = hi;
            }

            let (lo, hi) = simd.widening_mul_i32x8(a, b);
            assert_eq!(lo, cast!(lo_array));
            assert_eq!(hi, cast!(hi_array));
        }
    }
445
446 #[test]
447 fn test_shift() {
448 if let Some(simd) = V2::try_new() {
449 let a = u16x8(54911, 46958, 49991, 22366, 46365, 39572, 22704, 60060);
450 assert_eq!(simd.shl_const_u16x8::<16>(a), simd.splat_u16x8(0));
451 assert_eq!(simd.shl_u16x8(a, simd.splat_u64x2(!0)), simd.splat_u16x8(0),);
452 }
453 }
454
455 #[test]
456 fn test_abs() {
457 if let Some(simd) = V2::try_new() {
458 let a = f32x4(1.0, -2.0, -1.0, 2.0);
459 assert_eq!(simd.abs_f32x4(a), f32x4(1.0, 2.0, 1.0, 2.0));
460 let a = f64x2(1.0, -2.0);
461 assert_eq!(simd.abs_f64x2(a), f64x2(1.0, 2.0));
462 }
463 }
464
465 #[test]
466 fn test_subadd() {
467 if let Some(simd) = V2::try_new() {
468 let a = f32x4(1.0, -2.0, -1.0, 2.0);
469 assert_eq!(simd.subadd_f32x4(a, a), f32x4(0.0, -4.0, 0.0, 4.0));
470 }
471 }
472
473 #[test]
474 fn test_signed_to_unsigned() {
475 if let Some(simd) = V2::try_new() {
476 let a = i8x16(1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
477 assert_eq!(simd.convert_i8x16_to_u64x2(a), u64x2(1, !0));
478 }
479 }
480
    #[test]
    fn test_int_cmp() {
        // Unsigned less-than comparisons at every lane width, checked
        // lane-by-lane against scalar `<` on random-looking constants.
        if let Some(simd) = V2::try_new() {
            // 16 x u8 lanes.
            {
                const N: usize = 16;

                let a = u8x16(
                    174, 191, 248, 232, 11, 186, 42, 236, 3, 59, 223, 72, 161, 146, 98, 69,
                );
                let b = u8x16(
                    97, 239, 164, 173, 208, 0, 121, 247, 218, 58, 119, 131, 213, 133, 22, 128,
                );
                let lt = simd.cmp_lt_u8x16(a, b);

                let a_array: [u8; N] = cast!(a);
                let b_array: [u8; N] = cast!(b);
                let mut lt_array = [m8::new(false); N];

                for i in 0..N {
                    lt_array[i] = m8::new(a_array[i] < b_array[i]);
                }

                assert_eq!(lt, cast!(lt_array));
            }
            // 8 x u16 lanes.
            {
                const N: usize = 8;

                let a = u16x8(174, 191, 248, 232, 11, 186, 42, 236);
                let b = u16x8(97, 239, 164, 173, 208, 0, 121, 247);
                let lt = simd.cmp_lt_u16x8(a, b);

                let a_array: [u16; N] = cast!(a);
                let b_array: [u16; N] = cast!(b);
                let mut lt_array = [m16::new(false); N];

                for i in 0..N {
                    lt_array[i] = m16::new(a_array[i] < b_array[i]);
                }

                assert_eq!(lt, cast!(lt_array));
            }
            // 4 x u32 lanes.
            {
                const N: usize = 4;

                let a = u32x4(174, 191, 248, 232);
                let b = u32x4(97, 239, 164, 173);
                let lt = simd.cmp_lt_u32x4(a, b);

                let a_array: [u32; N] = cast!(a);
                let b_array: [u32; N] = cast!(b);
                let mut lt_array = [m32::new(false); N];

                for i in 0..N {
                    lt_array[i] = m32::new(a_array[i] < b_array[i]);
                }

                assert_eq!(lt, cast!(lt_array));
            }
            // 2 x u64 lanes.
            {
                const N: usize = 2;

                let a = u64x2(174, 191);
                let b = u64x2(97, 239);
                let lt = simd.cmp_lt_u64x2(a, b);

                let a_array: [u64; N] = cast!(a);
                let b_array: [u64; N] = cast!(b);
                let mut lt_array = [m64::new(false); N];

                for i in 0..N {
                    lt_array[i] = m64::new(a_array[i] < b_array[i]);
                }

                assert_eq!(lt, cast!(lt_array));
            }
        }
    }
558
559 #[test]
560 fn test_is_nan() {
561 if let Some(simd) = V2::try_new() {
562 assert_eq!(
563 simd.is_nan_f32x4(f32x4(0.0, f32::NAN, f32::INFINITY, -f32::NAN)),
564 m32x4(
565 m32::new(false),
566 m32::new(true),
567 m32::new(false),
568 m32::new(true),
569 ),
570 );
571 assert_eq!(
572 simd.is_nan_f64x2(f64x2(0.0, f64::NAN)),
573 m64x2(m64::new(false), m64::new(true)),
574 );
575 }
576 }
577
578 #[test]
579 fn test_rotate() {
580 if let Some(simd) = V3::try_new() {
581 for amount in 0..128 {
582 let mut array = [0u32; 8];
583 for (i, dst) in array.iter_mut().enumerate() {
584 *dst = 1000 + i as u32;
585 }
586
587 let rot: [u32; 8] = cast!(simd.rotate_right_u32s(cast!(array), amount));
588 for i in 0..8 {
589 assert_eq!(rot[(i + amount) % 8], array[i]);
590 }
591 }
592 for amount in 0..128 {
593 let mut array = [0u64; 4];
594 for (i, dst) in array.iter_mut().enumerate() {
595 *dst = 1000 + i as u64;
596 }
597
598 let rot: [u64; 4] = cast!(simd.rotate_right_u64s(cast!(array), amount));
599 for i in 0..4 {
600 assert_eq!(rot[(i + amount) % 4], array[i]);
601 }
602 }
603 }
604
605 #[cfg(feature = "nightly")]
606 if let Some(simd) = V4::try_new() {
607 for amount in 0..128 {
608 let mut array = [0u32; 16];
609 for (i, dst) in array.iter_mut().enumerate() {
610 *dst = 1000 + i as u32;
611 }
612
613 let rot: [u32; 16] = cast!(simd.rotate_right_u32s(cast!(array), amount));
614 for i in 0..16 {
615 assert_eq!(rot[(i + amount) % 16], array[i]);
616 }
617 }
618 for amount in 0..128 {
619 let mut array = [0u64; 8];
620 for (i, dst) in array.iter_mut().enumerate() {
621 *dst = 1000 + i as u64;
622 }
623
624 let rot: [u64; 8] = cast!(simd.rotate_right_u64s(cast!(array), amount));
625 for i in 0..8 {
626 assert_eq!(rot[(i + amount) % 8], array[i]);
627 }
628 }
629 }
630 }
631
    #[test]
    fn test_partial() {
        // Partial load/store of the first `n` lanes, for every n up to the
        // register width: loads must match a masked pointer load, and stores
        // must leave lanes past n untouched.
        if let Some(simd) = V3::try_new() {
            for n in 0..=8 {
                let src = core::array::from_fn::<f32, 8, _>(|i| i as _);
                let mut dst = [0.0f32; 8];
                let zero = dst;

                // partial_load of lanes [0, n) == masked load with the same range.
                assert_eq!(simd.partial_load_f32s(&src[..n]), unsafe {
                    simd.mask_load_ptr_f32s(simd.mask_between_m32s(0, n as u32), src.as_ptr())
                });
                {
                    let src = &src[..n];
                    let dst = &mut dst[..n];

                    // Round trip: store(load(src)) reproduces src exactly.
                    simd.partial_store_f32s(dst, simd.partial_load_f32s(src));

                    assert_eq!(src, dst);
                }
                // Lanes beyond n must not have been written.
                assert_eq!(dst[n..], zero[n..]);
            }
        }

        // Same checks at the 16-lane V4 width.
        #[cfg(feature = "nightly")]
        if let Some(simd) = V4::try_new() {
            for n in 0..=16 {
                let src = core::array::from_fn::<f32, 16, _>(|i| i as _);
                let mut dst = [0.0f32; 16];
                let zero = dst;

                assert_eq!(simd.partial_load_f32s(&src[..n]), unsafe {
                    simd.mask_load_ptr_f32s(simd.mask_between_m32s(0, n as u32), src.as_ptr())
                });

                {
                    let src = &src[..n];
                    let dst = &mut dst[..n];

                    simd.partial_store_f32s(dst, simd.partial_load_f32s(src));

                    assert_eq!(src, dst);
                }
                assert_eq!(dst[n..], zero[n..]);
            }
        }
    }
678
    #[test]
    fn test_interleave() {
        // (De)interleave round trips. Inputs encode `g.k` as element k of
        // group g, so after deinterleaving, lane set `dst[k]` must equal
        // `dst[0]` shifted by 0.k — and re-interleaving must reproduce `src`.
        if let Some(simd) = V3::try_new() {
            // 2-way, f64x4.
            {
                let src = [f64x4(0.0, 0.1, 1.0, 1.1), f64x4(2.0, 2.1, 3.0, 3.1)];
                let dst = simd.deinterleave_shfl_f64s(src);
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                assert_eq!(src, simd.interleave_shfl_f64s(dst));
            }
            // 4-way, f64x4.
            {
                let src = [
                    f64x4(0.0, 0.1, 0.2, 0.3),
                    f64x4(1.0, 1.1, 1.2, 1.3),
                    f64x4(2.0, 2.1, 2.2, 2.3),
                    f64x4(3.0, 3.1, 3.2, 3.3),
                ];
                let dst = simd.deinterleave_shfl_f64s(src);
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                assert_eq!(dst[2], simd.add_f64x4(dst[0], simd.splat_f64x4(0.2)));
                assert_eq!(dst[3], simd.add_f64x4(dst[0], simd.splat_f64x4(0.3)));
                assert_eq!(src, simd.interleave_shfl_f64s(dst));
            }
            // 2-way, f32x8.
            {
                let src = [
                    f32x8(0.0, 0.1, 1.0, 1.1, 2.0, 2.1, 3.0, 3.1),
                    f32x8(4.0, 4.1, 5.0, 5.1, 6.0, 6.1, 7.0, 7.1),
                ];
                let dst = simd.deinterleave_shfl_f32s(src);
                assert_eq!(dst[1], simd.add_f32x8(dst[0], simd.splat_f32x8(0.1)));
                assert_eq!(src, simd.interleave_shfl_f32s(dst));
            }
            // 4-way, f32x8.
            {
                let src = [
                    f32x8(0.0, 0.1, 0.2, 0.3, 1.0, 1.1, 1.2, 1.3),
                    f32x8(2.0, 2.1, 2.2, 2.3, 3.0, 3.1, 3.2, 3.3),
                    f32x8(4.0, 4.1, 4.2, 4.3, 5.0, 5.1, 5.2, 5.3),
                    f32x8(6.0, 6.1, 6.2, 6.3, 7.0, 7.1, 7.2, 7.3),
                ];
                let dst = simd.deinterleave_shfl_f32s(src);
                assert_eq!(dst[1], simd.add_f32x8(dst[0], simd.splat_f32x8(0.1)));
                assert_eq!(dst[2], simd.add_f32x8(dst[0], simd.splat_f32x8(0.2)));
                assert_eq!(dst[3], simd.add_f32x8(dst[0], simd.splat_f32x8(0.3)));
                assert_eq!(src, simd.interleave_shfl_f32s(dst));
            }
        }
        // Same round trips at the wider V4 register widths.
        #[cfg(feature = "nightly")]
        if let Some(simd) = V4::try_new() {
            // 2-way, f64x8.
            {
                let src = [
                    f64x8(0.0, 0.1, 1.0, 1.1, 2.0, 2.1, 3.0, 3.1),
                    f64x8(4.0, 4.1, 5.0, 5.1, 6.0, 6.1, 7.0, 7.1),
                ];
                let dst = simd.deinterleave_shfl_f64s(src);
                assert_eq!(dst[1], simd.add_f64x8(dst[0], simd.splat_f64x8(0.1)));
                assert_eq!(src, simd.interleave_shfl_f64s(dst));
            }

            // 3-way, f64x8 (groups straddle register boundaries).
            {
                let src = [
                    f64x8(0.0, 0.1, 0.2, 1.0, 1.1, 1.2, 2.0, 2.1),
                    f64x8(2.2, 3.0, 3.1, 3.2, 4.0, 4.1, 4.2, 5.0),
                    f64x8(5.1, 5.2, 6.0, 6.1, 6.2, 7.0, 7.1, 7.2),
                ];
                let dst = simd.deinterleave_shfl_f64s(src);
                assert_eq!(dst[1], simd.add_f64x8(dst[0], simd.splat_f64x8(0.1)));
                assert_eq!(dst[2], simd.add_f64x8(dst[0], simd.splat_f64x8(0.2)));
                assert_eq!(src, simd.interleave_shfl_f64s(dst));
            }
            // 4-way, f64x8.
            {
                let src = [
                    f64x8(0.0, 0.1, 0.2, 0.3, 1.0, 1.1, 1.2, 1.3),
                    f64x8(2.0, 2.1, 2.2, 2.3, 3.0, 3.1, 3.2, 3.3),
                    f64x8(4.0, 4.1, 4.2, 4.3, 5.0, 5.1, 5.2, 5.3),
                    f64x8(6.0, 6.1, 6.2, 6.3, 7.0, 7.1, 7.2, 7.3),
                ];
                let dst = simd.deinterleave_shfl_f64s(src);
                assert_eq!(dst[1], simd.add_f64x8(dst[0], simd.splat_f64x8(0.1)));
                assert_eq!(dst[2], simd.add_f64x8(dst[0], simd.splat_f64x8(0.2)));
                assert_eq!(dst[3], simd.add_f64x8(dst[0], simd.splat_f64x8(0.3)));
                assert_eq!(src, simd.interleave_shfl_f64s(dst));
            }

            // 2-way, f32x16.
            {
                let src = [
                    f32x16(
                        0.0, 0.1, 1.0, 1.1, 2.0, 2.1, 3.0, 3.1, 4.0, 4.1, 5.0, 5.1, 6.0, 6.1, 7.0,
                        7.1,
                    ),
                    f32x16(
                        8.0, 8.1, 9.0, 9.1, 10.0, 10.1, 11.0, 11.1, 12.0, 12.1, 13.0, 13.1, 14.0,
                        14.1, 15.0, 15.1,
                    ),
                ];
                let dst = simd.deinterleave_shfl_f32s(src);
                assert_eq!(dst[1], simd.add_f32x16(dst[0], simd.splat_f32x16(0.1)));
                assert_eq!(src, simd.interleave_shfl_f32s(dst));
            }
            // 4-way, f32x16.
            {
                let src = [
                    f32x16(
                        0.0, 0.1, 0.2, 0.3, 1.0, 1.1, 1.2, 1.3, 2.0, 2.1, 2.2, 2.3, 3.0, 3.1, 3.2,
                        3.3,
                    ),
                    f32x16(
                        4.0, 4.1, 4.2, 4.3, 5.0, 5.1, 5.2, 5.3, 6.0, 6.1, 6.2, 6.3, 7.0, 7.1, 7.2,
                        7.3,
                    ),
                    f32x16(
                        8.0, 8.1, 8.2, 8.3, 9.0, 9.1, 9.2, 9.3, 10.0, 10.1, 10.2, 10.3, 11.0, 11.1,
                        11.2, 11.3,
                    ),
                    f32x16(
                        12.0, 12.1, 12.2, 12.3, 13.0, 13.1, 13.2, 13.3, 14.0, 14.1, 14.2, 14.3,
                        15.0, 15.1, 15.2, 15.3,
                    ),
                ];
                let dst = simd.deinterleave_shfl_f32s(src);
                assert_eq!(dst[1], simd.add_f32x16(dst[0], simd.splat_f32x16(0.1)));
                assert_eq!(dst[2], simd.add_f32x16(dst[0], simd.splat_f32x16(0.2)));
                assert_eq!(dst[3], simd.add_f32x16(dst[0], simd.splat_f32x16(0.3)));
                assert_eq!(src, simd.interleave_shfl_f32s(dst));
            }
        }
    }
803}