1#![allow(
61 non_camel_case_types,
62 unknown_lints,
63 clippy::zero_prefixed_literal,
64 clippy::identity_op,
65 clippy::too_many_arguments,
66 clippy::type_complexity,
67 clippy::missing_transmute_annotations,
68 clippy::tabs_in_doc_comments,
69 clippy::modulo_one
70)]
71#![cfg_attr(
72 all(feature = "nightly", any(target_arch = "x86", target_arch = "x86_64")),
73 feature(stdarch_x86_avx512),
74 feature(avx512_target_feature)
75)]
76#![cfg_attr(
77 all(feature = "nightly", any(target_arch = "aarch64")),
78 feature(stdarch_neon_i8mm),
79 feature(stdarch_neon_sm4),
80 feature(stdarch_neon_ftts),
81 feature(stdarch_neon_fcma),
82 feature(stdarch_neon_dotprod)
83)]
84#![cfg_attr(not(feature = "std"), no_std)]
85#![cfg_attr(docsrs, feature(doc_cfg))]
86
/// Evaluates `$e` inside an inline `const` block, forcing compile-time
/// evaluation, when the `libpulp_const` cfg is set (presumably emitted by a
/// build script on compilers that support inline `const` — TODO confirm).
#[cfg(libpulp_const)]
#[macro_export]
macro_rules! try_const {
    ($e: expr) => {
        const { $e }
    };
}
94
/// Fallback for compilers without inline `const` support: evaluates `$e` as a
/// plain runtime block expression, with the same value as the `const` variant.
#[cfg(not(libpulp_const))]
#[macro_export]
macro_rules! try_const {
    ($e: expr) => {{ $e }};
}
100
// `match`-like syntax over `cfg` predicates. Exactly one arm is compiled: each
// arm's cfg is combined with the negation of all preceding arms' cfgs, so arms
// are mutually exclusive and order matters, like a real `match`. The `item,`
// form expands to items; the plain form expands to a single expression.
macro_rules! match_cfg {
    // Entry point, item position: `match_cfg!(item, match cfg!() { ... })`.
    // Repackages each arm as `((meta) (tokens)),` and hands off to `@__items`.
    (item, match cfg!() {
        $(
            const { $i_meta:meta } => { $( $i_tokens:tt )* },
        )*
        $(_ => { $( $e_tokens:tt )* },)?
    }) => {
        $crate::match_cfg! {
            @__items () ;
            $(
                (( $i_meta ) ( $( $i_tokens )* )) ,
            )*
            $((() ( $( $e_tokens )* )),)?
        }
    };

    // Entry point, expression position: `match_cfg!(match cfg!() { ... })`.
    // Same repackaging, but routed through `@__exprs` with a fixed result
    // binding named `__result`.
    (match cfg!() {
        $(
            const { $i_meta:meta } => $i_expr: expr,
        )*
        $(_ => $e_expr: expr,)?
    }) => {
        $crate::match_cfg! {
            @ __result @ __exprs ();
            $(
                (( $i_meta ) ( $i_expr )) ,
            )*
            $((() ( $e_expr )),)?
        }
    };

    // Item recursion, base case: no arms left, nothing to emit.
    (@__items ( $( $_:meta , )* ) ; ) => {};
    // Item recursion: emit this arm's tokens gated on
    // `all(this_cfg, not(any(previous_cfgs)))`, then recurse with this arm's
    // cfg appended to the "already handled" list. The default arm has no
    // `$yes` meta, so it is gated only on the negation of the earlier arms.
    (
        @__items ( $( $no:meta , )* ) ;
        (( $( $yes:meta )? ) ( $( $tokens:tt )* )) ,
        $( $rest:tt , )*
    ) => {
        #[cfg(all(
            $( $yes , )?
            not(any( $( $no ),* ))
        ))]
        $crate::match_cfg! { @__identity $( $tokens )* }

        $crate::match_cfg! {
            @__items ( $( $no , )* $( $yes , )? ) ;
            $( $rest , )*
        }
    };

    // Expression recursion, base case: all arms processed; the surviving
    // cfg-enabled `let` above has defined `$ret`, which is the block's value.
    (@ $ret: ident @ __exprs ( $( $_:meta , )* ) ; ) => {
        $ret
    };

    // Expression recursion: bind `$ret` under this arm's exclusive cfg, then
    // recurse. Exactly one of the cfg-gated `let`s survives compilation.
    (
        @ $ret: ident @__exprs ( $( $no:meta , )* ) ;
        (( $( $yes:meta )? ) ( $( $tokens:tt )* )) ,
        $( $rest:tt , )*
    ) => {{
        #[cfg(all(
            $( $yes , )?
            not(any( $( $no ),* ))
        ))]
        let $ret = $crate::match_cfg! { @__identity $( $tokens )* };

        $crate::match_cfg! {
            @ $ret @ __exprs ( $( $no , )* $( $yes , )? ) ;
            $( $rest , )*
        }
    }};

    // Passes tokens through unchanged; exists so a `#[cfg]` attribute can be
    // attached to an arbitrary token sequence via this macro call.
    (@__identity $( $tokens:tt )* ) => {
        $( $tokens )*
    };
}
197
198use match_cfg;
199
/// Bitwise cast between same-sized types, usable where `transmute` is wanted
/// with less annotation noise.
#[macro_export]
macro_rules! cast {
    ($val: expr $(,)?) => {{
        let __val = $val;
        // `try_const! { false }` is always `false`, so the `$crate::cast`
        // branch never runs; it is kept so the compiler still type-checks the
        // call — presumably enforcing `$crate::cast`'s trait/size requirements
        // on the operands (TODO confirm `$crate::cast`'s bounds; it is not
        // visible in this chunk). The runtime path is a plain transmute.
        if $crate::try_const! { false } {
            $crate::cast(__val)
        } else {
            #[allow(unused_unsafe, clippy::missing_transmute_annotations)]
            unsafe {
                ::core::mem::transmute(__val)
            }
        }
    }};
}
218
219use bytemuck::{AnyBitPattern, NoUninit, Pod, Zeroable};
220use core::fmt::Debug;
221use core::marker::PhantomData;
222use core::mem::MaybeUninit;
223use core::slice::{from_raw_parts, from_raw_parts_mut};
224use num_complex::Complex;
225use seal::Seal;
226
227#[cfg(feature = "macro")]
231#[cfg_attr(docsrs, doc(cfg(feature = "macro")))]
232pub use pulp_macro::with_simd;
233
234pub use {bytemuck, num_complex};
235
/// Single-precision complex number (`Complex<f32>`), 8 bytes.
pub type c32 = Complex<f32>;
/// Double-precision complex number (`Complex<f64>`), 16 bytes.
pub type c64 = Complex<f64>;
238
// `MemMask` wraps a per-lane mask for partial (masked) memory loads/stores.
// On x86/x86_64 it additionally carries two optional type-erased function
// pointers (`load`/`store`) — presumably specialized masked load/store
// routines filled in by the x86 backends; TODO confirm against the code that
// constructs these fields. On every other target it is just the mask value.
match_cfg!(item, match cfg!() {
    const { any(target_arch = "x86_64", target_arch = "x86") } => {
        /// Mask for partial memory operations, with optional specialized
        /// load/store callbacks (x86/x86_64 layout).
        #[derive(Debug, Copy, Clone)]
        pub struct MemMask<T> {
            mask: T,
            load: Option<unsafe extern "C" fn()>,
            store: Option<unsafe extern "C" fn()>,
        }

        impl<T> MemMask<T> {
            /// Wraps a raw mask value with no specialized callbacks.
            #[inline]
            pub fn new(mask: T) -> Self {
                Self {
                    mask,
                    load: None,
                    store: None,
                }
            }
        }

        impl<T> From<T> for MemMask<T> {
            #[inline]
            fn from(value: T) -> Self {
                Self {
                    mask: value,
                    load: None,
                    store: None,
                }
            }
        }
    },

    _ => {
        /// Mask for partial memory operations (non-x86 layout: mask only).
        #[derive(Debug, Copy, Clone)]
        pub struct MemMask<T> {
            mask: T,
        }

        impl<T> MemMask<T> {
            /// Wraps a raw mask value.
            #[inline]
            pub fn new(mask: T) -> Self {
                Self { mask }
            }
        }

        impl<T> From<T> for MemMask<T> {
            #[inline]
            fn from(value: T) -> Self {
                Self { mask: value }
            }
        }
    },
});
292
293impl<T: Copy> MemMask<T> {
294 #[inline]
295 pub fn mask(self) -> T {
296 self.mask
297 }
298}
299
/// Sealed-trait pattern: `Seal` is a public trait inside a private module, so
/// downstream crates can name it as a bound (e.g. `Simd: Seal` below) but
/// cannot implement it, keeping `Simd` implementations crate-internal.
mod seal {
    pub trait Seal {}
}
303
/// A zero-argument callable consumed on call; object-safe-free stand-in for
/// `FnOnce() -> Output` (blanket-implemented for all such closures below).
pub trait NullaryFnOnce {
    /// Value produced by [`Self::call`].
    type Output;

    /// Consumes `self` and runs the computation.
    fn call(self) -> Self::Output;
}
309
/// Every `FnOnce() -> R` closure is a `NullaryFnOnce` that simply invokes
/// itself.
impl<R, F: FnOnce() -> R> NullaryFnOnce for F {
    type Output = R;

    #[inline(always)]
    fn call(self) -> Self::Output {
        self()
    }
}
318
/// A computation generic over the SIMD backend: given a proof-of-capability
/// token `simd: S`, runs the computation with that instruction set.
pub trait WithSimd {
    /// Value produced by [`Self::with_simd`].
    type Output;

    /// Consumes `self`, executing the computation using the backend `S`.
    fn with_simd<S: Simd>(self, simd: S) -> Self::Output;
}
324
/// Any nullary callable is trivially a `WithSimd` computation that ignores
/// which backend it runs under.
impl<F: NullaryFnOnce> WithSimd for F {
    type Output = F::Output;

    #[inline(always)]
    fn with_simd<S: Simd>(self, simd: S) -> Self::Output {
        // Borrow the token instead of dropping it unused — presumably this is
        // to silence the unused-parameter lint while keeping `simd` alive for
        // the duration of the call; TODO confirm intent.
        let _simd = &simd;
        self.call()
    }
}
334
/// Fused multiply-add `a * b + c` for `f32` with a single rounding step.
/// Uses `f32::mul_add` when `std` is enabled, otherwise `libm::fmaf` so
/// `no_std` builds keep the fused semantics.
#[inline(always)]
fn fma_f32(a: f32, b: f32, c: f32) -> f32 {
    match_cfg!(match cfg!() {
        const { feature = "std" } => f32::mul_add(a, b, c),
        _ => libm::fmaf(a, b, c),
    })
}
342
/// Fused multiply-add `a * b + c` for `f64` with a single rounding step.
/// Uses `f64::mul_add` when `std` is enabled, otherwise `libm::fma` so
/// `no_std` builds keep the fused semantics.
#[inline(always)]
fn fma_f64(a: f64, b: f64, c: f64) -> f64 {
    match_cfg!(match cfg!() {
        const { feature = "std" } => f64::mul_add(a, b, c),
        _ => libm::fma(a, b, c),
    })
}
350
/// Scalar fallback for interleaving a group of SIMD registers.
///
/// With `n = size_of::<AosReg>() / size_of::<Reg>()` registers of
/// `m = size_of::<Reg>() / size_of::<Unit>()` lanes each, `x` is read as a
/// row-major `n x m` matrix of `Unit`s and the result is its transpose:
/// output unit `i + n * j` is input unit `j + i * m`. When the whole group is
/// a single register (`n == 1`), the value is returned unchanged.
///
/// # Safety
/// `AosReg` must be valid to duplicate byte-wise: it is copied with
/// `core::ptr::read` and reinterpreted as a flat array of `Unit`s —
/// presumably callers only pass plain-old-data register bundles (the
/// `needs_drop` assertion guards part of this); TODO confirm the full caller
/// contract.
#[inline(always)]
unsafe fn interleave_fallback<Unit: Pod, Reg: Pod, AosReg>(x: AosReg) -> AosReg {
    // Layout preconditions: the group divides evenly into registers, and each
    // register divides evenly into units.
    assert!(core::mem::size_of::<AosReg>() % core::mem::size_of::<Reg>() == 0);
    assert!(core::mem::size_of::<Reg>() % core::mem::size_of::<Unit>() == 0);
    assert!(!core::mem::needs_drop::<AosReg>());

    if try_const! { core::mem::size_of::<AosReg>() == core::mem::size_of::<Reg>() } {
        // Single register: a 1 x m transpose is the identity.
        x
    } else {
        // Byte-wise duplicate of `x` to serve as the output buffer.
        let mut y = core::ptr::read(&x);

        let n = try_const! { core::mem::size_of::<AosReg>() / core::mem::size_of::<Reg>() };
        let m = try_const! { core::mem::size_of::<Reg>() / core::mem::size_of::<Unit>() };

        unsafe {
            let y = (&mut y) as *mut _ as *mut Unit;
            let x = (&x) as *const _ as *const Unit;
            // Transpose: (row i, col j) of the n x m view moves to the
            // column-major position i + n*j.
            for j in 0..m {
                for i in 0..n {
                    *y.add(i + n * j) = *x.add(j + i * m);
                }
            }
        }

        y
    }
}
381
/// Scalar fallback for deinterleaving a group of SIMD registers; exact
/// inverse of [`interleave_fallback`].
///
/// With `n = size_of::<SoaReg>() / size_of::<Reg>()` registers of
/// `m = size_of::<Reg>() / size_of::<Unit>()` lanes each, input unit
/// `i + n * j` is moved back to output position `j + i * m` (the reverse
/// transpose). When the group is a single register, the value is returned
/// unchanged.
///
/// # Safety
/// Same contract as [`interleave_fallback`]: `SoaReg` must be valid to
/// duplicate byte-wise and reinterpret as a flat array of `Unit`s — TODO
/// confirm the full caller contract.
#[inline(always)]
unsafe fn deinterleave_fallback<Unit: Pod, Reg: Pod, SoaReg>(y: SoaReg) -> SoaReg {
    // Layout preconditions mirroring the interleave direction.
    assert!(core::mem::size_of::<SoaReg>() % core::mem::size_of::<Reg>() == 0);
    assert!(core::mem::size_of::<Reg>() % core::mem::size_of::<Unit>() == 0);
    assert!(!core::mem::needs_drop::<SoaReg>());

    if try_const! { core::mem::size_of::<SoaReg>() == core::mem::size_of::<Reg>() } {
        // Single register: nothing to rearrange.
        y
    } else {
        // Byte-wise duplicate of `y` to serve as the output buffer.
        let mut x = core::ptr::read(&y);

        let n = try_const! { core::mem::size_of::<SoaReg>() / core::mem::size_of::<Reg>() };
        let m = try_const! { core::mem::size_of::<Reg>() / core::mem::size_of::<Unit>() };

        unsafe {
            let y = (&y) as *const _ as *const Unit;
            let x = (&mut x) as *mut _ as *mut Unit;
            // Inverse transpose of `interleave_fallback`'s index mapping.
            for j in 0..m {
                for i in 0..n {
                    *x.add(j + i * m) = *y.add(i + n * j);
                }
            }
        }

        x
    }
}
409
/// Marker for types accepted by the interleave/deinterleave shuffles.
///
/// # Safety
/// Implementors must be safe to reinterpret as a flat array of lane units —
/// the blanket impl below covers exactly the `Pod` types, which satisfy this
/// by definition.
pub unsafe trait Interleave {}
// SAFETY: `Pod` types are valid for any bit pattern and contain no padding
// requirements that byte-level shuffling could violate.
unsafe impl<T: Pod> Interleave for T {}
416
417pub trait Simd: Seal + Debug + Copy + Send + Sync + 'static {
418 const IS_SCALAR: bool = false;
419
420 const U64_LANES: usize = core::mem::size_of::<Self::u64s>() / core::mem::size_of::<u64>();
421 const I64_LANES: usize = core::mem::size_of::<Self::i64s>() / core::mem::size_of::<i64>();
422 const F64_LANES: usize = core::mem::size_of::<Self::f64s>() / core::mem::size_of::<f64>();
423 const C64_LANES: usize = core::mem::size_of::<Self::c64s>() / core::mem::size_of::<c64>();
424
425 const U32_LANES: usize = core::mem::size_of::<Self::u32s>() / core::mem::size_of::<u32>();
426 const I32_LANES: usize = core::mem::size_of::<Self::i32s>() / core::mem::size_of::<i32>();
427 const F32_LANES: usize = core::mem::size_of::<Self::f32s>() / core::mem::size_of::<f32>();
428 const C32_LANES: usize = core::mem::size_of::<Self::c32s>() / core::mem::size_of::<c32>();
429
430 const REGISTER_COUNT: usize;
431
432 type m32s: Debug + Copy + Send + Sync + Zeroable + NoUninit + 'static;
433 type f32s: Debug + Copy + Send + Sync + Pod + 'static;
434 type c32s: Debug + Copy + Send + Sync + Pod + 'static;
435 type i32s: Debug + Copy + Send + Sync + Pod + 'static;
436 type u32s: Debug + Copy + Send + Sync + Pod + 'static;
437
438 type m64s: Debug + Copy + Send + Sync + Zeroable + NoUninit + 'static;
439 type f64s: Debug + Copy + Send + Sync + Pod + 'static;
440 type c64s: Debug + Copy + Send + Sync + Pod + 'static;
441 type i64s: Debug + Copy + Send + Sync + Pod + 'static;
442 type u64s: Debug + Copy + Send + Sync + Pod + 'static;
443
444 fn abs2_c32s(self, a: Self::c32s) -> Self::c32s;
446
447 fn abs2_c64s(self, a: Self::c64s) -> Self::c64s;
449 #[inline]
450 fn abs_f32s(self, a: Self::f32s) -> Self::f32s {
451 self.and_f32s(self.not_f32s(self.splat_f32s(-0.0)), a)
452 }
453 #[inline]
454 fn abs_f64s(self, a: Self::f64s) -> Self::f64s {
455 self.and_f64s(self.not_f64s(self.splat_f64s(-0.0)), a)
456 }
457 fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s;
459 fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s;
461 fn add_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s;
462 fn add_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s;
463 fn add_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
464 fn add_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;
465 #[inline]
466 fn add_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::i32s {
467 self.transmute_i32s_u32s(
468 self.add_u32s(self.transmute_u32s_i32s(a), self.transmute_u32s_i32s(b)),
469 )
470 }
471 #[inline]
472 fn add_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::i64s {
473 self.transmute_i64s_u64s(
474 self.add_u64s(self.transmute_u64s_i64s(a), self.transmute_u64s_i64s(b)),
475 )
476 }
477 fn add_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s;
478 fn add_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s;
479 #[inline]
480 fn and_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
481 self.transmute_f32s_u32s(
482 self.and_u32s(self.transmute_u32s_f32s(a), self.transmute_u32s_f32s(b)),
483 )
484 }
485 #[inline]
486 fn and_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
487 self.transmute_f64s_u64s(
488 self.and_u64s(self.transmute_u64s_f64s(a), self.transmute_u64s_f64s(b)),
489 )
490 }
491 #[inline]
492 fn and_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::i32s {
493 self.transmute_i32s_u32s(
494 self.and_u32s(self.transmute_u32s_i32s(a), self.transmute_u32s_i32s(b)),
495 )
496 }
497 #[inline]
498 fn and_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::i64s {
499 self.transmute_i64s_u64s(
500 self.and_u64s(self.transmute_u64s_i64s(a), self.transmute_u64s_i64s(b)),
501 )
502 }
503 fn and_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s;
504 fn and_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s;
505 fn and_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s;
506 fn and_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s;
507 #[inline(always)]
508 fn as_mut_rsimd_c32s(slice: &mut [c32]) -> (&mut [c32], &mut [Self::c32s]) {
509 unsafe { rsplit_mut_slice(slice) }
510 }
511 #[inline(always)]
512 fn as_mut_rsimd_c64s(slice: &mut [c64]) -> (&mut [c64], &mut [Self::c64s]) {
513 unsafe { rsplit_mut_slice(slice) }
514 }
515 #[inline(always)]
516 fn as_mut_rsimd_f32s(slice: &mut [f32]) -> (&mut [f32], &mut [Self::f32s]) {
517 unsafe { rsplit_mut_slice(slice) }
518 }
519
520 #[inline(always)]
521 fn as_mut_rsimd_f64s(slice: &mut [f64]) -> (&mut [f64], &mut [Self::f64s]) {
522 unsafe { rsplit_mut_slice(slice) }
523 }
524 #[inline(always)]
525 fn as_mut_rsimd_i32s(slice: &mut [i32]) -> (&mut [i32], &mut [Self::i32s]) {
526 unsafe { rsplit_mut_slice(slice) }
527 }
528 #[inline(always)]
529 fn as_mut_rsimd_i64s(slice: &mut [i64]) -> (&mut [i64], &mut [Self::i64s]) {
530 unsafe { rsplit_mut_slice(slice) }
531 }
532 #[inline(always)]
533 fn as_mut_rsimd_u32s(slice: &mut [u32]) -> (&mut [u32], &mut [Self::u32s]) {
534 unsafe { rsplit_mut_slice(slice) }
535 }
536 #[inline(always)]
537 fn as_mut_rsimd_u64s(slice: &mut [u64]) -> (&mut [u64], &mut [Self::u64s]) {
538 unsafe { rsplit_mut_slice(slice) }
539 }
540 #[inline(always)]
541 fn as_mut_simd_c32s(slice: &mut [c32]) -> (&mut [Self::c32s], &mut [c32]) {
542 unsafe { split_mut_slice(slice) }
543 }
544 #[inline(always)]
545 fn as_mut_simd_c64s(slice: &mut [c64]) -> (&mut [Self::c64s], &mut [c64]) {
546 unsafe { split_mut_slice(slice) }
547 }
548 #[inline(always)]
549 fn as_mut_simd_f32s(slice: &mut [f32]) -> (&mut [Self::f32s], &mut [f32]) {
550 unsafe { split_mut_slice(slice) }
551 }
552 #[inline(always)]
553 fn as_mut_simd_f64s(slice: &mut [f64]) -> (&mut [Self::f64s], &mut [f64]) {
554 unsafe { split_mut_slice(slice) }
555 }
556 #[inline(always)]
557 fn as_mut_simd_i32s(slice: &mut [i32]) -> (&mut [Self::i32s], &mut [i32]) {
558 unsafe { split_mut_slice(slice) }
559 }
560 #[inline(always)]
561 fn as_mut_simd_i64s(slice: &mut [i64]) -> (&mut [Self::i64s], &mut [i64]) {
562 unsafe { split_mut_slice(slice) }
563 }
564 #[inline(always)]
565 fn as_mut_simd_u32s(slice: &mut [u32]) -> (&mut [Self::u32s], &mut [u32]) {
566 unsafe { split_mut_slice(slice) }
567 }
568 #[inline(always)]
569 fn as_mut_simd_u64s(slice: &mut [u64]) -> (&mut [Self::u64s], &mut [u64]) {
570 unsafe { split_mut_slice(slice) }
571 }
572 #[inline(always)]
573 fn as_rsimd_c32s(slice: &[c32]) -> (&[c32], &[Self::c32s]) {
574 unsafe { rsplit_slice(slice) }
575 }
576 #[inline(always)]
577 fn as_rsimd_c64s(slice: &[c64]) -> (&[c64], &[Self::c64s]) {
578 unsafe { rsplit_slice(slice) }
579 }
580 #[inline(always)]
581 fn as_rsimd_f32s(slice: &[f32]) -> (&[f32], &[Self::f32s]) {
582 unsafe { rsplit_slice(slice) }
583 }
584 #[inline(always)]
585 fn as_rsimd_f64s(slice: &[f64]) -> (&[f64], &[Self::f64s]) {
586 unsafe { rsplit_slice(slice) }
587 }
588 #[inline(always)]
589 fn as_rsimd_i32s(slice: &[i32]) -> (&[i32], &[Self::i32s]) {
590 unsafe { rsplit_slice(slice) }
591 }
592 #[inline(always)]
593 fn as_rsimd_i64s(slice: &[i64]) -> (&[i64], &[Self::i64s]) {
594 unsafe { rsplit_slice(slice) }
595 }
596 #[inline(always)]
597 fn as_rsimd_u32s(slice: &[u32]) -> (&[u32], &[Self::u32s]) {
598 unsafe { rsplit_slice(slice) }
599 }
600 #[inline(always)]
601 fn as_rsimd_u64s(slice: &[u64]) -> (&[u64], &[Self::u64s]) {
602 unsafe { rsplit_slice(slice) }
603 }
604 #[inline(always)]
605 fn as_simd_c32s(slice: &[c32]) -> (&[Self::c32s], &[c32]) {
606 unsafe { split_slice(slice) }
607 }
608 #[inline(always)]
609 fn as_simd_c64s(slice: &[c64]) -> (&[Self::c64s], &[c64]) {
610 unsafe { split_slice(slice) }
611 }
612 #[inline(always)]
613 fn as_simd_f32s(slice: &[f32]) -> (&[Self::f32s], &[f32]) {
614 unsafe { split_slice(slice) }
615 }
616
617 #[inline(always)]
618 fn as_simd_f64s(slice: &[f64]) -> (&[Self::f64s], &[f64]) {
619 unsafe { split_slice(slice) }
620 }
621 #[inline(always)]
622 fn as_simd_i32s(slice: &[i32]) -> (&[Self::i32s], &[i32]) {
623 unsafe { split_slice(slice) }
624 }
625 #[inline(always)]
626 fn as_simd_i64s(slice: &[i64]) -> (&[Self::i64s], &[i64]) {
627 unsafe { split_slice(slice) }
628 }
629 #[inline(always)]
630 fn as_simd_u32s(slice: &[u32]) -> (&[Self::u32s], &[u32]) {
631 unsafe { split_slice(slice) }
632 }
633 #[inline(always)]
634 fn as_simd_u64s(slice: &[u64]) -> (&[Self::u64s], &[u64]) {
635 unsafe { split_slice(slice) }
636 }
637 #[inline(always)]
638 fn as_uninit_mut_rsimd_c32s(
639 slice: &mut [MaybeUninit<c32>],
640 ) -> (&mut [MaybeUninit<c32>], &mut [MaybeUninit<Self::c32s>]) {
641 unsafe { rsplit_mut_slice(slice) }
642 }
643 #[inline(always)]
644 fn as_uninit_mut_rsimd_c64s(
645 slice: &mut [MaybeUninit<c64>],
646 ) -> (&mut [MaybeUninit<c64>], &mut [MaybeUninit<Self::c64s>]) {
647 unsafe { rsplit_mut_slice(slice) }
648 }
649 #[inline(always)]
650 fn as_uninit_mut_rsimd_f32s(
651 slice: &mut [MaybeUninit<f32>],
652 ) -> (&mut [MaybeUninit<f32>], &mut [MaybeUninit<Self::f32s>]) {
653 unsafe { rsplit_mut_slice(slice) }
654 }
655
656 #[inline(always)]
657 fn as_uninit_mut_rsimd_f64s(
658 slice: &mut [MaybeUninit<f64>],
659 ) -> (&mut [MaybeUninit<f64>], &mut [MaybeUninit<Self::f64s>]) {
660 unsafe { rsplit_mut_slice(slice) }
661 }
662 #[inline(always)]
663 fn as_uninit_mut_rsimd_i32s(
664 slice: &mut [MaybeUninit<i32>],
665 ) -> (&mut [MaybeUninit<i32>], &mut [MaybeUninit<Self::i32s>]) {
666 unsafe { rsplit_mut_slice(slice) }
667 }
668 #[inline(always)]
669 fn as_uninit_mut_rsimd_i64s(
670 slice: &mut [MaybeUninit<i64>],
671 ) -> (&mut [MaybeUninit<i64>], &mut [MaybeUninit<Self::i64s>]) {
672 unsafe { rsplit_mut_slice(slice) }
673 }
674 #[inline(always)]
675 fn as_uninit_mut_rsimd_u32s(
676 slice: &mut [MaybeUninit<u32>],
677 ) -> (&mut [MaybeUninit<u32>], &mut [MaybeUninit<Self::u32s>]) {
678 unsafe { rsplit_mut_slice(slice) }
679 }
680 #[inline(always)]
681 fn as_uninit_mut_rsimd_u64s(
682 slice: &mut [MaybeUninit<u64>],
683 ) -> (&mut [MaybeUninit<u64>], &mut [MaybeUninit<Self::u64s>]) {
684 unsafe { rsplit_mut_slice(slice) }
685 }
686 #[inline(always)]
687 fn as_uninit_mut_simd_c32s(
688 slice: &mut [MaybeUninit<c32>],
689 ) -> (&mut [MaybeUninit<Self::c32s>], &mut [MaybeUninit<c32>]) {
690 unsafe { split_mut_slice(slice) }
691 }
692 #[inline(always)]
693 fn as_uninit_mut_simd_c64s(
694 slice: &mut [MaybeUninit<c64>],
695 ) -> (&mut [MaybeUninit<Self::c64s>], &mut [MaybeUninit<c64>]) {
696 unsafe { split_mut_slice(slice) }
697 }
698 #[inline(always)]
699 fn as_uninit_mut_simd_f32s(
700 slice: &mut [MaybeUninit<f32>],
701 ) -> (&mut [MaybeUninit<Self::f32s>], &mut [MaybeUninit<f32>]) {
702 unsafe { split_mut_slice(slice) }
703 }
704
705 #[inline(always)]
706 fn as_uninit_mut_simd_f64s(
707 slice: &mut [MaybeUninit<f64>],
708 ) -> (&mut [MaybeUninit<Self::f64s>], &mut [MaybeUninit<f64>]) {
709 unsafe { split_mut_slice(slice) }
710 }
711 #[inline(always)]
712 fn as_uninit_mut_simd_i32s(
713 slice: &mut [MaybeUninit<i32>],
714 ) -> (&mut [MaybeUninit<Self::i32s>], &mut [MaybeUninit<i32>]) {
715 unsafe { split_mut_slice(slice) }
716 }
717 #[inline(always)]
718 fn as_uninit_mut_simd_i64s(
719 slice: &mut [MaybeUninit<i64>],
720 ) -> (&mut [MaybeUninit<Self::i64s>], &mut [MaybeUninit<i64>]) {
721 unsafe { split_mut_slice(slice) }
722 }
723 #[inline(always)]
724 fn as_uninit_mut_simd_u32s(
725 slice: &mut [MaybeUninit<u32>],
726 ) -> (&mut [MaybeUninit<Self::u32s>], &mut [MaybeUninit<u32>]) {
727 unsafe { split_mut_slice(slice) }
728 }
729
730 #[inline(always)]
731 fn as_uninit_mut_simd_u64s(
732 slice: &mut [MaybeUninit<u64>],
733 ) -> (&mut [MaybeUninit<Self::u64s>], &mut [MaybeUninit<u64>]) {
734 unsafe { split_mut_slice(slice) }
735 }
736 fn conj_c32s(self, a: Self::c32s) -> Self::c32s;
737 fn conj_c64s(self, a: Self::c64s) -> Self::c64s;
738 fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s;
739
740 fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s;
741 #[inline]
743 fn conj_mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
744 self.conj_mul_add_c32s(a, b, c)
745 }
746 #[inline]
748 fn conj_mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
749 self.conj_mul_add_c64s(a, b, c)
750 }
751 fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s;
752
753 fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s;
754 #[inline]
756 fn conj_mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
757 self.conj_mul_c32s(a, b)
758 }
759 #[inline]
761 fn conj_mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
762 self.conj_mul_c64s(a, b)
763 }
764 #[inline(always)]
765 fn deinterleave_shfl_f32s<T: Interleave>(self, values: T) -> T {
766 unsafe { deinterleave_fallback::<f32, Self::f32s, T>(values) }
767 }
768
769 #[inline(always)]
770 fn deinterleave_shfl_f64s<T: Interleave>(self, values: T) -> T {
771 unsafe { deinterleave_fallback::<f64, Self::f64s, T>(values) }
772 }
773 fn div_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
774 fn div_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;
775 fn equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s;
776
777 fn equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s;
778 #[inline(always)]
779 fn first_true_m32s(self, mask: Self::m32s) -> usize {
780 if try_const! { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<Self::u32s>() } {
781 let mask: Self::u32s = bytemuck::cast(mask);
782 let slice = bytemuck::cast_slice::<Self::u32s, u32>(core::slice::from_ref(&mask));
783 let mut i = 0;
784 for &x in slice.iter() {
785 if x != 0 {
786 break;
787 }
788 i += 1;
789 }
790 i
791 } else if try_const! { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<u8>() } {
792 let mask: u8 = bytemuck::cast(mask);
793 mask.leading_zeros() as usize
794 } else if try_const! { core::mem::size_of::<Self::m32s>() == core::mem::size_of::<u16>() } {
795 let mask: u16 = bytemuck::cast(mask);
796 mask.leading_zeros() as usize
797 } else {
798 panic!()
799 }
800 }
801
802 #[inline(always)]
803 fn first_true_m64s(self, mask: Self::m64s) -> usize {
804 if try_const! { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<Self::u64s>() } {
805 let mask: Self::u64s = bytemuck::cast(mask);
806 let slice = bytemuck::cast_slice::<Self::u64s, u64>(core::slice::from_ref(&mask));
807 let mut i = 0;
808 for &x in slice.iter() {
809 if x != 0 {
810 break;
811 }
812 i += 1;
813 }
814 i
815 } else if try_const! { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<u8>() } {
816 let mask: u8 = bytemuck::cast(mask);
817 mask.leading_zeros() as usize
818 } else if try_const! { core::mem::size_of::<Self::m64s>() == core::mem::size_of::<u16>() } {
819 let mask: u16 = bytemuck::cast(mask);
820 mask.leading_zeros() as usize
821 } else {
822 panic!()
823 }
824 }
825
826 #[inline]
827 fn greater_than_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
828 self.less_than_f32s(b, a)
829 }
830
831 #[inline]
832 fn greater_than_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
833 self.less_than_f64s(b, a)
834 }
835 #[inline]
836 fn greater_than_or_equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
837 self.less_than_or_equal_f32s(b, a)
838 }
839 #[inline]
840 fn greater_than_or_equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
841 self.less_than_or_equal_f64s(b, a)
842 }
843
844 fn greater_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s;
845 fn greater_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s;
846 fn greater_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s;
847 fn greater_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s;
848 fn greater_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s;
849 fn greater_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s;
850 fn greater_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s;
851 fn greater_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s;
852
853 #[inline(always)]
854 fn interleave_shfl_f32s<T: Interleave>(self, values: T) -> T {
855 unsafe { interleave_fallback::<f32, Self::f32s, T>(values) }
856 }
857
858 #[inline(always)]
859 fn interleave_shfl_f64s<T: Interleave>(self, values: T) -> T {
860 unsafe { interleave_fallback::<f64, Self::f64s, T>(values) }
861 }
862
863 fn less_than_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s;
864 fn less_than_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s;
865 fn less_than_or_equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s;
866 fn less_than_or_equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s;
867
868 fn less_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s;
869 fn less_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s;
870 fn less_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s;
871 fn less_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s;
872 fn less_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s;
873 fn less_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s;
874 fn less_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s;
875 fn less_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s;
876
877 #[inline(always)]
878 fn mask_between_m32s(self, start: u32, end: u32) -> MemMask<Self::m32s> {
879 let iota: Self::u32s =
880 try_const! { unsafe { core::mem::transmute_copy(&iota_32::<u32>()) } };
881 self.and_m32s(
882 self.greater_than_or_equal_u32s(iota, self.splat_u32s(start)),
883 self.less_than_u32s(iota, self.splat_u32s(end)),
884 )
885 .into()
886 }
887
888 #[inline(always)]
889 fn mask_between_m64s(self, start: u64, end: u64) -> MemMask<Self::m64s> {
890 let iota: Self::u64s =
891 try_const! { unsafe { core::mem::transmute_copy(&iota_64::<u64>()) } };
892 self.and_m64s(
893 self.greater_than_or_equal_u64s(iota, self.splat_u64s(start)),
894 self.less_than_u64s(iota, self.splat_u64s(end)),
895 )
896 .into()
897 }
898 unsafe fn mask_load_ptr_c32s(self, mask: MemMask<Self::m32s>, ptr: *const c32) -> Self::c32s;
903 unsafe fn mask_load_ptr_c64s(self, mask: MemMask<Self::m64s>, ptr: *const c64) -> Self::c64s;
908 #[inline(always)]
913 unsafe fn mask_load_ptr_f32s(self, mask: MemMask<Self::m32s>, ptr: *const f32) -> Self::f32s {
914 self.transmute_f32s_u32s(self.mask_load_ptr_u32s(mask, ptr as *const u32))
915 }
916
917 #[inline(always)]
922 unsafe fn mask_load_ptr_f64s(self, mask: MemMask<Self::m64s>, ptr: *const f64) -> Self::f64s {
923 self.transmute_f64s_u64s(self.mask_load_ptr_u64s(mask, ptr as *const u64))
924 }
925 #[inline(always)]
930 unsafe fn mask_load_ptr_i32s(self, mask: MemMask<Self::m32s>, ptr: *const i32) -> Self::i32s {
931 self.transmute_i32s_u32s(self.mask_load_ptr_u32s(mask, ptr as *const u32))
932 }
933 #[inline(always)]
938 unsafe fn mask_load_ptr_i64s(self, mask: MemMask<Self::m64s>, ptr: *const i64) -> Self::i64s {
939 self.transmute_i64s_u64s(self.mask_load_ptr_u64s(mask, ptr as *const u64))
940 }
941 unsafe fn mask_load_ptr_u32s(self, mask: MemMask<Self::m32s>, ptr: *const u32) -> Self::u32s;
946
947 unsafe fn mask_load_ptr_u64s(self, mask: MemMask<Self::m64s>, ptr: *const u64) -> Self::u64s;
952 unsafe fn mask_store_ptr_c32s(
957 self,
958 mask: MemMask<Self::m32s>,
959 ptr: *mut c32,
960 values: Self::c32s,
961 );
962 unsafe fn mask_store_ptr_c64s(
967 self,
968 mask: MemMask<Self::m64s>,
969 ptr: *mut c64,
970 values: Self::c64s,
971 );
972 #[inline(always)]
977 unsafe fn mask_store_ptr_f32s(
978 self,
979 mask: MemMask<Self::m32s>,
980 ptr: *mut f32,
981 values: Self::f32s,
982 ) {
983 self.mask_store_ptr_u32s(mask, ptr as *mut u32, self.transmute_u32s_f32s(values));
984 }
985
986 #[inline(always)]
991 unsafe fn mask_store_ptr_f64s(
992 self,
993 mask: MemMask<Self::m64s>,
994 ptr: *mut f64,
995 values: Self::f64s,
996 ) {
997 self.mask_store_ptr_u64s(mask, ptr as *mut u64, self.transmute_u64s_f64s(values));
998 }
999 #[inline(always)]
1004 unsafe fn mask_store_ptr_i32s(
1005 self,
1006 mask: MemMask<Self::m32s>,
1007 ptr: *mut i32,
1008 values: Self::i32s,
1009 ) {
1010 self.mask_store_ptr_u32s(mask, ptr as *mut u32, self.transmute_u32s_i32s(values));
1011 }
1012 #[inline(always)]
1017 unsafe fn mask_store_ptr_i64s(
1018 self,
1019 mask: MemMask<Self::m64s>,
1020 ptr: *mut i64,
1021 values: Self::i64s,
1022 ) {
1023 self.mask_store_ptr_u64s(mask, ptr as *mut u64, self.transmute_u64s_i64s(values));
1024 }
1025 unsafe fn mask_store_ptr_u32s(
1030 self,
1031 mask: MemMask<Self::m32s>,
1032 ptr: *mut u32,
1033 values: Self::u32s,
1034 );
1035
1036 unsafe fn mask_store_ptr_u64s(
1041 self,
1042 mask: MemMask<Self::m64s>,
1043 ptr: *mut u64,
1044 values: Self::u64s,
1045 );
1046 fn max_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
1047 fn max_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;
1048 fn min_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
1049 fn min_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;
1050
1051 fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s;
1052 fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s;
1053 #[inline]
1055 fn mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
1056 self.mul_add_c32s(a, b, c)
1057 }
1058 #[inline]
1060 fn mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
1061 self.mul_add_c64s(a, b, c)
1062 }
1063 fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s;
1064 fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s;
1065 fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s;
1066 fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s;
1067 fn mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s;
1068
1069 fn mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s;
1070 #[inline]
1072 fn mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
1073 self.mul_c32s(a, b)
1074 }
1075 fn mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
1077 self.mul_c64s(a, b)
1078 }
1079 fn mul_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
1080 fn mul_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;
1081 fn neg_c32s(self, a: Self::c32s) -> Self::c32s;
1082 fn neg_c64s(self, a: Self::c64s) -> Self::c64s;
1083
1084 #[inline]
1085 fn neg_f32s(self, a: Self::f32s) -> Self::f32s {
1086 self.xor_f32s(self.splat_f32s(-0.0), a)
1087 }
1088 #[inline]
1089 fn neg_f64s(self, a: Self::f64s) -> Self::f64s {
1090 self.xor_f64s(a, self.splat_f64s(-0.0))
1091 }
1092 #[inline]
1093 fn not_f32s(self, a: Self::f32s) -> Self::f32s {
1094 self.transmute_f32s_u32s(self.not_u32s(self.transmute_u32s_f32s(a)))
1095 }
1096
1097 #[inline]
1098 fn not_f64s(self, a: Self::f64s) -> Self::f64s {
1099 self.transmute_f64s_u64s(self.not_u64s(self.transmute_u64s_f64s(a)))
1100 }
1101 #[inline]
1102 fn not_i32s(self, a: Self::i32s) -> Self::i32s {
1103 self.transmute_i32s_u32s(self.not_u32s(self.transmute_u32s_i32s(a)))
1104 }
1105 #[inline]
1106 fn not_i64s(self, a: Self::i64s) -> Self::i64s {
1107 self.transmute_i64s_u64s(self.not_u64s(self.transmute_u64s_i64s(a)))
1108 }
1109
1110 fn not_m32s(self, a: Self::m32s) -> Self::m32s;
1111 fn not_m64s(self, a: Self::m64s) -> Self::m64s;
1112 fn not_u32s(self, a: Self::u32s) -> Self::u32s;
1113 fn not_u64s(self, a: Self::u64s) -> Self::u64s;
1114 #[inline]
1115 fn or_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
1116 self.transmute_f32s_u32s(
1117 self.or_u32s(self.transmute_u32s_f32s(a), self.transmute_u32s_f32s(b)),
1118 )
1119 }
1120 #[inline]
1121 fn or_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
1122 self.transmute_f64s_u64s(
1123 self.or_u64s(self.transmute_u64s_f64s(a), self.transmute_u64s_f64s(b)),
1124 )
1125 }
1126 #[inline]
1127 fn or_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::i32s {
1128 self.transmute_i32s_u32s(
1129 self.or_u32s(self.transmute_u32s_i32s(a), self.transmute_u32s_i32s(b)),
1130 )
1131 }
1132 #[inline]
1133 fn or_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::i64s {
1134 self.transmute_i64s_u64s(
1135 self.or_u64s(self.transmute_u64s_i64s(a), self.transmute_u64s_i64s(b)),
1136 )
1137 }
1138 fn or_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s;
1139
1140 fn or_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s;
1141 fn or_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s;
1142 fn or_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s;
    /// Loads the in-bounds prefix of `slice` into a vector. `c32` is
    /// bit-compatible with `f64`, so this delegates to the `f64` partial load.
    #[inline(always)]
    fn partial_load_c32s(self, slice: &[c32]) -> Self::c32s {
        cast(self.partial_load_f64s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`; delegates to the `f64` partial
    /// load on the byte-reinterpreted slice (two `f64`s per `c64`).
    #[inline(always)]
    fn partial_load_c64s(self, slice: &[c64]) -> Self::c64s {
        cast(self.partial_load_f64s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`, via the `u32` partial load.
    #[inline(always)]
    fn partial_load_f32s(self, slice: &[f32]) -> Self::f32s {
        cast(self.partial_load_u32s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`, via the `u64` partial load.
    #[inline(always)]
    fn partial_load_f64s(self, slice: &[f64]) -> Self::f64s {
        cast(self.partial_load_u64s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`, via the `u32` partial load.
    #[inline(always)]
    fn partial_load_i32s(self, slice: &[i32]) -> Self::i32s {
        cast(self.partial_load_u32s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`, via the `u64` partial load.
    #[inline(always)]
    fn partial_load_i64s(self, slice: &[i64]) -> Self::i64s {
        cast(self.partial_load_u64s(bytemuck::cast_slice(slice)))
    }
    /// Loads the in-bounds prefix of `slice`: a range mask covering
    /// `0..slice.len()` restricts the masked pointer load, so only elements
    /// inside the slice are read.
    #[inline(always)]
    fn partial_load_u32s(self, slice: &[u32]) -> Self::u32s {
        unsafe {
            self.mask_load_ptr_u32s(
                self.mask_between_m32s(0, slice.len() as u32),
                slice.as_ptr(),
            )
        }
    }
    /// `u64` counterpart of [`Simd::partial_load_u32s`].
    #[inline(always)]
    fn partial_load_u64s(self, slice: &[u64]) -> Self::u64s {
        unsafe {
            self.mask_load_ptr_u64s(
                self.mask_between_m64s(0, slice.len() as u64),
                slice.as_ptr(),
            )
        }
    }
1185
    /// Stores the leading lanes of `values` into `slice`; `c32` data is
    /// handled through the bit-compatible `f64` partial store.
    #[inline(always)]
    fn partial_store_c32s(self, slice: &mut [c32], values: Self::c32s) {
        self.partial_store_f64s(bytemuck::cast_slice_mut(slice), cast(values))
    }
    /// Stores the leading lanes of `values` into `slice` via the `f64`
    /// partial store (two `f64`s per `c64`).
    #[inline(always)]
    fn partial_store_c64s(self, slice: &mut [c64], values: Self::c64s) {
        self.partial_store_f64s(bytemuck::cast_slice_mut(slice), cast(values))
    }

    /// Stores the leading lanes of `values`, via the `u32` partial store.
    #[inline(always)]
    fn partial_store_f32s(self, slice: &mut [f32], values: Self::f32s) {
        self.partial_store_u32s(bytemuck::cast_slice_mut(slice), cast(values))
    }
    /// Stores the leading lanes of `values`, via the `u64` partial store.
    #[inline(always)]
    fn partial_store_f64s(self, slice: &mut [f64], values: Self::f64s) {
        self.partial_store_u64s(bytemuck::cast_slice_mut(slice), cast(values))
    }
    /// Stores the leading lanes of `values`, via the `u32` partial store.
    #[inline(always)]
    fn partial_store_i32s(self, slice: &mut [i32], values: Self::i32s) {
        self.partial_store_u32s(bytemuck::cast_slice_mut(slice), cast(values))
    }
    /// Stores the leading lanes of `values`, via the `u64` partial store.
    #[inline(always)]
    fn partial_store_i64s(self, slice: &mut [i64], values: Self::i64s) {
        self.partial_store_u64s(bytemuck::cast_slice_mut(slice), cast(values))
    }
    /// Stores the leading lanes of `values` into `slice`: a range mask over
    /// `0..slice.len()` restricts the masked pointer store, so only elements
    /// inside the slice are written.
    #[inline(always)]
    fn partial_store_u32s(self, slice: &mut [u32], values: Self::u32s) {
        unsafe {
            self.mask_store_ptr_u32s(
                self.mask_between_m32s(0, slice.len() as u32),
                slice.as_mut_ptr(),
                values,
            )
        }
    }
    /// `u64` counterpart of [`Simd::partial_store_u32s`].
    #[inline(always)]
    fn partial_store_u64s(self, slice: &mut [u64], values: Self::u64s) {
        unsafe {
            self.mask_store_ptr_u64s(
                self.mask_between_m64s(0, slice.len() as u64),
                slice.as_mut_ptr(),
                values,
            )
        }
    }
    // Horizontal reductions: collapse all lanes of a vector into one scalar.
    fn reduce_max_c32s(self, a: Self::c32s) -> c32;
    fn reduce_max_c64s(self, a: Self::c64s) -> c64;
    fn reduce_max_f32s(self, a: Self::f32s) -> f32;
    fn reduce_max_f64s(self, a: Self::f64s) -> f64;
    fn reduce_min_c32s(self, a: Self::c32s) -> c32;
    fn reduce_min_c64s(self, a: Self::c64s) -> c64;
    fn reduce_min_f32s(self, a: Self::f32s) -> f32;
    fn reduce_min_f64s(self, a: Self::f64s) -> f64;

    fn reduce_product_f32s(self, a: Self::f32s) -> f32;
    fn reduce_product_f64s(self, a: Self::f64s) -> f64;
    fn reduce_sum_c32s(self, a: Self::c32s) -> c32;
    fn reduce_sum_c64s(self, a: Self::c64s) -> c64;

    fn reduce_sum_f32s(self, a: Self::f32s) -> f32;
    fn reduce_sum_f64s(self, a: Self::f64s) -> f64;
    /// Rotates lanes left by `amount`; expressed as a right-rotation by the
    /// (wrapping) negated amount.
    #[inline(always)]
    fn rotate_left_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s {
        self.rotate_right_c32s(a, amount.wrapping_neg())
    }
    /// See [`Simd::rotate_left_c32s`].
    #[inline(always)]
    fn rotate_left_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s {
        self.rotate_right_c64s(a, amount.wrapping_neg())
    }

    /// Rotates `f32` lanes left, via the `u32` bit representation.
    #[inline(always)]
    fn rotate_left_f32s(self, a: Self::f32s, amount: usize) -> Self::f32s {
        cast(self.rotate_left_u32s(cast(a), amount))
    }
    /// Rotates `f64` lanes left, via the `u64` bit representation.
    #[inline(always)]
    fn rotate_left_f64s(self, a: Self::f64s, amount: usize) -> Self::f64s {
        cast(self.rotate_left_u64s(cast(a), amount))
    }
    /// Rotates `i32` lanes left, via the `u32` bit representation.
    #[inline(always)]
    fn rotate_left_i32s(self, a: Self::i32s, amount: usize) -> Self::i32s {
        cast(self.rotate_left_u32s(cast(a), amount))
    }

    /// Rotates `i64` lanes left, via the `u64` bit representation.
    #[inline(always)]
    fn rotate_left_i64s(self, a: Self::i64s, amount: usize) -> Self::i64s {
        cast(self.rotate_left_u64s(cast(a), amount))
    }

    /// Rotates `u32` lanes left, expressed as a right-rotation by the
    /// (wrapping) negated amount.
    #[inline(always)]
    fn rotate_left_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s {
        self.rotate_right_u32s(a, amount.wrapping_neg())
    }
    /// See [`Simd::rotate_left_u32s`].
    #[inline(always)]
    fn rotate_left_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s {
        self.rotate_right_u64s(a, amount.wrapping_neg())
    }
    /// Rotates lanes right by `amount`.
    fn rotate_right_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s;
    /// Rotates lanes right by `amount`.
    fn rotate_right_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s;
    /// Rotates `f32` lanes right, via the `u32` bit representation.
    #[inline(always)]
    fn rotate_right_f32s(self, a: Self::f32s, amount: usize) -> Self::f32s {
        cast(self.rotate_right_u32s(cast(a), amount))
    }
    /// Rotates `f64` lanes right, via the `u64` bit representation.
    #[inline(always)]
    fn rotate_right_f64s(self, a: Self::f64s, amount: usize) -> Self::f64s {
        cast(self.rotate_right_u64s(cast(a), amount))
    }
    /// Rotates `i32` lanes right, via the `u32` bit representation.
    #[inline(always)]
    fn rotate_right_i32s(self, a: Self::i32s, amount: usize) -> Self::i32s {
        cast(self.rotate_right_u32s(cast(a), amount))
    }
    /// Rotates `i64` lanes right, via the `u64` bit representation.
    #[inline(always)]
    fn rotate_right_i64s(self, a: Self::i64s, amount: usize) -> Self::i64s {
        cast(self.rotate_right_u64s(cast(a), amount))
    }
    /// Rotates `u32` lanes right by `amount`.
    fn rotate_right_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s;
    /// Rotates `u64` lanes right by `amount`.
    fn rotate_right_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s;
1302
1303 #[inline]
1304 fn select_f32s_m32s(
1305 self,
1306 mask: Self::m32s,
1307 if_true: Self::f32s,
1308 if_false: Self::f32s,
1309 ) -> Self::f32s {
1310 self.transmute_f32s_u32s(self.select_u32s_m32s(
1311 mask,
1312 self.transmute_u32s_f32s(if_true),
1313 self.transmute_u32s_f32s(if_false),
1314 ))
1315 }
    /// Lane-wise select over `f64` lanes: where `mask` is set the lane comes
    /// from `if_true`, otherwise from `if_false`; via the `u64` select.
    #[inline]
    fn select_f64s_m64s(
        self,
        mask: Self::m64s,
        if_true: Self::f64s,
        if_false: Self::f64s,
    ) -> Self::f64s {
        self.transmute_f64s_u64s(self.select_u64s_m64s(
            mask,
            self.transmute_u64s_f64s(if_true),
            self.transmute_u64s_f64s(if_false),
        ))
    }
    /// Lane-wise select over `i32` lanes, via the `u32` select.
    #[inline]
    fn select_i32s_m32s(
        self,
        mask: Self::m32s,
        if_true: Self::i32s,
        if_false: Self::i32s,
    ) -> Self::i32s {
        self.transmute_i32s_u32s(self.select_u32s_m32s(
            mask,
            self.transmute_u32s_i32s(if_true),
            self.transmute_u32s_i32s(if_false),
        ))
    }
    /// Lane-wise select over `i64` lanes, via the `u64` select.
    #[inline]
    fn select_i64s_m64s(
        self,
        mask: Self::m64s,
        if_true: Self::i64s,
        if_false: Self::i64s,
    ) -> Self::i64s {
        self.transmute_i64s_u64s(self.select_u64s_m64s(
            mask,
            self.transmute_u64s_i64s(if_true),
            self.transmute_u64s_i64s(if_false),
        ))
    }
    /// Lane-wise select over `u32` lanes.
    fn select_u32s_m32s(
        self,
        mask: Self::m32s,
        if_true: Self::u32s,
        if_false: Self::u32s,
    ) -> Self::u32s;
    /// Lane-wise select over `u64` lanes.
    fn select_u64s_m64s(
        self,
        mask: Self::m64s,
        if_true: Self::u64s,
        if_false: Self::u64s,
    ) -> Self::u64s;
    /// Broadcasts `value` to every lane.
    fn splat_c32s(self, value: c32) -> Self::c32s;
    fn splat_c64s(self, value: c64) -> Self::c64s;
    fn splat_f32s(self, value: f32) -> Self::f32s;
    fn splat_f64s(self, value: f64) -> Self::f64s;
1371
    /// Broadcasts `value` to every `i32` lane, via the bit-preserving `u32`
    /// splat.
    #[inline]
    fn splat_i32s(self, value: i32) -> Self::i32s {
        self.transmute_i32s_u32s(self.splat_u32s(value as u32))
    }
    /// Broadcasts `value` to every `i64` lane, via the bit-preserving `u64`
    /// splat.
    #[inline]
    fn splat_i64s(self, value: i64) -> Self::i64s {
        self.transmute_i64s_u64s(self.splat_u64s(value as u64))
    }
    /// Broadcasts `value` to every `u32` lane.
    fn splat_u32s(self, value: u32) -> Self::u32s;
    /// Broadcasts `value` to every `u64` lane.
    fn splat_u64s(self, value: u64) -> Self::u64s;
1382
    /// Lane-wise subtraction.
    fn sub_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s;
    fn sub_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s;
    fn sub_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s;
    fn sub_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s;

    /// Lane-wise `i32` subtraction, via the (wrapping) `u32` subtraction —
    /// identical bit pattern in two's complement.
    #[inline]
    fn sub_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::i32s {
        self.transmute_i32s_u32s(
            self.sub_u32s(self.transmute_u32s_i32s(a), self.transmute_u32s_i32s(b)),
        )
    }
    /// Lane-wise `i64` subtraction, via the (wrapping) `u64` subtraction.
    #[inline]
    fn sub_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::i64s {
        self.transmute_i64s_u64s(
            self.sub_u64s(self.transmute_u64s_i64s(a), self.transmute_u64s_i64s(b)),
        )
    }

    fn sub_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s;
    fn sub_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s;
    /// Swaps the real and imaginary parts of each complex lane.
    fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s;
    fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s;
    // Bit-level reinterpretations between same-width lane types. Each is a
    // plain `cast` of the underlying bytes; no per-lane conversion happens.
    #[inline]
    fn transmute_f32s_i32s(self, a: Self::i32s) -> Self::f32s {
        cast(a)
    }
    #[inline]
    fn transmute_f32s_u32s(self, a: Self::u32s) -> Self::f32s {
        cast(a)
    }

    #[inline]
    fn transmute_f64s_i64s(self, a: Self::i64s) -> Self::f64s {
        cast(a)
    }
    #[inline]
    fn transmute_f64s_u64s(self, a: Self::u64s) -> Self::f64s {
        cast(a)
    }
    #[inline]
    fn transmute_i32s_f32s(self, a: Self::f32s) -> Self::i32s {
        cast(a)
    }
    #[inline]
    fn transmute_i32s_u32s(self, a: Self::u32s) -> Self::i32s {
        cast(a)
    }
    #[inline]
    fn transmute_i64s_f64s(self, a: Self::f64s) -> Self::i64s {
        cast(a)
    }
    #[inline]
    fn transmute_i64s_u64s(self, a: Self::u64s) -> Self::i64s {
        cast(a)
    }

    #[inline]
    fn transmute_u32s_f32s(self, a: Self::f32s) -> Self::u32s {
        cast(a)
    }
    #[inline]
    fn transmute_u32s_i32s(self, a: Self::i32s) -> Self::u32s {
        cast(a)
    }
    #[inline]
    fn transmute_u64s_f64s(self, a: Self::f64s) -> Self::u64s {
        cast(a)
    }
    #[inline]
    fn transmute_u64s_i64s(self, a: Self::i64s) -> Self::u64s {
        cast(a)
    }
1455
    /// Runs `op` with this SIMD token as context.
    fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output;
    /// Lane-wise full `u32 x u32 -> u64` multiplication, returned as
    /// `(low halves, high halves)`.
    fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s);
    /// Lane-wise left shift with per-lane shift amounts.
    fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s;
    /// Lane-wise right shift with per-lane shift amounts.
    fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s;
1460
    /// Lane-wise bitwise XOR of `f32` lanes, via the `u32` bit representation.
    #[inline]
    fn xor_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        self.transmute_f32s_u32s(
            self.xor_u32s(self.transmute_u32s_f32s(a), self.transmute_u32s_f32s(b)),
        )
    }
    /// Lane-wise bitwise XOR of `f64` lanes, via the `u64` bit representation.
    #[inline]
    fn xor_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        self.transmute_f64s_u64s(
            self.xor_u64s(self.transmute_u64s_f64s(a), self.transmute_u64s_f64s(b)),
        )
    }
    /// Lane-wise bitwise XOR of `i32` lanes, via the `u32` bit representation.
    #[inline]
    fn xor_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::i32s {
        self.transmute_i32s_u32s(
            self.xor_u32s(self.transmute_u32s_i32s(a), self.transmute_u32s_i32s(b)),
        )
    }
    /// Lane-wise bitwise XOR of `i64` lanes, via the `u64` bit representation.
    #[inline]
    fn xor_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::i64s {
        self.transmute_i64s_u64s(
            self.xor_u64s(self.transmute_u64s_i64s(a), self.transmute_u64s_i64s(b)),
        )
    }
1485
    /// Lane-wise bitwise XOR of 32-bit mask lanes.
    fn xor_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s;
    /// Lane-wise bitwise XOR of 64-bit mask lanes.
    fn xor_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s;
    /// Lane-wise bitwise XOR of `u32` lanes.
    fn xor_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s;
    /// Lane-wise bitwise XOR of `u64` lanes.
    fn xor_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s;
1490}
1491
/// Marker trait for `Simd` tokens implemented here in portable (scalar) code;
/// implemented by the `Scalar*` token types below.
pub trait PortableSimd: Simd {}

impl PortableSimd for Scalar {}
impl PortableSimd for Scalar128b {}
impl PortableSimd for Scalar256b {}
impl PortableSimd for Scalar512b {}
1498
/// Token for the plain scalar `Simd` implementation.
#[derive(Debug, Copy, Clone)]
pub struct Scalar;

/// Scalar-emulation token — NOTE(review): the `128b` suffix presumably
/// denotes an emulated 128-bit register width; confirm against the
/// `scalar_simd!` invocations.
#[derive(Debug, Copy, Clone)]
pub struct Scalar128b;
/// Scalar-emulation token (presumably 256-bit-wide registers; see above).
#[derive(Debug, Copy, Clone)]
pub struct Scalar256b;
/// Scalar-emulation token (presumably 512-bit-wide registers; see above).
#[derive(Debug, Copy, Clone)]
pub struct Scalar512b;
1508
1509macro_rules! scalar_simd {
1510 ($ty: ty, $register_count: expr, $m32s: ty, $f32s: ty, $i32s: ty, $u32s: ty, $m64s: ty, $f64s: ty, $i64s: ty, $u64s: ty $(,)?) => {
1511 impl Seal for $ty {}
1512 impl Simd for $ty {
1513 type c32s = $f32s;
1514 type c64s = $f64s;
1515 type f32s = $f32s;
1516 type f64s = $f64s;
1517 type i32s = $i32s;
1518 type i64s = $i64s;
1519 type m32s = $m32s;
1520 type m64s = $m64s;
1521 type u32s = $u32s;
1522 type u64s = $u64s;
1523
1524 const REGISTER_COUNT: usize = $register_count;
1525
            /// Runs `op` with this token; a direct call, since nothing
            /// further is required for the scalar implementations.
            #[inline]
            fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output {
                op.with_simd(self)
            }
1530
            /// Loads `u32` lanes from `ptr`, reading only lanes whose mask
            /// bit is set; unselected lanes are zero.
            ///
            /// # Safety
            /// `ptr.add(i)` must be valid to read for every lane `i` whose
            /// mask bit is set.
            #[inline]
            unsafe fn mask_load_ptr_u32s(
                self,
                mask: MemMask<Self::m32s>,
                ptr: *const u32,
            ) -> Self::u32s {
                let mut values = [0u32; Self::F32_LANES];
                let mask: [m32; Self::F32_LANES] = cast(mask.mask());
                for i in 0..Self::F32_LANES {
                    if mask[i].is_set() {
                        values[i] = *ptr.add(i);
                    }
                }
                cast(values)
            }
1546
            /// Masked `c32` load; delegates to the `u32` masked load on the
            /// same address.
            ///
            /// # Safety
            /// Same contract as `mask_load_ptr_u32s`.
            #[inline]
            unsafe fn mask_load_ptr_c32s(
                self,
                mask: MemMask<Self::m32s>,
                ptr: *const c32,
            ) -> Self::c32s {
                cast(self.mask_load_ptr_u32s(mask, ptr as *const u32))
            }
1555
            /// Stores `u32` lanes to `ptr`, writing only lanes whose mask bit
            /// is set.
            ///
            /// # Safety
            /// `ptr.add(i)` must be valid to write for every lane `i` whose
            /// mask bit is set.
            #[inline]
            unsafe fn mask_store_ptr_u32s(
                self,
                mask: MemMask<Self::m32s>,
                ptr: *mut u32,
                values: Self::u32s,
            ) {
                let mask: [m32; Self::F32_LANES] = cast(mask.mask());
                let values: [u32; Self::F32_LANES] = cast(values);
                for i in 0..Self::F32_LANES {
                    if mask[i].is_set() {
                        *ptr.add(i) = values[i];
                    }
                }
            }
1571
            /// Masked `c32` store; delegates to the `u32` masked store on the
            /// same address.
            ///
            /// # Safety
            /// Same contract as `mask_store_ptr_u32s`.
            #[inline]
            unsafe fn mask_store_ptr_c32s(
                self,
                mask: MemMask<Self::m32s>,
                ptr: *mut c32,
                values: Self::c32s,
            ) {
                self.mask_store_ptr_u32s(mask, ptr as *mut u32, cast(values))
            }
1581
            /// Loads `u64` lanes from `ptr`, reading only lanes whose mask
            /// bit is set; unselected lanes are zero.
            ///
            /// # Safety
            /// `ptr.add(i)` must be valid to read for every lane `i` whose
            /// mask bit is set.
            #[inline]
            unsafe fn mask_load_ptr_u64s(
                self,
                mask: MemMask<Self::m64s>,
                ptr: *const u64,
            ) -> Self::u64s {
                let mut values = [0u64; Self::F64_LANES];
                let mask: [m64; Self::F64_LANES] = cast(mask.mask());
                for i in 0..Self::F64_LANES {
                    if mask[i].is_set() {
                        values[i] = *ptr.add(i);
                    }
                }
                cast(values)
            }
1597
            /// Masked `c64` load; delegates to the `u64` masked load on the
            /// same address.
            ///
            /// # Safety
            /// Same contract as `mask_load_ptr_u64s`.
            #[inline]
            unsafe fn mask_load_ptr_c64s(
                self,
                mask: MemMask<Self::m64s>,
                ptr: *const c64,
            ) -> Self::c64s {
                cast(self.mask_load_ptr_u64s(mask, ptr as *const u64))
            }
1606
            /// Stores `u64` lanes to `ptr`, writing only lanes whose mask bit
            /// is set.
            ///
            /// # Safety
            /// `ptr.add(i)` must be valid to write for every lane `i` whose
            /// mask bit is set.
            #[inline]
            unsafe fn mask_store_ptr_u64s(
                self,
                mask: MemMask<Self::m64s>,
                ptr: *mut u64,
                values: Self::u64s,
            ) {
                let mask: [m64; Self::F64_LANES] = cast(mask.mask());
                let values: [u64; Self::F64_LANES] = cast(values);
                for i in 0..Self::F64_LANES {
                    if mask[i].is_set() {
                        *ptr.add(i) = values[i];
                    }
                }
            }
1622
            /// Masked `c64` store; delegates to the `u64` masked store on the
            /// same address.
            ///
            /// # Safety
            /// Same contract as `mask_store_ptr_u64s`.
            #[inline]
            unsafe fn mask_store_ptr_c64s(
                self,
                mask: MemMask<Self::m64s>,
                ptr: *mut c64,
                values: Self::c64s,
            ) {
                self.mask_store_ptr_u64s(mask, ptr as *mut u64, cast(values))
            }
1632
            /// Loads the in-bounds prefix of `slice` (overriding the trait
            /// default so no unsafe pointer code is needed); trailing lanes
            /// stay zero.
            #[inline]
            fn partial_load_u32s(self, slice: &[u32]) -> Self::u32s {
                let mut values = [0u32; Self::F32_LANES];
                for i in 0..Ord::min(values.len(), slice.len()) {
                    values[i] = slice[i];
                }
                cast(values)
            }

            /// Stores the leading `min(lane_count, slice.len())` lanes of
            /// `values` into `slice`.
            #[inline]
            fn partial_store_u32s(self, slice: &mut [u32], values: Self::u32s) {
                let values: [u32; Self::F32_LANES] = cast(values);
                for i in 0..Ord::min(values.len(), slice.len()) {
                    slice[i] = values[i];
                }
            }
1649
            /// Loads the in-bounds prefix of `slice`; trailing lanes stay
            /// zero.
            #[inline]
            fn partial_load_u64s(self, slice: &[u64]) -> Self::u64s {
                let mut values = [0u64; Self::F64_LANES];
                for i in 0..Ord::min(values.len(), slice.len()) {
                    values[i] = slice[i];
                }
                cast(values)
            }

            /// Stores the leading `min(lane_count, slice.len())` lanes of
            /// `values` into `slice`.
            #[inline]
            fn partial_store_u64s(self, slice: &mut [u64], values: Self::u64s) {
                let values: [u64; Self::F64_LANES] = cast(values);
                for i in 0..Ord::min(values.len(), slice.len()) {
                    slice[i] = values[i];
                }
            }
1666
            // Lane-wise logical operations on 32-bit mask lanes, computed one
            // lane at a time over the backing array.
            #[inline]
            fn not_m32s(self, a: Self::m32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [m32; Self::F32_LANES] = cast(a);
                for i in 0..Self::F32_LANES {
                    out[i] = !a[i];
                }
                cast(out)
            }

            #[inline]
            fn and_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [m32; Self::F32_LANES] = cast(a);
                let b: [m32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] & b[i];
                }
                cast(out)
            }

            #[inline]
            fn or_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [m32; Self::F32_LANES] = cast(a);
                let b: [m32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] | b[i];
                }
                cast(out)
            }

            #[inline]
            fn xor_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [m32; Self::F32_LANES] = cast(a);
                let b: [m32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] ^ b[i];
                }
                cast(out)
            }
1709
            // Lane-wise logical operations on 64-bit mask lanes.
            #[inline]
            fn not_m64s(self, a: Self::m64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [m64; Self::F64_LANES] = cast(a);
                for i in 0..Self::F64_LANES {
                    out[i] = !a[i];
                }
                cast(out)
            }

            #[inline]
            fn and_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [m64; Self::F64_LANES] = cast(a);
                let b: [m64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] & b[i];
                }
                cast(out)
            }

            #[inline]
            fn or_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [m64; Self::F64_LANES] = cast(a);
                let b: [m64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] | b[i];
                }
                cast(out)
            }

            #[inline]
            fn xor_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [m64; Self::F64_LANES] = cast(a);
                let b: [m64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] ^ b[i];
                }
                cast(out)
            }
1752
            // Lane-wise bitwise operations on `u32` lanes.
            #[inline]
            fn not_u32s(self, a: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                for i in 0..Self::F32_LANES {
                    out[i] = !a[i];
                }
                cast(out)
            }

            #[inline]
            fn and_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] & b[i];
                }
                cast(out)
            }

            #[inline]
            fn or_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] | b[i];
                }
                cast(out)
            }

            #[inline]
            fn xor_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i] ^ b[i];
                }
                cast(out)
            }
1795
            // Lane-wise bitwise operations on `u64` lanes.
            #[inline]
            fn not_u64s(self, a: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                for i in 0..Self::F64_LANES {
                    out[i] = !a[i];
                }
                cast(out)
            }

            #[inline]
            fn and_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] & b[i];
                }
                cast(out)
            }

            #[inline]
            fn or_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] | b[i];
                }
                cast(out)
            }

            #[inline]
            fn xor_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i] ^ b[i];
                }
                cast(out)
            }
1838
            /// Lane-wise select: `if_true[i]` where the mask lane is set,
            /// otherwise `if_false[i]`.
            #[inline]
            fn select_u32s_m32s(
                self,
                mask: Self::m32s,
                if_true: Self::u32s,
                if_false: Self::u32s,
            ) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let mask: [m32; Self::F32_LANES] = cast(mask);
                let if_true: [u32; Self::F32_LANES] = cast(if_true);
                let if_false: [u32; Self::F32_LANES] = cast(if_false);

                for i in 0..Self::F32_LANES {
                    out[i] = if mask[i].is_set() {
                        if_true[i]
                    } else {
                        if_false[i]
                    };
                }

                cast(out)
            }

            /// `u64` counterpart of `select_u32s_m32s`.
            #[inline]
            fn select_u64s_m64s(
                self,
                mask: Self::m64s,
                if_true: Self::u64s,
                if_false: Self::u64s,
            ) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let mask: [m64; Self::F64_LANES] = cast(mask);
                let if_true: [u64; Self::F64_LANES] = cast(if_true);
                let if_false: [u64; Self::F64_LANES] = cast(if_false);

                for i in 0..Self::F64_LANES {
                    out[i] = if mask[i].is_set() {
                        if_true[i]
                    } else {
                        if_false[i]
                    };
                }

                cast(out)
            }
1884
            /// Broadcasts `value` to every `u32` lane.
            #[inline]
            fn splat_u32s(self, value: u32) -> Self::u32s {
                cast([value; Self::F32_LANES])
            }

            /// Lane-wise wrapping addition of `u32` lanes.
            #[inline]
            fn add_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i].wrapping_add(b[i]);
                }
                cast(out)
            }

            /// Lane-wise wrapping subtraction of `u32` lanes.
            #[inline]
            fn sub_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i].wrapping_sub(b[i]);
                }
                cast(out)
            }
1911
            // Lane-wise comparisons over `u32` and `i32` lanes; each returns
            // a mask vector with one `m32` per lane.
            #[inline]
            fn less_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] < b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] > b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] <= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] >= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [i32; Self::F32_LANES] = cast(a);
                let b: [i32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] < b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [i32; Self::F32_LANES] = cast(a);
                let b: [i32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] > b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [i32; Self::F32_LANES] = cast(a);
                let b: [i32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] <= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [i32; Self::F32_LANES] = cast(a);
                let b: [i32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] >= b[i]);
                }
                cast(out)
            }
1999
            /// Lane-wise left shift; `wrapping_shl` masks each shift amount
            /// modulo the bit width (32).
            #[inline]
            fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(amount);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i].wrapping_shl(b[i]);
                }
                cast(out)
            }

            /// Lane-wise right shift; `wrapping_shr` masks each shift amount
            /// modulo the bit width (32).
            #[inline]
            fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
                let mut out = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(amount);
                for i in 0..Self::F32_LANES {
                    out[i] = a[i].wrapping_shr(b[i]);
                }
                cast(out)
            }

            /// Lane-wise full 32x32 -> 64-bit multiply, split into low and
            /// high 32-bit halves.
            #[inline]
            fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s) {
                let mut lo = [0u32; Self::F32_LANES];
                let mut hi = [0u32; Self::F32_LANES];
                let a: [u32; Self::F32_LANES] = cast(a);
                let b: [u32; Self::F32_LANES] = cast(b);
                for i in 0..Self::F32_LANES {
                    // Widen to u64 so the product cannot overflow.
                    let m = a[i] as u64 * b[i] as u64;

                    (lo[i], hi[i]) = (m as u32, (m >> 32) as u32);
                }
                (cast(lo), cast(hi))
            }
2035
            /// Broadcasts `value` to every `u64` lane.
            #[inline]
            fn splat_u64s(self, value: u64) -> Self::u64s {
                cast([value; Self::F64_LANES])
            }

            /// Lane-wise wrapping addition of `u64` lanes.
            #[inline]
            fn add_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i].wrapping_add(b[i]);
                }
                cast(out)
            }

            /// Lane-wise wrapping subtraction of `u64` lanes.
            #[inline]
            fn sub_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
                let mut out = [0u64; Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = a[i].wrapping_sub(b[i]);
                }
                cast(out)
            }
2062
            // Lane-wise comparisons over `u64` and `i64` lanes; each returns
            // a mask vector with one `m64` per lane.
            #[inline]
            fn less_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] < b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] > b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] <= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [u64; Self::F64_LANES] = cast(a);
                let b: [u64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] >= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [i64; Self::F64_LANES] = cast(a);
                let b: [i64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] < b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [i64; Self::F64_LANES] = cast(a);
                let b: [i64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] > b[i]);
                }
                cast(out)
            }

            #[inline]
            fn less_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [i64; Self::F64_LANES] = cast(a);
                let b: [i64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] <= b[i]);
                }
                cast(out)
            }

            #[inline]
            fn greater_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [i64; Self::F64_LANES] = cast(a);
                let b: [i64; Self::F64_LANES] = cast(b);
                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] >= b[i]);
                }
                cast(out)
            }
2150
            /// Broadcasts `value` to every `f32` lane.
            #[inline]
            fn splat_f32s(self, value: f32) -> Self::f32s {
                cast([value; Self::F32_LANES])
            }

            /// Lane-wise `f32` addition.
            #[inline]
            fn add_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = a[i] + b[i];
                }

                cast(out)
            }

            /// Lane-wise `f32` subtraction.
            #[inline]
            fn sub_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = a[i] - b[i];
                }

                cast(out)
            }

            /// Lane-wise `f32` multiplication.
            #[inline]
            fn mul_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = a[i] * b[i];
                }

                cast(out)
            }

            /// Lane-wise `f32` division.
            #[inline]
            fn div_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = a[i] / b[i];
                }

                cast(out)
            }

            /// Lane-wise multiply-add `a * b + c`, delegating per lane to
            /// `fma_f32`.
            #[inline]
            fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);
                let c: [f32; Self::F32_LANES] = cast(c);

                for i in 0..Self::F32_LANES {
                    out[i] = fma_f32(a[i], b[i], c[i]);
                }

                cast(out)
            }
2221
            // ---- f32 lane-wise comparisons → m32 mask lanes ------------------

            #[inline]
            fn equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] == b[i]);
                }

                cast(out)
            }

            #[inline]
            fn less_than_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] < b[i]);
                }

                cast(out)
            }

            #[inline]
            fn less_than_or_equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
                let mut out = [m32::new(false); Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = m32::new(a[i] <= b[i]);
                }

                cast(out)
            }
2260
            // ---- f32 lane-wise min/max ---------------------------------------
            // Uses `f32::min`/`f32::max`, which return the non-NaN operand when
            // exactly one input is NaN.

            #[inline]
            fn min_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = f32::min(a[i], b[i]);
                }

                cast(out)
            }

            #[inline]
            fn max_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
                let mut out = [0.0f32; Self::F32_LANES];
                let a: [f32; Self::F32_LANES] = cast(a);
                let b: [f32; Self::F32_LANES] = cast(b);

                for i in 0..Self::F32_LANES {
                    out[i] = f32::max(a[i], b[i]);
                }

                cast(out)
            }
2286
            // ---- f32 horizontal reductions -----------------------------------
            // Pairwise tree reduction: halve the active width each pass and
            // combine element i with element i + n.  This matches the order a
            // vectorized shuffle-and-combine reduction would use, so results are
            // bitwise-consistent with the SIMD backends for float ops.
            // NOTE: assumes the lane count is a power of two (true for every
            // `scalar_simd!` instantiation below: 4, 8 or 16 f32 lanes).

            #[inline]
            fn reduce_sum_f32s(self, a: Self::f32s) -> f32 {
                let mut a: [f32; Self::F32_LANES] = cast(a);

                let mut n = Self::F32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] += a[i + n];
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_product_f32s(self, a: Self::f32s) -> f32 {
                let mut a: [f32; Self::F32_LANES] = cast(a);

                let mut n = Self::F32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] *= a[i + n];
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_min_f32s(self, a: Self::f32s) -> f32 {
                let mut a: [f32; Self::F32_LANES] = cast(a);

                let mut n = Self::F32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] = f32::min(a[i], a[i + n]);
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_max_f32s(self, a: Self::f32s) -> f32 {
                let mut a: [f32; Self::F32_LANES] = cast(a);

                let mut n = Self::F32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] = f32::max(a[i], a[i + n]);
                    }
                }

                a[0]
            }
2346
            // ---- c32 splat / unary ops ---------------------------------------
            // The register is viewed as [c32; C32_LANES] (interleaved re/im).

            #[inline]
            fn splat_c32s(self, value: c32) -> Self::c32s {
                cast([value; Self::C32_LANES])
            }

            #[inline]
            fn conj_c32s(self, a: Self::c32s) -> Self::c32s {
                // Complex conjugate: negate the imaginary part.
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);

                for i in 0..Self::C32_LANES {
                    out[i] = c32::new(a[i].re, -a[i].im);
                }

                cast(out)
            }

            #[inline]
            fn neg_c32s(self, a: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);

                for i in 0..Self::C32_LANES {
                    out[i] = c32::new(-a[i].re, -a[i].im);
                }

                cast(out)
            }

            #[inline]
            fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s {
                // (re, im) -> (im, re) per lane.
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);

                for i in 0..Self::C32_LANES {
                    out[i] = c32::new(a[i].im, a[i].re);
                }

                cast(out)
            }
2387
            // ---- c32 lane-wise arithmetic ------------------------------------
            // Complex products are expressed with `fma_f32` so each component is
            // computed with the same fused/unfused rounding as the helper, and
            // sign flips are folded into the fma arguments:
            //   mul:          (re1*re2 - im1*im2, re1*im2 + im1*re2)
            //   conj_mul:     (re1*re2 + im1*im2, re1*im2 - im1*re2)  [conj(a)*b]

            #[inline]
            fn add_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);

                for i in 0..Self::C32_LANES {
                    out[i] = c32::new(a[i].re + b[i].re, a[i].im + b[i].im);
                }

                cast(out)
            }

            #[inline]
            fn sub_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);

                for i in 0..Self::C32_LANES {
                    out[i] = c32::new(a[i].re - b[i].re, a[i].im - b[i].im);
                }

                cast(out)
            }

            #[inline]
            fn mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);

                for i in 0..Self::C32_LANES {
                    out[i].re = fma_f32(a[i].re, b[i].re, -(a[i].im * b[i].im));
                    out[i].im = fma_f32(a[i].re, b[i].im, a[i].im * b[i].re);
                }

                cast(out)
            }

            #[inline]
            fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);

                for i in 0..Self::C32_LANES {
                    out[i].re = fma_f32(a[i].re, b[i].re, a[i].im * b[i].im);
                    out[i].im = fma_f32(a[i].re, b[i].im, -(a[i].im * b[i].re));
                }

                cast(out)
            }

            #[inline]
            fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);
                let c: [c32; Self::C32_LANES] = cast(c);

                for i in 0..Self::C32_LANES {
                    // re = a.re*b.re - (a.im*b.im - c.re);  im = a.re*b.im + (a.im*b.re + c.im)
                    out[i].re = fma_f32(a[i].re, b[i].re, -fma_f32(a[i].im, b[i].im, -c[i].re));
                    out[i].im = fma_f32(a[i].re, b[i].im, fma_f32(a[i].im, b[i].re, c[i].im));
                }

                cast(out)
            }

            #[inline]
            fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);
                let b: [c32; Self::C32_LANES] = cast(b);
                let c: [c32; Self::C32_LANES] = cast(c);

                for i in 0..Self::C32_LANES {
                    // conj(a)*b + c, with the sign flips folded into the fmas.
                    out[i].re = fma_f32(a[i].re, b[i].re, fma_f32(a[i].im, b[i].im, c[i].re));
                    out[i].im = fma_f32(a[i].re, b[i].im, -fma_f32(a[i].im, b[i].re, -c[i].im));
                }

                cast(out)
            }
2471
            // ---- c32 abs helpers ---------------------------------------------

            #[inline]
            fn abs2_c32s(self, a: Self::c32s) -> Self::c32s {
                // Squared modulus, broadcast to both components of each lane.
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(a);

                for i in 0..Self::C32_LANES {
                    let x = a[i].re * a[i].re + a[i].im * a[i].im;
                    out[i].re = x;
                    out[i].im = x;
                }

                cast(out)
            }

            #[inline]
            fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s {
                // Take component-wise absolute values first (|re|, |im|), then
                // broadcast max(|re|, |im|) to both components of each lane.
                let mut out = [c32::ZERO; Self::C32_LANES];
                let a: [c32; Self::C32_LANES] = cast(self.abs_f32s(a));

                for i in 0..Self::C32_LANES {
                    let x = f32::max(a[i].re, a[i].im);
                    out[i].re = x;
                    out[i].im = x;
                }

                cast(out)
            }
2499
            // ---- c32 horizontal reductions -----------------------------------
            // Same pairwise tree as the f32 reductions, applied independently to
            // the real and imaginary components.  min/max are component-wise
            // (min of res, min of ims) — NOT a lexicographic complex ordering.

            #[inline]
            fn reduce_sum_c32s(self, a: Self::c32s) -> c32 {
                let mut a: [c32; Self::C32_LANES] = cast(a);

                let mut n = Self::C32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re += a[i + n].re;
                        a[i].im += a[i + n].im;
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_min_c32s(self, a: Self::c32s) -> c32 {
                let mut a: [c32; Self::C32_LANES] = cast(a);

                let mut n = Self::C32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re = f32::min(a[i].re, a[i + n].re);
                        a[i].im = f32::min(a[i].im, a[i + n].im);
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_max_c32s(self, a: Self::c32s) -> c32 {
                let mut a: [c32; Self::C32_LANES] = cast(a);

                let mut n = Self::C32_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re = f32::max(a[i].re, a[i + n].re);
                        a[i].im = f32::max(a[i].im, a[i + n].im);
                    }
                }

                a[0]
            }
2547
            // ---- lane rotations (32-bit element types) -----------------------

            #[inline]
            fn rotate_right_u32s(self, a: Self::u32s, amount: usize) -> Self::u32s {
                // u32 lane count equals the f32 lane count, so F32_LANES is used
                // for both the array length and the modulus.
                let mut a: [u32; Self::F32_LANES] = cast(a);
                let amount = amount % Self::F32_LANES;
                a.rotate_right(amount);
                cast(a)
            }

            #[inline]
            fn rotate_right_c32s(self, a: Self::c32s, amount: usize) -> Self::c32s {
                // Rotates whole complex lanes (re/im pairs move together).
                let mut a: [c32; Self::C32_LANES] = cast(a);
                let amount = amount % Self::C32_LANES;
                a.rotate_right(amount);
                cast(a)
            }
2563
            // ---- f64 lane-wise arithmetic (mirrors the f32 versions) ---------

            #[inline]
            fn splat_f64s(self, value: f64) -> Self::f64s {
                cast([value; Self::F64_LANES])
            }

            #[inline]
            fn add_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = a[i] + b[i];
                }

                cast(out)
            }

            #[inline]
            fn sub_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = a[i] - b[i];
                }

                cast(out)
            }

            #[inline]
            fn mul_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = a[i] * b[i];
                }

                cast(out)
            }

            #[inline]
            fn div_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = a[i] / b[i];
                }

                cast(out)
            }

            #[inline]
            fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);
                let c: [f64; Self::F64_LANES] = cast(c);

                for i in 0..Self::F64_LANES {
                    // a*b + c per lane via the `fma_f64` helper.
                    out[i] = fma_f64(a[i], b[i], c[i]);
                }

                cast(out)
            }
2634
            // ---- f64 lane-wise comparisons and min/max -----------------------

            #[inline]
            fn equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] == b[i]);
                }

                cast(out)
            }

            #[inline]
            fn less_than_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] < b[i]);
                }

                cast(out)
            }

            #[inline]
            fn less_than_or_equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
                let mut out = [m64::new(false); Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = m64::new(a[i] <= b[i]);
                }

                cast(out)
            }

            #[inline]
            fn min_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = f64::min(a[i], b[i]);
                }

                cast(out)
            }

            #[inline]
            fn max_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
                let mut out = [0.0f64; Self::F64_LANES];
                let a: [f64; Self::F64_LANES] = cast(a);
                let b: [f64; Self::F64_LANES] = cast(b);

                for i in 0..Self::F64_LANES {
                    out[i] = f64::max(a[i], b[i]);
                }

                cast(out)
            }
2699
            // ---- f64 horizontal reductions -----------------------------------
            // Pairwise tree reduction, same scheme (and same power-of-two lane
            // assumption) as the f32 reductions above.

            #[inline]
            fn reduce_sum_f64s(self, a: Self::f64s) -> f64 {
                let mut a: [f64; Self::F64_LANES] = cast(a);

                let mut n = Self::F64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] += a[i + n];
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_product_f64s(self, a: Self::f64s) -> f64 {
                let mut a: [f64; Self::F64_LANES] = cast(a);

                let mut n = Self::F64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] *= a[i + n];
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_min_f64s(self, a: Self::f64s) -> f64 {
                let mut a: [f64; Self::F64_LANES] = cast(a);

                let mut n = Self::F64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] = f64::min(a[i], a[i + n]);
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_max_f64s(self, a: Self::f64s) -> f64 {
                let mut a: [f64; Self::F64_LANES] = cast(a);

                let mut n = Self::F64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i] = f64::max(a[i], a[i + n]);
                    }
                }

                a[0]
            }
2759
            // ---- c64 splat / unary ops (mirrors the c32 versions) ------------

            #[inline]
            fn splat_c64s(self, value: c64) -> Self::c64s {
                cast([value; Self::C64_LANES])
            }

            #[inline]
            fn conj_c64s(self, a: Self::c64s) -> Self::c64s {
                // Complex conjugate: negate the imaginary part.
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);

                for i in 0..Self::C64_LANES {
                    out[i] = c64::new(a[i].re, -a[i].im);
                }

                cast(out)
            }

            #[inline]
            fn neg_c64s(self, a: Self::c64s) -> Self::c64s {
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);

                for i in 0..Self::C64_LANES {
                    out[i] = c64::new(-a[i].re, -a[i].im);
                }

                cast(out)
            }

            #[inline]
            fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s {
                // (re, im) -> (im, re) per lane.
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);

                for i in 0..Self::C64_LANES {
                    out[i] = c64::new(a[i].im, a[i].re);
                }

                cast(out)
            }
2800
            // ---- c64 lane-wise arithmetic ------------------------------------
            // Same fma-based complex formulas as the c32 versions, in f64.

            #[inline]
            fn add_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);

                for i in 0..Self::C64_LANES {
                    out[i] = c64::new(a[i].re + b[i].re, a[i].im + b[i].im);
                }

                cast(out)
            }

            #[inline]
            fn sub_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);

                for i in 0..Self::C64_LANES {
                    out[i] = c64::new(a[i].re - b[i].re, a[i].im - b[i].im);
                }

                cast(out)
            }

            #[inline]
            fn mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);

                for i in 0..Self::C64_LANES {
                    out[i].re = fma_f64(a[i].re, b[i].re, -(a[i].im * b[i].im));
                    out[i].im = fma_f64(a[i].re, b[i].im, a[i].im * b[i].re);
                }

                cast(out)
            }

            #[inline]
            fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
                // conj(a) * b per lane.
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);

                for i in 0..Self::C64_LANES {
                    out[i].re = fma_f64(a[i].re, b[i].re, a[i].im * b[i].im);
                    out[i].im = fma_f64(a[i].re, b[i].im, -(a[i].im * b[i].re));
                }

                cast(out)
            }

            #[inline]
            fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);
                let c: [c64; Self::C64_LANES] = cast(c);

                for i in 0..Self::C64_LANES {
                    out[i].re = fma_f64(a[i].re, b[i].re, -fma_f64(a[i].im, b[i].im, -c[i].re));
                    out[i].im = fma_f64(a[i].re, b[i].im, fma_f64(a[i].im, b[i].re, c[i].im));
                }

                cast(out)
            }

            #[inline]
            fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
                // conj(a)*b + c per lane.
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);
                let b: [c64; Self::C64_LANES] = cast(b);
                let c: [c64; Self::C64_LANES] = cast(c);

                for i in 0..Self::C64_LANES {
                    out[i].re = fma_f64(a[i].re, b[i].re, fma_f64(a[i].im, b[i].im, c[i].re));
                    out[i].im = fma_f64(a[i].re, b[i].im, -fma_f64(a[i].im, b[i].re, -c[i].im));
                }

                cast(out)
            }
2884
            // ---- c64 abs helpers ---------------------------------------------

            #[inline]
            fn abs2_c64s(self, a: Self::c64s) -> Self::c64s {
                // Squared modulus, broadcast to both components of each lane.
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(a);

                for i in 0..Self::C64_LANES {
                    let x = a[i].re * a[i].re + a[i].im * a[i].im;
                    out[i].re = x;
                    out[i].im = x;
                }

                cast(out)
            }

            #[inline]
            fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s {
                // Component-wise |.| first, then broadcast max(|re|, |im|).
                let mut out = [c64::ZERO; Self::C64_LANES];
                let a: [c64; Self::C64_LANES] = cast(self.abs_f64s(a));

                for i in 0..Self::C64_LANES {
                    let x = f64::max(a[i].re, a[i].im);
                    out[i].re = x;
                    out[i].im = x;
                }

                cast(out)
            }
2912
            // ---- c64 horizontal reductions (component-wise, like c32) -------

            #[inline]
            fn reduce_sum_c64s(self, a: Self::c64s) -> c64 {
                let mut a: [c64; Self::C64_LANES] = cast(a);

                let mut n = Self::C64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re += a[i + n].re;
                        a[i].im += a[i + n].im;
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_min_c64s(self, a: Self::c64s) -> c64 {
                let mut a: [c64; Self::C64_LANES] = cast(a);

                let mut n = Self::C64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re = f64::min(a[i].re, a[i + n].re);
                        a[i].im = f64::min(a[i].im, a[i + n].im);
                    }
                }

                a[0]
            }

            #[inline]
            fn reduce_max_c64s(self, a: Self::c64s) -> c64 {
                let mut a: [c64; Self::C64_LANES] = cast(a);

                let mut n = Self::C64_LANES;
                while n > 1 {
                    n /= 2;
                    for i in 0..n {
                        a[i].re = f64::max(a[i].re, a[i + n].re);
                        a[i].im = f64::max(a[i].im, a[i + n].im);
                    }
                }

                a[0]
            }
2960
            // ---- lane rotations (64-bit element types) -----------------------

            #[inline]
            fn rotate_right_u64s(self, a: Self::u64s, amount: usize) -> Self::u64s {
                // u64 lane count equals the f64 lane count.
                let mut a: [u64; Self::F64_LANES] = cast(a);
                let amount = amount % Self::F64_LANES;
                a.rotate_right(amount);
                cast(a)
            }

            #[inline]
            fn rotate_right_c64s(self, a: Self::c64s, amount: usize) -> Self::c64s {
                // Rotates whole complex lanes (re/im pairs move together).
                let mut a: [c64; Self::C64_LANES] = cast(a);
                let amount = amount % Self::C64_LANES;
                a.rotate_right(amount);
                cast(a)
            }
2976
            // The "e" (erroneous/relaxed) fma variants are allowed to use a
            // non-fused multiply-add; this backend simply forwards to the exact
            // versions.

            #[inline]
            fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
                self.mul_add_f32s(a, b, c)
            }

            #[inline]
            fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
                self.mul_add_f64s(a, b, c)
            }
2986 }
2987 };
2988}
2989
// Software-emulated "wide register" backends built from the scalar loops in
// `scalar_simd!`: 128/256/512-bit registers expressed as the given vector
// types.  NOTE(review): the second argument is presumably the emulated
// register count (16, 16, 8) — confirm against the macro's parameter list.
scalar_simd!(
    Scalar128b, 16, m32x4, f32x4, i32x4, u32x4, m64x2, f64x2, i64x2, u64x2
);
scalar_simd!(
    Scalar256b, 16, m32x8, f32x8, i32x8, u32x8, m64x4, f64x4, i64x4, u64x4
);
scalar_simd!(
    Scalar512b, 8, m32x16, f32x16, i32x16, u32x16, m64x8, f64x8, i64x8, u64x8
);
2999
3000impl Default for Scalar {
3001 #[inline]
3002 fn default() -> Self {
3003 Self::new()
3004 }
3005}
3006
impl Scalar {
    /// Creates the scalar (non-vectorized) SIMD backend.  `Scalar` is a unit
    /// struct, so this is free.
    #[inline]
    pub fn new() -> Self {
        Self
    }
}
3013
// NOTE(review): `Seal` appears to be the crate's sealing trait gating `Simd`
// implementations — confirm against its definition elsewhere in the file.
impl Seal for Scalar {}
3015impl Simd for Scalar {
    // One lane per register: every vector type degenerates to its scalar
    // element type, and masks are plain `bool`s.
    type c32s = c32;
    type c64s = c64;
    type f32s = f32;
    type f64s = f64;
    type i32s = i32;
    type i64s = i64;
    type m32s = bool;
    type m64s = bool;
    type u32s = u32;
    type u64s = u64;

    const IS_SCALAR: bool = true;
    const REGISTER_COUNT: usize = 16;
3029
    #[inline]
    fn abs2_c32s(self, a: Self::c32s) -> Self::c32s {
        // Squared modulus |a|^2, broadcast to both components.
        let norm2 = a.re * a.re + a.im * a.im;
        c32::new(norm2, norm2)
    }

    #[inline]
    fn abs2_c64s(self, a: Self::c64s) -> Self::c64s {
        // Squared modulus |a|^2, broadcast to both components.
        let norm2 = a.re * a.re + a.im * a.im;
        c64::new(norm2, norm2)
    }
3041
3042 #[inline(always)]
3043 fn abs_max_c32s(self, a: Self::c32s) -> Self::c32s {
3044 let re = if a.re > a.im { a.re } else { a.im };
3045 let im = re;
3046 Complex { re, im }
3047 }
3048
3049 #[inline(always)]
3050 fn abs_max_c64s(self, a: Self::c64s) -> Self::c64s {
3051 let re = if a.re > a.im { a.re } else { a.im };
3052 let im = re;
3053 Complex { re, im }
3054 }
3055
    // ---- scalar add / bitwise-and ----------------------------------------
    // Unsigned adds use `wrapping_add` so overflow wraps (no debug-build
    // panic), matching two's-complement wraparound.

    #[inline]
    fn add_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        a + b
    }

    #[inline]
    fn add_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        a + b
    }

    #[inline]
    fn add_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a + b
    }

    #[inline]
    fn add_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a + b
    }

    #[inline]
    fn add_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
        a.wrapping_add(b)
    }

    #[inline]
    fn add_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
        a.wrapping_add(b)
    }

    #[inline]
    fn and_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
        a & b
    }

    #[inline]
    fn and_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
        a & b
    }

    #[inline]
    fn and_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
        a & b
    }

    #[inline]
    fn and_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
        a & b
    }
3105
    // ---- conjugate and conj-multiply family ------------------------------
    // The non-"_e" versions use the `fma_f32`/`fma_f64` helpers; the "_e"
    // (relaxed) versions use plain complex arithmetic, which may round
    // differently.

    #[inline]
    fn conj_c32s(self, a: Self::c32s) -> Self::c32s {
        a.conj()
    }

    #[inline]
    fn conj_c64s(self, a: Self::c64s) -> Self::c64s {
        a.conj()
    }

    #[inline]
    fn conj_mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
        // conj(a)*b + c: re = a.re*b.re + a.im*b.im + c.re,
        //                im = a.re*b.im - a.im*b.re + c.im.
        let re = fma_f32(a.re, b.re, fma_f32(a.im, b.im, c.re));
        let im = fma_f32(a.re, b.im, -fma_f32(a.im, b.re, -c.im));
        Complex { re, im }
    }

    #[inline]
    fn conj_mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
        let re = fma_f64(a.re, b.re, fma_f64(a.im, b.im, c.re));
        let im = fma_f64(a.re, b.im, -fma_f64(a.im, b.re, -c.im));
        Complex { re, im }
    }

    #[inline]
    fn conj_mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
        a.conj() * b + c
    }

    #[inline]
    fn conj_mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
        a.conj() * b + c
    }

    #[inline]
    fn conj_mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        let re = fma_f32(a.re, b.re, a.im * b.im);
        let im = fma_f32(a.re, b.im, -(a.im * b.re));
        Complex { re, im }
    }

    #[inline]
    fn conj_mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        let re = fma_f64(a.re, b.re, a.im * b.im);
        let im = fma_f64(a.re, b.im, -(a.im * b.re));
        Complex { re, im }
    }

    #[inline]
    fn conj_mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        a.conj() * b
    }

    #[inline]
    fn conj_mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        a.conj() * b
    }
3163
    // ---- scalar division and float equality ------------------------------

    #[inline]
    fn div_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a / b
    }

    #[inline]
    fn div_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a / b
    }

    #[inline]
    fn equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
        // IEEE equality: NaN != NaN, -0.0 == 0.0.
        a == b
    }

    #[inline]
    fn equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
        a == b
    }
3183
    // Index of the first set mask lane.  With a single lane, this is 0 when
    // the mask is set and 1 (== lane count, i.e. "not found") otherwise.

    #[inline(always)]
    fn first_true_m32s(self, mask: Self::m32s) -> usize {
        if mask { 0 } else { 1 }
    }

    #[inline(always)]
    fn first_true_m64s(self, mask: Self::m64s) -> usize {
        if mask { 0 } else { 1 }
    }
3193
    // ---- scalar ordered comparisons (u32/u64, i32/i64, f32/f64) ----------
    // Each returns the corresponding mask type (`bool` for this backend).

    #[inline]
    fn greater_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
        a >= b
    }

    #[inline(always)]
    fn greater_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
        a >= b
    }

    #[inline]
    fn greater_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
        a > b
    }

    #[inline(always)]
    fn greater_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
        a > b
    }

    #[inline]
    fn greater_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
        a >= b
    }

    #[inline(always)]
    fn greater_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
        a >= b
    }

    #[inline]
    fn greater_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
        a > b
    }

    #[inline(always)]
    fn greater_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
        a > b
    }

    #[inline]
    fn less_than_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
        // Any comparison involving NaN is false.
        a < b
    }

    #[inline]
    fn less_than_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
        a < b
    }

    #[inline]
    fn less_than_or_equal_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::m32s {
        a <= b
    }

    #[inline]
    fn less_than_or_equal_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::m64s {
        a <= b
    }

    #[inline]
    fn less_than_or_equal_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
        a <= b
    }

    #[inline(always)]
    fn less_than_or_equal_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
        a <= b
    }

    #[inline]
    fn less_than_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::m32s {
        a < b
    }

    #[inline(always)]
    fn less_than_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::m64s {
        a < b
    }

    #[inline]
    fn less_than_or_equal_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
        a <= b
    }

    #[inline(always)]
    fn less_than_or_equal_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
        a <= b
    }

    #[inline]
    fn less_than_i32s(self, a: Self::i32s, b: Self::i32s) -> Self::m32s {
        a < b
    }

    #[inline(always)]
    fn less_than_i64s(self, a: Self::i64s, b: Self::i64s) -> Self::m64s {
        a < b
    }
3293
    // ---- masked pointer loads/stores -------------------------------------
    // Single lane: the load/store happens iff the (single-bit) mask is set.
    // Masked-out loads yield zero.  NOTE(review): the caller's safety
    // obligation (ptr validity/alignment when the mask is set) is defined by
    // the trait's `# Safety` docs, which are not visible here — confirm there.

    #[inline(always)]
    unsafe fn mask_load_ptr_c32s(self, mask: MemMask<Self::m32s>, ptr: *const c32) -> Self::c32s {
        // `zeroed` is fine here: an all-zero bit pattern is a valid c32 (0+0i).
        if mask.mask { *ptr } else { core::mem::zeroed() }
    }

    #[inline(always)]
    unsafe fn mask_load_ptr_c64s(self, mask: MemMask<Self::m64s>, ptr: *const c64) -> Self::c64s {
        if mask.mask { *ptr } else { core::mem::zeroed() }
    }

    #[inline(always)]
    unsafe fn mask_load_ptr_u32s(self, mask: MemMask<Self::m32s>, ptr: *const u32) -> Self::u32s {
        if mask.mask { *ptr } else { 0 }
    }

    #[inline(always)]
    unsafe fn mask_load_ptr_u64s(self, mask: MemMask<Self::m64s>, ptr: *const u64) -> Self::u64s {
        if mask.mask { *ptr } else { 0 }
    }

    #[inline(always)]
    unsafe fn mask_store_ptr_c32s(
        self,
        mask: MemMask<Self::m32s>,
        ptr: *mut c32,
        values: Self::c32s,
    ) {
        if mask.mask {
            *ptr = values
        }
    }

    #[inline(always)]
    unsafe fn mask_store_ptr_c64s(
        self,
        mask: MemMask<Self::m64s>,
        ptr: *mut c64,
        values: Self::c64s,
    ) {
        if mask.mask {
            *ptr = values
        }
    }

    #[inline(always)]
    unsafe fn mask_store_ptr_u32s(
        self,
        mask: MemMask<Self::m32s>,
        ptr: *mut u32,
        values: Self::u32s,
    ) {
        if mask.mask {
            *ptr = values
        }
    }

    #[inline(always)]
    unsafe fn mask_store_ptr_u64s(
        self,
        mask: MemMask<Self::m64s>,
        ptr: *mut u64,
        values: Self::u64s,
    ) {
        if mask.mask {
            *ptr = values
        }
    }
3361
    // ---- scalar min/max --------------------------------------------------
    // `f32::max`/`min` return the non-NaN operand when exactly one input is
    // NaN — consistent with the lane-wise versions in `scalar_simd!`.

    #[inline]
    fn max_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a.max(b)
    }

    #[inline]
    fn max_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a.max(b)
    }

    #[inline]
    fn min_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a.min(b)
    }

    #[inline]
    fn min_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a.min(b)
    }
3381
    // ---- multiply-add family ---------------------------------------------
    // Exact versions route through `fma_f32`/`fma_f64`; the "_e" (relaxed)
    // versions use plain `a * b + c` and may round differently.

    #[inline]
    fn mul_add_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
        // a*b + c: re = a.re*b.re - (a.im*b.im - c.re),
        //          im = a.re*b.im + (a.im*b.re + c.im).
        let re = fma_f32(a.re, b.re, -fma_f32(a.im, b.im, -c.re));
        let im = fma_f32(a.re, b.im, fma_f32(a.im, b.re, c.im));
        Complex { re, im }
    }

    #[inline]
    fn mul_add_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
        let re = fma_f64(a.re, b.re, -fma_f64(a.im, b.im, -c.re));
        let im = fma_f64(a.re, b.im, fma_f64(a.im, b.re, c.im));
        Complex { re, im }
    }

    #[inline]
    fn mul_add_e_c32s(self, a: Self::c32s, b: Self::c32s, c: Self::c32s) -> Self::c32s {
        a * b + c
    }

    #[inline]
    fn mul_add_e_c64s(self, a: Self::c64s, b: Self::c64s, c: Self::c64s) -> Self::c64s {
        a * b + c
    }

    #[inline(always)]
    fn mul_add_e_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
        a * b + c
    }

    #[inline(always)]
    fn mul_add_e_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
        a * b + c
    }

    #[inline]
    fn mul_add_f32s(self, a: Self::f32s, b: Self::f32s, c: Self::f32s) -> Self::f32s {
        fma_f32(a, b, c)
    }

    #[inline]
    fn mul_add_f64s(self, a: Self::f64s, b: Self::f64s, c: Self::f64s) -> Self::f64s {
        fma_f64(a, b, c)
    }
3425
    // ---- multiply family -------------------------------------------------

    #[inline]
    fn mul_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        // Complex product via the fma helpers (matches the vectorized path).
        let re = fma_f32(a.re, b.re, -(a.im * b.im));
        let im = fma_f32(a.re, b.im, a.im * b.re);
        Complex { re, im }
    }

    #[inline]
    fn mul_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        let re = fma_f64(a.re, b.re, -(a.im * b.im));
        let im = fma_f64(a.re, b.im, a.im * b.re);
        Complex { re, im }
    }

    #[inline]
    fn mul_e_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        // Relaxed version: plain complex multiply (may round differently).
        a * b
    }

    #[inline]
    fn mul_e_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        a * b
    }

    #[inline]
    fn mul_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a * b
    }

    #[inline]
    fn mul_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a * b
    }
3459
    // ---- negation and bitwise not/or -------------------------------------

    #[inline]
    fn neg_c32s(self, a: Self::c32s) -> Self::c32s {
        -a
    }

    #[inline]
    fn neg_c64s(self, a: Self::c64s) -> Self::c64s {
        -a
    }

    #[inline]
    fn not_m32s(self, a: Self::m32s) -> Self::m32s {
        !a
    }

    #[inline]
    fn not_m64s(self, a: Self::m64s) -> Self::m64s {
        !a
    }

    #[inline]
    fn not_u32s(self, a: Self::u32s) -> Self::u32s {
        !a
    }

    #[inline]
    fn not_u64s(self, a: Self::u64s) -> Self::u64s {
        !a
    }

    #[inline]
    fn or_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
        a | b
    }

    #[inline]
    fn or_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
        a | b
    }

    #[inline]
    fn or_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
        a | b
    }

    #[inline]
    fn or_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
        a | b
    }
3509
    // In this scalar implementation a "vector" holds a single lane, so a
    // partial load reads at most the first element of the slice.
    #[inline]
    fn partial_load_c64s(self, slice: &[c64]) -> Self::c64s {
        if let Some((head, _)) = slice.split_first() {
            *head
        } else {
            // Empty slice: the untouched lane is zero-filled.
            c64 { re: 0.0, im: 0.0 }
        }
    }

    #[inline]
    fn partial_load_u32s(self, slice: &[u32]) -> Self::u32s {
        if let Some((head, _)) = slice.split_first() {
            *head
        } else {
            0
        }
    }

    #[inline]
    fn partial_load_u64s(self, slice: &[u64]) -> Self::u64s {
        if let Some((head, _)) = slice.split_first() {
            *head
        } else {
            0
        }
    }

    // Partial stores write the single lane to the first element when the
    // slice is non-empty, and are a no-op otherwise.
    #[inline]
    fn partial_store_c64s(self, slice: &mut [c64], values: Self::c64s) {
        if let Some((head, _)) = slice.split_first_mut() {
            *head = values;
        }
    }

    #[inline]
    fn partial_store_u32s(self, slice: &mut [u32], values: Self::u32s) {
        if let Some((head, _)) = slice.split_first_mut() {
            *head = values;
        }
    }

    #[inline]
    fn partial_store_u64s(self, slice: &mut [u64], values: Self::u64s) {
        if let Some((head, _)) = slice.split_first_mut() {
            *head = values;
        }
    }
3557
    // Reductions over a single-lane vector are the identity function.
    #[inline(always)]
    fn reduce_max_c32s(self, a: Self::c32s) -> c32 {
        a
    }

    #[inline(always)]
    fn reduce_max_c64s(self, a: Self::c64s) -> c64 {
        a
    }

    #[inline]
    fn reduce_max_f32s(self, a: Self::f32s) -> f32 {
        a
    }

    #[inline]
    fn reduce_max_f64s(self, a: Self::f64s) -> f64 {
        a
    }

    #[inline(always)]
    fn reduce_min_c32s(self, a: Self::c32s) -> c32 {
        a
    }

    #[inline(always)]
    fn reduce_min_c64s(self, a: Self::c64s) -> c64 {
        a
    }

    #[inline]
    fn reduce_min_f32s(self, a: Self::f32s) -> f32 {
        a
    }

    #[inline]
    fn reduce_min_f64s(self, a: Self::f64s) -> f64 {
        a
    }

    #[inline]
    fn reduce_product_f32s(self, a: Self::f32s) -> f32 {
        a
    }

    #[inline]
    fn reduce_product_f64s(self, a: Self::f64s) -> f64 {
        a
    }

    #[inline]
    fn reduce_sum_c32s(self, a: Self::c32s) -> c32 {
        a
    }

    #[inline]
    fn reduce_sum_c64s(self, a: Self::c64s) -> c64 {
        a
    }

    #[inline]
    fn reduce_sum_f32s(self, a: Self::f32s) -> f32 {
        a
    }

    #[inline]
    fn reduce_sum_f64s(self, a: Self::f64s) -> f64 {
        a
    }
3627
    // Rotating a one-lane vector is a no-op regardless of the amount.
    #[inline(always)]
    fn rotate_right_c32s(self, a: Self::c32s, _amount: usize) -> Self::c32s {
        a
    }

    #[inline(always)]
    fn rotate_right_c64s(self, a: Self::c64s, _amount: usize) -> Self::c64s {
        a
    }

    #[inline(always)]
    fn rotate_right_u32s(self, a: Self::u32s, _amount: usize) -> Self::u32s {
        a
    }

    #[inline(always)]
    fn rotate_right_u64s(self, a: Self::u64s, _amount: usize) -> Self::u64s {
        a
    }

    // Lane select degenerates to a plain branch on the scalar mask.
    #[inline]
    fn select_u32s_m32s(
        self,
        mask: Self::m32s,
        if_true: Self::u32s,
        if_false: Self::u32s,
    ) -> Self::u32s {
        if mask { if_true } else { if_false }
    }

    #[inline]
    fn select_u64s_m64s(
        self,
        mask: Self::m64s,
        if_true: Self::u64s,
        if_false: Self::u64s,
    ) -> Self::u64s {
        if mask { if_true } else { if_false }
    }
3667
    // Splatting into a one-lane vector is just the value itself.
    #[inline]
    fn splat_c32s(self, value: c32) -> Self::c32s {
        value
    }

    #[inline]
    fn splat_c64s(self, value: c64) -> Self::c64s {
        value
    }

    #[inline]
    fn splat_f32s(self, value: f32) -> Self::f32s {
        value
    }

    #[inline]
    fn splat_f64s(self, value: f64) -> Self::f64s {
        value
    }

    #[inline]
    fn splat_u32s(self, value: u32) -> Self::u32s {
        value
    }

    #[inline]
    fn splat_u64s(self, value: u64) -> Self::u64s {
        value
    }

    #[inline]
    fn sub_c32s(self, a: Self::c32s, b: Self::c32s) -> Self::c32s {
        a - b
    }

    #[inline]
    fn sub_c64s(self, a: Self::c64s, b: Self::c64s) -> Self::c64s {
        a - b
    }

    #[inline]
    fn sub_f32s(self, a: Self::f32s, b: Self::f32s) -> Self::f32s {
        a - b
    }

    #[inline]
    fn sub_f64s(self, a: Self::f64s, b: Self::f64s) -> Self::f64s {
        a - b
    }

    // Integer subtraction wraps on overflow, matching SIMD semantics.
    #[inline]
    fn sub_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
        a.wrapping_sub(b)
    }

    #[inline]
    fn sub_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
        a.wrapping_sub(b)
    }
3727
    // Exchanges the real and imaginary parts of the (scalar) complex value.
    #[inline]
    fn swap_re_im_c32s(self, a: Self::c32s) -> Self::c32s {
        c32 { re: a.im, im: a.re }
    }
3732
3733 fn swap_re_im_c64s(self, a: Self::c64s) -> Self::c64s {
3734 c64 { re: a.im, im: a.re }
3735 }
3736
    #[inline]
    fn vectorize<Op: WithSimd>(self, op: Op) -> Op::Output {
        // Runs `op` with this scalar SIMD token.
        op.with_simd(self)
    }

    #[inline]
    fn widening_mul_u32s(self, a: Self::u32s, b: Self::u32s) -> (Self::u32s, Self::u32s) {
        // Full 64-bit product of the two 32-bit lanes, returned as
        // (low 32 bits, high 32 bits).
        let c = a as u64 * b as u64;
        let lo = c as u32;
        let hi = (c >> 32) as u32;
        (lo, hi)
    }

    #[inline]
    fn wrapping_dyn_shl_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
        // `wrapping_shl` reduces the shift amount modulo the bit width (32).
        a.wrapping_shl(amount)
    }

    #[inline]
    fn wrapping_dyn_shr_u32s(self, a: Self::u32s, amount: Self::u32s) -> Self::u32s {
        a.wrapping_shr(amount)
    }

    // xor_*: `^` of the two operands.
    #[inline]
    fn xor_m32s(self, a: Self::m32s, b: Self::m32s) -> Self::m32s {
        a ^ b
    }

    #[inline]
    fn xor_m64s(self, a: Self::m64s, b: Self::m64s) -> Self::m64s {
        a ^ b
    }

    #[inline]
    fn xor_u32s(self, a: Self::u32s, b: Self::u32s) -> Self::u32s {
        a ^ b
    }

    #[inline]
    fn xor_u64s(self, a: Self::u64s, b: Self::u64s) -> Self::u64s {
        a ^ b
    }
3779}
3780
#[inline(always)]
unsafe fn split_slice<T, U>(slice: &[T]) -> (&[U], &[T]) {
    // `U` must be a whole number of `T`s with identical alignment, so that a
    // prefix of `T`s can be reinterpreted as `U` chunks.
    assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
    assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());

    let elems_per_chunk = core::mem::size_of::<U>() / core::mem::size_of::<T>();
    let full_chunks = slice.len() / elems_per_chunk;

    // Split off the largest prefix that is a whole number of `U` chunks; the
    // remainder keeps its original element type.
    let (head, tail) = slice.split_at(full_chunks * elems_per_chunk);
    (
        from_raw_parts(head.as_ptr() as *const U, full_chunks),
        tail,
    )
}
3798
#[inline(always)]
unsafe fn split_mut_slice<T, U>(slice: &mut [T]) -> (&mut [U], &mut [T]) {
    // Same layout preconditions as `split_slice`.
    assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
    assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());

    let elems_per_chunk = core::mem::size_of::<U>() / core::mem::size_of::<T>();
    let full_chunks = slice.len() / elems_per_chunk;

    // `split_at_mut` guarantees the two halves don't alias, so the
    // reinterpreted prefix and the raw tail stay disjoint.
    let (head, tail) = slice.split_at_mut(full_chunks * elems_per_chunk);
    (
        from_raw_parts_mut(head.as_mut_ptr() as *mut U, full_chunks),
        tail,
    )
}
3816
#[inline(always)]
unsafe fn rsplit_slice<T, U>(slice: &[T]) -> (&[T], &[U]) {
    // Mirror image of `split_slice`: the remainder comes first and the
    // reinterpreted `U` chunks form the suffix.
    assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
    assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());

    let elems_per_chunk = core::mem::size_of::<U>() / core::mem::size_of::<T>();
    let leftover = slice.len() % elems_per_chunk;

    let (head, tail) = slice.split_at(leftover);
    (
        head,
        from_raw_parts(tail.as_ptr() as *const U, tail.len() / elems_per_chunk),
    )
}
3834
#[inline(always)]
unsafe fn rsplit_mut_slice<T, U>(slice: &mut [T]) -> (&mut [T], &mut [U]) {
    // Mutable counterpart of `rsplit_slice`: remainder prefix, chunk suffix.
    assert_eq!(core::mem::size_of::<U>() % core::mem::size_of::<T>(), 0);
    assert_eq!(core::mem::align_of::<U>(), core::mem::align_of::<T>());

    let elems_per_chunk = core::mem::size_of::<U>() / core::mem::size_of::<T>();
    let leftover = slice.len() % elems_per_chunk;

    // `split_at_mut` keeps the two returned borrows disjoint.
    let (head, tail) = slice.split_at_mut(leftover);
    (
        head,
        from_raw_parts_mut(tail.as_mut_ptr() as *mut U, tail.len() / elems_per_chunk),
    )
}
3852
// Selects the `Arch` type for the compilation target: re-export the
// target-specific implementation on x86/x86_64 and aarch64, otherwise
// define a scalar-only fallback with the same public surface.
match_cfg!(item, match cfg!() {
    const { any(target_arch = "x86", target_arch = "x86_64") } => {
        pub use x86::Arch;
    },
    const { target_arch = "aarch64" } => {
        pub use aarch64::Arch;
    },
    _ => {
        // Fallback: only the scalar implementation is available.
        #[derive(Debug, Clone, Copy)]
        #[non_exhaustive]
        pub enum Arch {
            Scalar,
        }

        impl Arch {
            #[inline(always)]
            pub fn new() -> Self {
                Self::Scalar
            }

            // Runs `op` with the `Scalar` SIMD token.
            #[inline(always)]
            pub fn dispatch<Op: WithSimd>(self, op: Op) -> Op::Output {
                op.with_simd(Scalar)
            }
        }
        impl Default for Arch {
            #[inline]
            fn default() -> Self {
                Self::new()
            }
        }
    },
});
3886
#[doc(hidden)]
pub struct CheckSameSize<T, U>(PhantomData<(T, U)>);
impl<T, U> CheckSameSize<T, U> {
    /// Evaluating this constant fails compilation unless `T` and `U` have
    /// the same size; used by the `static_assert_same_size!` macro below.
    pub const VALID: () = {
        assert!(core::mem::size_of::<T>() == core::mem::size_of::<U>());
    };
}
3894
#[doc(hidden)]
pub struct CheckSizeLessThanOrEqual<T, U>(PhantomData<(T, U)>);
impl<T, U> CheckSizeLessThanOrEqual<T, U> {
    /// Evaluating this constant fails compilation unless `size_of::<T>()`
    /// is at most `size_of::<U>()`; used by
    /// `static_assert_size_less_than_or_equal!` below.
    pub const VALID: () = {
        assert!(core::mem::size_of::<T>() <= core::mem::size_of::<U>());
    };
}
3902
#[macro_export]
macro_rules! static_assert_same_size {
    // Forces evaluation of `CheckSameSize::VALID`, turning a size mismatch
    // between `$t` and `$u` into a compile error at the use site.
    ($t: ty, $u: ty) => {
        let _ = $crate::CheckSameSize::<$t, $u>::VALID;
    };
}
#[macro_export]
macro_rules! static_assert_size_less_than_or_equal {
    // Compile error at the use site unless `size_of::<$t>() <= size_of::<$u>()`.
    ($t: ty, $u: ty) => {
        let _ = $crate::CheckSizeLessThanOrEqual::<$t, $u>::VALID;
    };
}
3915
#[inline(always)]
pub fn cast<T: NoUninit, U: AnyBitPattern>(value: T) -> U {
    // Compile-time guarantee that the reinterpretation is size-preserving.
    static_assert_same_size!(T, U);
    // Prevent `value`'s destructor from running: its bits now live on as `U`.
    let value = core::mem::ManuallyDrop::new(value);
    let ptr = &value as *const core::mem::ManuallyDrop<T> as *const U;
    // SAFETY: `T: NoUninit` guarantees no uninitialized bytes and
    // `U: AnyBitPattern` accepts any bit pattern, so reading `size_of::<U>()`
    // (== `size_of::<T>()`) bytes is sound; `read_unaligned` drops the
    // alignment requirement on the source.
    unsafe { ptr.read_unaligned() }
}
3926
#[inline(always)]
pub fn cast_lossy<T: NoUninit, U: AnyBitPattern>(value: T) -> U {
    // `U` may be smaller than `T`: only the first `size_of::<U>()` bytes of
    // `value` are read, hence "lossy".
    static_assert_size_less_than_or_equal!(U, T);
    let value = core::mem::ManuallyDrop::new(value);
    let ptr = &value as *const core::mem::ManuallyDrop<T> as *const U;
    // SAFETY: same reasoning as `cast`, with the size assert above ensuring
    // the read stays within `value`'s bytes.
    unsafe { ptr.read_unaligned() }
}
3937
#[inline(always)]
pub fn as_arrays<const N: usize, T>(slice: &[T]) -> (&[[T; N]], &[T]) {
    // Number of complete `[T; N]` chunks at the front of the slice.
    let full_chunks = slice.len() / N;
    let (head, tail) = slice.split_at(full_chunks * N);
    // SAFETY: `head` holds exactly `full_chunks * N` elements, and `[T; N]`
    // has the same layout as `N` consecutive `T`s, so the cast covers exactly
    // the same bytes with the same alignment.
    unsafe {
        (
            from_raw_parts(head.as_ptr() as *const [T; N], full_chunks),
            tail,
        )
    }
}
3954
#[inline(always)]
pub fn as_arrays_mut<const N: usize, T>(slice: &mut [T]) -> (&mut [[T; N]], &mut [T]) {
    // Mutable counterpart of `as_arrays`.
    let full_chunks = slice.len() / N;
    let (head, tail) = slice.split_at_mut(full_chunks * N);
    // SAFETY: as in `as_arrays`; `split_at_mut` keeps the two borrows
    // disjoint, so the reinterpreted prefix never aliases the tail.
    unsafe {
        (
            from_raw_parts_mut(head.as_mut_ptr() as *mut [T; N], full_chunks),
            tail,
        )
    }
}
3971
3972pub mod core_arch;
3974
#[allow(unused_macros)]
// Generates forwarding methods: each listed `fn` signature is re-emitted
// with an `#[inline(always)]` body that delegates to `(*self).method(...)`,
// for implementing the same interface on a dereferenceable wrapper.
macro_rules! inherit {
    ({$(
        $(#[$attr: meta])*
        $(unsafe $($placeholder: lifetime)?)?
        fn $func: ident(self
            $(,$arg: ident: $ty: ty)* $(,)?
        ) $(-> $ret: ty)?;
    )*}) => {
        $(
            $(#[$attr])*
            #[inline(always)]
            $(unsafe $($placeholder)?)? fn $func (self, $($arg: $ty,)*) $(-> $ret)? {
                (*self).$func ($($arg,)*)
            }
        )*
    };
}
3993
#[allow(unused_macros)]
// Generates "doubled" methods that run `$base`'s method on each half of
// width-2x arguments. Three arms:
//   - default: split each arg into `[_; 2]` via `cast!`, apply `$base`'s
//     method to each half, recombine the two results with `cast!`;
//   - `splat`: pass args through unchanged and duplicate the single result;
//   - `wide`: for methods returning a pair — split args, call twice, and
//     zip the two result pairs back together.
macro_rules! inherit_x2 {
    ($base: expr, {$(
        $(#[$attr: meta])*
        $(unsafe $($placeholder: lifetime)?)?
        fn $func: ident ($self: ident
            $(,$arg: ident: $ty: ty)* $(,)?
        ) $(-> $ret: ty)?;
    )*}) => {
        $(
            $(#[$attr])*
            #[inline(always)]
            $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
                $(let $arg: [_; 2] = cast!($arg);)*
                cast!([($base).$func ($($arg[0],)*), ($base).$func ($($arg[1],)*)])
            }
        )*
    };

    ($base: expr, splat, {$(
        $(#[$attr: meta])*
        $(unsafe $($placeholder: lifetime)?)?
        fn $func: ident ($self: ident
            $(,$arg: ident: $ty: ty)* $(,)?
        ) $(-> $ret: ty)?;
    )*}) => {
        $(
            $(#[$attr])*
            #[inline(always)]
            $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
                cast!([($base).$func ($($arg,)*), ($base).$func ($($arg,)*)])
            }
        )*
    };

    ($base: expr, wide, {$(
        $(#[$attr: meta])*
        $(unsafe $($placeholder: lifetime)?)?
        fn $func: ident ($self: ident
            $(,$arg: ident: $ty: ty)* $(,)?
        ) $(-> $ret: ty)?;
    )*}) => {
        $(
            $(#[$attr])*
            #[inline(always)]
            $(unsafe $($placeholder)?)? fn $func ($self, $($arg: $ty,)*) $(-> $ret)? {
                $(let $arg: [_; 2] = cast!($arg);)*
                let (r0, r1) = ($base).$func ($($arg[0],)*); let (s0, s1) = ($base).$func ($($arg[1],)*);
                (cast!([r0, s0]), cast!([r1, s1]))
            }
        )*
    };
}
4047
4048#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
4049#[cfg_attr(docsrs, doc(cfg(any(target_arch = "x86", target_arch = "x86_64"))))]
4050pub mod x86;
4052
4053#[cfg(target_arch = "aarch64")]
4054#[cfg_attr(docsrs, doc(cfg(target_arch = "aarch64")))]
4055pub mod aarch64;
4057
/// 8-bit lane mask: all-ones (`u8::MAX`) when set, all-zeros when clear
/// (see [`m8::new`]).
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct m8(u8);
/// 16-bit lane mask: all-ones when set, all-zeros when clear.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct m16(u16);
/// 32-bit lane mask: all-ones when set, all-zeros when clear.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct m32(u32);
/// 64-bit lane mask: all-ones when set, all-zeros when clear.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct m64(u64);

/// Bitmask over a `u8`: each bit is an independent flag (its `Debug` impl
/// prints the 8 bits individually).
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct b8(pub u8);
/// Bitmask over a `u16`, one flag per bit.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct b16(pub u16);
/// Bitmask over a `u32`, one flag per bit.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct b32(pub u32);
/// Bitmask over a `u64`, one flag per bit.
#[derive(Copy, Clone, PartialEq, Eq)]
#[repr(transparent)]
pub struct b64(pub u64);
4095
// `b8`: bitwise operators forward to the wrapped `u8`.
impl core::ops::Not for b8 {
    type Output = b8;

    #[inline(always)]
    fn not(self) -> Self::Output {
        b8(!self.0)
    }
}
impl core::ops::BitAnd for b8 {
    type Output = b8;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        b8(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for b8 {
    type Output = b8;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        b8(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for b8 {
    type Output = b8;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        b8(self.0 ^ rhs.0)
    }
}

// `m8`: bitwise operators forward to the wrapped `u8`.
impl core::ops::Not for m8 {
    type Output = m8;

    #[inline(always)]
    fn not(self) -> Self::Output {
        m8(!self.0)
    }
}
impl core::ops::BitAnd for m8 {
    type Output = m8;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        m8(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for m8 {
    type Output = m8;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        m8(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for m8 {
    type Output = m8;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        m8(self.0 ^ rhs.0)
    }
}
4161
// `m16`: bitwise operators forward to the wrapped `u16`.
impl core::ops::Not for m16 {
    type Output = m16;

    #[inline(always)]
    fn not(self) -> Self::Output {
        m16(!self.0)
    }
}
impl core::ops::BitAnd for m16 {
    type Output = m16;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        m16(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for m16 {
    type Output = m16;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        m16(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for m16 {
    type Output = m16;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        m16(self.0 ^ rhs.0)
    }
}

// `m32`: bitwise operators forward to the wrapped `u32`.
impl core::ops::Not for m32 {
    type Output = m32;

    #[inline(always)]
    fn not(self) -> Self::Output {
        m32(!self.0)
    }
}
impl core::ops::BitAnd for m32 {
    type Output = m32;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        m32(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for m32 {
    type Output = m32;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        m32(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for m32 {
    type Output = m32;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        m32(self.0 ^ rhs.0)
    }
}

// `m64`: bitwise operators forward to the wrapped `u64`.
impl core::ops::Not for m64 {
    type Output = m64;

    #[inline(always)]
    fn not(self) -> Self::Output {
        m64(!self.0)
    }
}
impl core::ops::BitAnd for m64 {
    type Output = m64;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        m64(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for m64 {
    type Output = m64;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        m64(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for m64 {
    type Output = m64;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        m64(self.0 ^ rhs.0)
    }
}
4260
// `b16`: bitwise operators forward to the wrapped `u16`.
impl core::ops::Not for b16 {
    type Output = b16;

    #[inline(always)]
    fn not(self) -> Self::Output {
        b16(!self.0)
    }
}
impl core::ops::BitAnd for b16 {
    type Output = b16;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        b16(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for b16 {
    type Output = b16;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        b16(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for b16 {
    type Output = b16;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        b16(self.0 ^ rhs.0)
    }
}

// `b32`: bitwise operators forward to the wrapped `u32`.
impl core::ops::Not for b32 {
    type Output = b32;

    #[inline(always)]
    fn not(self) -> Self::Output {
        b32(!self.0)
    }
}
impl core::ops::BitAnd for b32 {
    type Output = b32;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        b32(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for b32 {
    type Output = b32;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        b32(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for b32 {
    type Output = b32;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        b32(self.0 ^ rhs.0)
    }
}

// `b64`: bitwise operators forward to the wrapped `u64`.
impl core::ops::Not for b64 {
    type Output = b64;

    #[inline(always)]
    fn not(self) -> Self::Output {
        b64(!self.0)
    }
}
impl core::ops::BitAnd for b64 {
    type Output = b64;

    #[inline(always)]
    fn bitand(self, rhs: Self) -> Self::Output {
        b64(self.0 & rhs.0)
    }
}
impl core::ops::BitOr for b64 {
    type Output = b64;

    #[inline(always)]
    fn bitor(self, rhs: Self) -> Self::Output {
        b64(self.0 | rhs.0)
    }
}
impl core::ops::BitXor for b64 {
    type Output = b64;

    #[inline(always)]
    fn bitxor(self, rhs: Self) -> Self::Output {
        b64(self.0 ^ rhs.0)
    }
}
4359
4360impl Debug for b8 {
4361 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
4362 #[allow(dead_code)]
4363 #[derive(Copy, Clone, Debug)]
4364 struct b8(bool, bool, bool, bool, bool, bool, bool, bool);
4365 b8(
4366 ((self.0 >> 0) & 1) == 1,
4367 ((self.0 >> 1) & 1) == 1,
4368 ((self.0 >> 2) & 1) == 1,
4369 ((self.0 >> 3) & 1) == 1,
4370 ((self.0 >> 4) & 1) == 1,
4371 ((self.0 >> 5) & 1) == 1,
4372 ((self.0 >> 6) & 1) == 1,
4373 ((self.0 >> 7) & 1) == 1,
4374 )
4375 .fmt(f)
4376 }
4377}
4378impl Debug for b16 {
4379 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
4380 #[allow(dead_code)]
4381 #[derive(Copy, Clone, Debug)]
4382 struct b16(
4383 bool,
4384 bool,
4385 bool,
4386 bool,
4387 bool,
4388 bool,
4389 bool,
4390 bool,
4391 bool,
4392 bool,
4393 bool,
4394 bool,
4395 bool,
4396 bool,
4397 bool,
4398 bool,
4399 );
4400 b16(
4401 ((self.0 >> 00) & 1) == 1,
4402 ((self.0 >> 01) & 1) == 1,
4403 ((self.0 >> 02) & 1) == 1,
4404 ((self.0 >> 03) & 1) == 1,
4405 ((self.0 >> 04) & 1) == 1,
4406 ((self.0 >> 05) & 1) == 1,
4407 ((self.0 >> 06) & 1) == 1,
4408 ((self.0 >> 07) & 1) == 1,
4409 ((self.0 >> 08) & 1) == 1,
4410 ((self.0 >> 09) & 1) == 1,
4411 ((self.0 >> 10) & 1) == 1,
4412 ((self.0 >> 11) & 1) == 1,
4413 ((self.0 >> 12) & 1) == 1,
4414 ((self.0 >> 13) & 1) == 1,
4415 ((self.0 >> 14) & 1) == 1,
4416 ((self.0 >> 15) & 1) == 1,
4417 )
4418 .fmt(f)
4419 }
4420}
4421impl Debug for b32 {
4422 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
4423 #[allow(dead_code)]
4424 #[derive(Copy, Clone, Debug)]
4425 struct b32(
4426 bool,
4427 bool,
4428 bool,
4429 bool,
4430 bool,
4431 bool,
4432 bool,
4433 bool,
4434 bool,
4435 bool,
4436 bool,
4437 bool,
4438 bool,
4439 bool,
4440 bool,
4441 bool,
4442 bool,
4443 bool,
4444 bool,
4445 bool,
4446 bool,
4447 bool,
4448 bool,
4449 bool,
4450 bool,
4451 bool,
4452 bool,
4453 bool,
4454 bool,
4455 bool,
4456 bool,
4457 bool,
4458 );
4459 b32(
4460 ((self.0 >> 00) & 1) == 1,
4461 ((self.0 >> 01) & 1) == 1,
4462 ((self.0 >> 02) & 1) == 1,
4463 ((self.0 >> 03) & 1) == 1,
4464 ((self.0 >> 04) & 1) == 1,
4465 ((self.0 >> 05) & 1) == 1,
4466 ((self.0 >> 06) & 1) == 1,
4467 ((self.0 >> 07) & 1) == 1,
4468 ((self.0 >> 08) & 1) == 1,
4469 ((self.0 >> 09) & 1) == 1,
4470 ((self.0 >> 10) & 1) == 1,
4471 ((self.0 >> 11) & 1) == 1,
4472 ((self.0 >> 12) & 1) == 1,
4473 ((self.0 >> 13) & 1) == 1,
4474 ((self.0 >> 14) & 1) == 1,
4475 ((self.0 >> 15) & 1) == 1,
4476 ((self.0 >> 16) & 1) == 1,
4477 ((self.0 >> 17) & 1) == 1,
4478 ((self.0 >> 18) & 1) == 1,
4479 ((self.0 >> 19) & 1) == 1,
4480 ((self.0 >> 20) & 1) == 1,
4481 ((self.0 >> 21) & 1) == 1,
4482 ((self.0 >> 22) & 1) == 1,
4483 ((self.0 >> 23) & 1) == 1,
4484 ((self.0 >> 24) & 1) == 1,
4485 ((self.0 >> 25) & 1) == 1,
4486 ((self.0 >> 26) & 1) == 1,
4487 ((self.0 >> 27) & 1) == 1,
4488 ((self.0 >> 28) & 1) == 1,
4489 ((self.0 >> 29) & 1) == 1,
4490 ((self.0 >> 30) & 1) == 1,
4491 ((self.0 >> 31) & 1) == 1,
4492 )
4493 .fmt(f)
4494 }
4495}
4496impl Debug for b64 {
4497 fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
4498 #[allow(dead_code)]
4499 #[derive(Copy, Clone, Debug)]
4500 struct b64(
4501 bool,
4502 bool,
4503 bool,
4504 bool,
4505 bool,
4506 bool,
4507 bool,
4508 bool,
4509 bool,
4510 bool,
4511 bool,
4512 bool,
4513 bool,
4514 bool,
4515 bool,
4516 bool,
4517 bool,
4518 bool,
4519 bool,
4520 bool,
4521 bool,
4522 bool,
4523 bool,
4524 bool,
4525 bool,
4526 bool,
4527 bool,
4528 bool,
4529 bool,
4530 bool,
4531 bool,
4532 bool,
4533 bool,
4534 bool,
4535 bool,
4536 bool,
4537 bool,
4538 bool,
4539 bool,
4540 bool,
4541 bool,
4542 bool,
4543 bool,
4544 bool,
4545 bool,
4546 bool,
4547 bool,
4548 bool,
4549 bool,
4550 bool,
4551 bool,
4552 bool,
4553 bool,
4554 bool,
4555 bool,
4556 bool,
4557 bool,
4558 bool,
4559 bool,
4560 bool,
4561 bool,
4562 bool,
4563 bool,
4564 bool,
4565 );
4566 b64(
4567 ((self.0 >> 00) & 1) == 1,
4568 ((self.0 >> 01) & 1) == 1,
4569 ((self.0 >> 02) & 1) == 1,
4570 ((self.0 >> 03) & 1) == 1,
4571 ((self.0 >> 04) & 1) == 1,
4572 ((self.0 >> 05) & 1) == 1,
4573 ((self.0 >> 06) & 1) == 1,
4574 ((self.0 >> 07) & 1) == 1,
4575 ((self.0 >> 08) & 1) == 1,
4576 ((self.0 >> 09) & 1) == 1,
4577 ((self.0 >> 10) & 1) == 1,
4578 ((self.0 >> 11) & 1) == 1,
4579 ((self.0 >> 12) & 1) == 1,
4580 ((self.0 >> 13) & 1) == 1,
4581 ((self.0 >> 14) & 1) == 1,
4582 ((self.0 >> 15) & 1) == 1,
4583 ((self.0 >> 16) & 1) == 1,
4584 ((self.0 >> 17) & 1) == 1,
4585 ((self.0 >> 18) & 1) == 1,
4586 ((self.0 >> 19) & 1) == 1,
4587 ((self.0 >> 20) & 1) == 1,
4588 ((self.0 >> 21) & 1) == 1,
4589 ((self.0 >> 22) & 1) == 1,
4590 ((self.0 >> 23) & 1) == 1,
4591 ((self.0 >> 24) & 1) == 1,
4592 ((self.0 >> 25) & 1) == 1,
4593 ((self.0 >> 26) & 1) == 1,
4594 ((self.0 >> 27) & 1) == 1,
4595 ((self.0 >> 28) & 1) == 1,
4596 ((self.0 >> 29) & 1) == 1,
4597 ((self.0 >> 30) & 1) == 1,
4598 ((self.0 >> 31) & 1) == 1,
4599 ((self.0 >> 32) & 1) == 1,
4600 ((self.0 >> 33) & 1) == 1,
4601 ((self.0 >> 34) & 1) == 1,
4602 ((self.0 >> 35) & 1) == 1,
4603 ((self.0 >> 36) & 1) == 1,
4604 ((self.0 >> 37) & 1) == 1,
4605 ((self.0 >> 38) & 1) == 1,
4606 ((self.0 >> 39) & 1) == 1,
4607 ((self.0 >> 40) & 1) == 1,
4608 ((self.0 >> 41) & 1) == 1,
4609 ((self.0 >> 42) & 1) == 1,
4610 ((self.0 >> 43) & 1) == 1,
4611 ((self.0 >> 44) & 1) == 1,
4612 ((self.0 >> 45) & 1) == 1,
4613 ((self.0 >> 46) & 1) == 1,
4614 ((self.0 >> 47) & 1) == 1,
4615 ((self.0 >> 48) & 1) == 1,
4616 ((self.0 >> 49) & 1) == 1,
4617 ((self.0 >> 50) & 1) == 1,
4618 ((self.0 >> 51) & 1) == 1,
4619 ((self.0 >> 52) & 1) == 1,
4620 ((self.0 >> 53) & 1) == 1,
4621 ((self.0 >> 54) & 1) == 1,
4622 ((self.0 >> 55) & 1) == 1,
4623 ((self.0 >> 56) & 1) == 1,
4624 ((self.0 >> 57) & 1) == 1,
4625 ((self.0 >> 58) & 1) == 1,
4626 ((self.0 >> 59) & 1) == 1,
4627 ((self.0 >> 60) & 1) == 1,
4628 ((self.0 >> 61) & 1) == 1,
4629 ((self.0 >> 62) & 1) == 1,
4630 ((self.0 >> 63) & 1) == 1,
4631 )
4632 .fmt(f)
4633 }
4634}
4635
// Lane masks debug-print as a single `bool`: set / not set.
impl Debug for m8 {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
        self.is_set().fmt(f)
    }
}
impl Debug for m16 {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
        self.is_set().fmt(f)
    }
}
impl Debug for m32 {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
        self.is_set().fmt(f)
    }
}
impl Debug for m64 {
    #[inline]
    fn fmt(&self, f: &mut ::core::fmt::Formatter<'_>) -> ::core::fmt::Result {
        self.is_set().fmt(f)
    }
}
4660
impl m8 {
    /// Builds a mask from a `bool`: all-ones (`u8::MAX`) for `true`,
    /// all-zeros for `false`.
    #[inline(always)]
    pub const fn new(flag: bool) -> Self {
        Self(if flag { u8::MAX } else { 0 })
    }

    /// Returns `true` iff any bit of the mask is set (non-zero).
    #[inline(always)]
    pub const fn is_set(self) -> bool {
        self.0 != 0
    }
}
impl m16 {
    /// Builds a mask from a `bool`: all-ones (`u16::MAX`) for `true`,
    /// all-zeros for `false`.
    #[inline(always)]
    pub const fn new(flag: bool) -> Self {
        Self(if flag { u16::MAX } else { 0 })
    }

    /// Returns `true` iff any bit of the mask is set (non-zero).
    #[inline(always)]
    pub const fn is_set(self) -> bool {
        self.0 != 0
    }
}
impl m32 {
    /// Builds a mask from a `bool`: all-ones (`u32::MAX`) for `true`,
    /// all-zeros for `false`.
    #[inline(always)]
    pub const fn new(flag: bool) -> Self {
        Self(if flag { u32::MAX } else { 0 })
    }

    /// Returns `true` iff any bit of the mask is set (non-zero).
    #[inline(always)]
    pub const fn is_set(self) -> bool {
        self.0 != 0
    }
}
impl m64 {
    /// Builds a mask from a `bool`: all-ones (`u64::MAX`) for `true`,
    /// all-zeros for `false`.
    #[inline(always)]
    pub const fn new(flag: bool) -> Self {
        Self(if flag { u64::MAX } else { 0 })
    }

    /// Returns `true` iff any bit of the mask is set (non-zero).
    #[inline(always)]
    pub const fn is_set(self) -> bool {
        self.0 != 0
    }
}
4717
/// Vector of 16 `i8` lanes (16 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i8x16(
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
);
/// Vector of 32 `i8` lanes (32 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i8x32(
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
);
/// Vector of 64 `i8` lanes (64 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i8x64(
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
    pub i8,
);
4845
/// Vector of 16 `u8` lanes (16 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u8x16(
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
);
/// Vector of 32 `u8` lanes (32 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u8x32(
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
);
/// Vector of 64 `u8` lanes (64 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u8x64(
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
    pub u8,
);
4973
/// Vector of 16 `m8` lane masks (`#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m8x16(
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
);
/// Vector of 32 `m8` lane masks (`#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct m8x32(
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
    pub m8,
);
5032
/// Vector of 8 `i16` lanes (16 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i16x8(
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
);
/// Vector of 16 `i16` lanes (32 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i16x16(
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
);
/// Vector of 32 `i16` lanes (64 bytes, `#[repr(C)]`).
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i16x32(
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
    pub i16,
);
5104
/// 8 contiguous `u16` lanes — 128 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u16x8(
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
);
/// 16 contiguous `u16` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u16x16(
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
);
/// 32 contiguous `u16` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u16x32(
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
    pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16, pub u16,
);
5176
5177#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5179#[repr(C)]
5180pub struct m16x8(
5181 pub m16,
5182 pub m16,
5183 pub m16,
5184 pub m16,
5185 pub m16,
5186 pub m16,
5187 pub m16,
5188 pub m16,
5189);
5190#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5192#[repr(C)]
5193pub struct m16x16(
5194 pub m16,
5195 pub m16,
5196 pub m16,
5197 pub m16,
5198 pub m16,
5199 pub m16,
5200 pub m16,
5201 pub m16,
5202 pub m16,
5203 pub m16,
5204 pub m16,
5205 pub m16,
5206 pub m16,
5207 pub m16,
5208 pub m16,
5209 pub m16,
5210);
5211
/// 4 contiguous `f32` lanes — 128 bits, C layout. No `Eq`: floats are only `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x4(pub f32, pub f32, pub f32, pub f32);
/// 8 contiguous `f32` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x8(
    pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32,
);
/// 16 contiguous `f32` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f32x16(
    pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32,
    pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32, pub f32,
);
5250
/// 4 contiguous `i32` lanes — 128 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x4(pub i32, pub i32, pub i32, pub i32);
/// 8 contiguous `i32` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x8(
    pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32,
);
/// 16 contiguous `i32` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i32x16(
    pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32,
    pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32, pub i32,
);
5289
/// 4 contiguous `u32` lanes — 128 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x4(pub u32, pub u32, pub u32, pub u32);
/// 8 contiguous `u32` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x8(
    pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32,
);
/// 16 contiguous `u32` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u32x16(
    pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32,
    pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32, pub u32,
);
5328
5329#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5331#[repr(C)]
5332pub struct m32x4(pub m32, pub m32, pub m32, pub m32);
5333#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5335#[repr(C)]
5336pub struct m32x8(
5337 pub m32,
5338 pub m32,
5339 pub m32,
5340 pub m32,
5341 pub m32,
5342 pub m32,
5343 pub m32,
5344 pub m32,
5345);
5346#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5348#[repr(C)]
5349pub struct m32x16(
5350 pub m32,
5351 pub m32,
5352 pub m32,
5353 pub m32,
5354 pub m32,
5355 pub m32,
5356 pub m32,
5357 pub m32,
5358 pub m32,
5359 pub m32,
5360 pub m32,
5361 pub m32,
5362 pub m32,
5363 pub m32,
5364 pub m32,
5365 pub m32,
5366);
5367
/// 2 contiguous `f64` lanes — 128 bits, C layout. No `Eq`: floats are only `PartialEq`.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x2(pub f64, pub f64);
/// 4 contiguous `f64` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x4(pub f64, pub f64, pub f64, pub f64);
/// 8 contiguous `f64` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq)]
#[repr(C)]
pub struct f64x8(
    pub f64, pub f64, pub f64, pub f64, pub f64, pub f64, pub f64, pub f64,
);
5389
/// 2 contiguous `i64` lanes — 128 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x2(pub i64, pub i64);
/// 4 contiguous `i64` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x4(pub i64, pub i64, pub i64, pub i64);
/// 8 contiguous `i64` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct i64x8(
    pub i64, pub i64, pub i64, pub i64, pub i64, pub i64, pub i64, pub i64,
);
5411
/// 2 contiguous `u64` lanes — 128 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x2(pub u64, pub u64);
/// 4 contiguous `u64` lanes — 256 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x4(pub u64, pub u64, pub u64, pub u64);
/// 8 contiguous `u64` lanes — 512 bits, C layout.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[repr(C)]
pub struct u64x8(
    pub u64, pub u64, pub u64, pub u64, pub u64, pub u64, pub u64, pub u64,
);
5433
5434#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5436#[repr(C)]
5437pub struct m64x2(pub m64, pub m64);
5438#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5440#[repr(C)]
5441pub struct m64x4(pub m64, pub m64, pub m64, pub m64);
5442#[derive(Debug, Copy, Clone, PartialEq, Eq)]
5444#[repr(C)]
5445pub struct m64x8(
5446 pub m64,
5447 pub m64,
5448 pub m64,
5449 pub m64,
5450 pub m64,
5451 pub m64,
5452 pub m64,
5453 pub m64,
5454);
5455
// SAFETY: these impls assert that the mask types `m8`..`m64` are plain old
// data — all-zeros is a valid value and every bit pattern is a valid
// representation, with no padding. NOTE(review): this depends on how
// `m8`/`m16`/`m32`/`m64` are declared elsewhere in this file (ideally a
// `#[repr(transparent)]` integer newtype) — confirm at their definitions.
unsafe impl Zeroable for m8 {}
unsafe impl Zeroable for m16 {}
unsafe impl Zeroable for m32 {}
unsafe impl Zeroable for m64 {}
unsafe impl Pod for m8 {}
unsafe impl Pod for m16 {}
unsafe impl Pod for m32 {}
unsafe impl Pod for m64 {}

// SAFETY: same contract for the `b8`..`b64` bitmask types — presumably
// integer newtypes as well; verify at their definitions.
unsafe impl Zeroable for b8 {}
unsafe impl Pod for b8 {}
unsafe impl Zeroable for b16 {}
unsafe impl Pod for b16 {}
unsafe impl Zeroable for b32 {}
unsafe impl Pod for b32 {}
unsafe impl Zeroable for b64 {}
unsafe impl Pod for b64 {}

// SAFETY (all vector impls below): each `TxN` type is a `#[repr(C)]` tuple
// struct whose fields all have the same element type, so its layout is N
// contiguous elements with no padding. `Zeroable`/`Pod` then follow from the
// element type being `Zeroable`/`Pod` (primitive integers/floats, or the mask
// types whose impls appear above).
unsafe impl Zeroable for i8x16 {}
unsafe impl Zeroable for i8x32 {}
unsafe impl Zeroable for i8x64 {}
unsafe impl Pod for i8x16 {}
unsafe impl Pod for i8x32 {}
unsafe impl Pod for i8x64 {}
unsafe impl Zeroable for u8x16 {}
unsafe impl Zeroable for u8x32 {}
unsafe impl Zeroable for u8x64 {}
unsafe impl Pod for u8x16 {}
unsafe impl Pod for u8x32 {}
unsafe impl Pod for u8x64 {}
unsafe impl Zeroable for m8x16 {}
unsafe impl Zeroable for m8x32 {}
unsafe impl Pod for m8x16 {}
unsafe impl Pod for m8x32 {}

// 16-bit element vectors.
unsafe impl Zeroable for i16x8 {}
unsafe impl Zeroable for i16x16 {}
unsafe impl Zeroable for i16x32 {}
unsafe impl Pod for i16x8 {}
unsafe impl Pod for i16x16 {}
unsafe impl Pod for i16x32 {}
unsafe impl Zeroable for u16x8 {}
unsafe impl Zeroable for u16x16 {}
unsafe impl Zeroable for u16x32 {}
unsafe impl Pod for u16x8 {}
unsafe impl Pod for u16x16 {}
unsafe impl Pod for u16x32 {}
unsafe impl Zeroable for m16x8 {}
unsafe impl Zeroable for m16x16 {}
unsafe impl Pod for m16x8 {}
unsafe impl Pod for m16x16 {}

// 32-bit element vectors.
unsafe impl Zeroable for f32x4 {}
unsafe impl Zeroable for f32x8 {}
unsafe impl Zeroable for f32x16 {}
unsafe impl Pod for f32x4 {}
unsafe impl Pod for f32x8 {}
unsafe impl Pod for f32x16 {}
unsafe impl Zeroable for i32x4 {}
unsafe impl Zeroable for i32x8 {}
unsafe impl Zeroable for i32x16 {}
unsafe impl Pod for i32x4 {}
unsafe impl Pod for i32x8 {}
unsafe impl Pod for i32x16 {}
unsafe impl Zeroable for u32x4 {}
unsafe impl Zeroable for u32x8 {}
unsafe impl Zeroable for u32x16 {}
unsafe impl Pod for u32x4 {}
unsafe impl Pod for u32x8 {}
unsafe impl Pod for u32x16 {}
unsafe impl Zeroable for m32x4 {}
unsafe impl Zeroable for m32x8 {}
unsafe impl Zeroable for m32x16 {}
unsafe impl Pod for m32x4 {}
unsafe impl Pod for m32x8 {}
unsafe impl Pod for m32x16 {}

// 64-bit element vectors.
unsafe impl Zeroable for f64x2 {}
unsafe impl Zeroable for f64x4 {}
unsafe impl Zeroable for f64x8 {}
unsafe impl Pod for f64x2 {}
unsafe impl Pod for f64x4 {}
unsafe impl Pod for f64x8 {}
unsafe impl Zeroable for i64x2 {}
unsafe impl Zeroable for i64x4 {}
unsafe impl Zeroable for i64x8 {}
unsafe impl Pod for i64x2 {}
unsafe impl Pod for i64x4 {}
unsafe impl Pod for i64x8 {}
unsafe impl Zeroable for u64x2 {}
unsafe impl Zeroable for u64x4 {}
unsafe impl Zeroable for u64x8 {}
unsafe impl Pod for u64x2 {}
unsafe impl Pod for u64x4 {}
unsafe impl Pod for u64x8 {}
unsafe impl Zeroable for m64x2 {}
unsafe impl Zeroable for m64x4 {}
unsafe impl Zeroable for m64x8 {}
unsafe impl Pod for m64x2 {}
unsafe impl Pod for m64x4 {}
unsafe impl Pod for m64x8 {}
5557
// Builds a `[$T; 32]` where every 32-bit word of element `i` is set to `i`.
// Written as a macro with `while` loops (not `for`) so the exact same body
// can be used from both the `const fn` and plain `fn` wrappers below
// (`for` loops are not allowed in `const fn`).
macro_rules! iota_32 {
    ($T: ty) => {{
        let mut iota = core::mem::MaybeUninit::uninit();
        unsafe {
            {
                // Reinterpret `&mut MaybeUninit<[T; 32]>` as
                // `&mut [MaybeUninit<T>; 32]` (same layout) so elements can be
                // initialized one at a time.
                let iota =
                    &mut *((&mut iota) as *mut MaybeUninit<[$T; 32]> as *mut [MaybeUninit<$T>; 32]);
                let mut i = 0;
                while i < 32 {
                    // View element `i` as raw `u32` words.
                    let v = (&mut iota[i]) as *mut _ as *mut u32;

                    let mut j = 0;
                    // Fill each u32-sized word of the element with `i`.
                    // `write_unaligned`: `$T`'s alignment may be below `u32`'s.
                    // NOTE(review): if size_of::<$T>() < 4, this loop runs zero
                    // times and `assume_init` below would be UB — presumably
                    // `$T` is always at least 32 bits here; confirm at call sites.
                    while j < core::mem::size_of::<$T>() / core::mem::size_of::<u32>() {
                        v.add(j).write_unaligned(i as u32);
                        j += 1;
                    }

                    i += 1;
                }
            }
            // SAFETY: all 32 elements were fully written above (see note).
            iota.assume_init()
        }
    }};
}
5582
// Same as `iota_32!` but fills 64-bit words: builds a `[$T; 32]` where every
// 64-bit word of element `i` is set to `i`. Uses `while` loops so the body is
// usable from a `const fn`.
macro_rules! iota_64 {
    ($T: ty) => {{
        let mut iota = core::mem::MaybeUninit::uninit();
        unsafe {
            {
                // Layout-compatible reinterpretation to initialize per-element.
                let iota =
                    &mut *((&mut iota) as *mut MaybeUninit<[$T; 32]> as *mut [MaybeUninit<$T>; 32]);
                let mut i = 0;
                while i < 32 {
                    // View element `i` as raw `u64` words.
                    let v = (&mut iota[i]) as *mut _ as *mut u64;

                    let mut j = 0;
                    // `write_unaligned`: `$T`'s alignment may be below `u64`'s.
                    // NOTE(review): zero iterations if size_of::<$T>() < 8,
                    // which would make `assume_init` UB — confirm `$T` is
                    // always at least 64 bits at call sites.
                    while j < core::mem::size_of::<$T>() / core::mem::size_of::<u64>() {
                        v.add(j).write_unaligned(i as u64);
                        j += 1;
                    }

                    i += 1;
                }
            }
            // SAFETY: all 32 elements were fully written above (see note).
            iota.assume_init()
        }
    }};
}
5607
// Public wrappers around the `iota_32!`/`iota_64!` macros. When the
// `libpulp_const` cfg is set (const-capable toolchain), the functions are
// `const fn`; otherwise identical plain functions are compiled instead.

/// Returns a `[T; 32]` where every 32-bit word of element `i` equals `i`.
#[cfg(libpulp_const)]
pub const fn iota_32<T: Interleave>() -> [T; 32] {
    iota_32!(T)
}
/// Returns a `[T; 32]` where every 64-bit word of element `i` equals `i`.
#[cfg(libpulp_const)]
pub const fn iota_64<T: Interleave>() -> [T; 32] {
    iota_64!(T)
}

/// Returns a `[T; 32]` where every 32-bit word of element `i` equals `i`.
#[cfg(not(libpulp_const))]
pub fn iota_32<T: Interleave>() -> [T; 32] {
    iota_32!(T)
}
/// Returns a `[T; 32]` where every 64-bit word of element `i` equals `i`.
#[cfg(not(libpulp_const))]
pub fn iota_64<T: Interleave>() -> [T; 32] {
    iota_64!(T)
}
5625
#[cfg(target_arch = "x86_64")]
#[cfg(test)]
mod tests {
    use super::*;

    // Round-trip test: `interleave_fallback` must be the exact inverse of
    // `deinterleave_fallback`, checked for 2-way and 4-way strides on f64x4.
    #[test]
    fn test_interleave() {
        // Skipped silently when the x86 `V3` feature level is unavailable.
        if let Some(simd) = x86::V3::try_new() {
            {
                // 2-way: `src` holds pairs (a0, b0, a1, b1, ...) where each
                // `b` lane is the matching `a` lane plus 0.1.
                let src = [f64x4(0.0, 0.1, 1.0, 1.1), f64x4(2.0, 2.1, 3.0, 3.1)];
                let dst = unsafe { deinterleave_fallback::<f64, f64x4, [f64x4; 2]>(src) };
                // After deinterleaving, group 1 should equal group 0 shifted by 0.1.
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                // Interleaving the result must reproduce `src` exactly.
                assert_eq!(src, unsafe {
                    interleave_fallback::<f64, f64x4, [f64x4; 2]>(dst)
                });
            }
            {
                // 4-way: same structure with stride 4; lane k of group j is j + 0.k.
                let src = [
                    f64x4(0.0, 0.1, 0.2, 0.3),
                    f64x4(1.0, 1.1, 1.2, 1.3),
                    f64x4(2.0, 2.1, 2.2, 2.3),
                    f64x4(3.0, 3.1, 3.2, 3.3),
                ];
                let dst = unsafe { deinterleave_fallback::<f64, f64x4, [f64x4; 4]>(src) };
                assert_eq!(dst[1], simd.add_f64x4(dst[0], simd.splat_f64x4(0.1)));
                assert_eq!(dst[2], simd.add_f64x4(dst[0], simd.splat_f64x4(0.2)));
                assert_eq!(dst[3], simd.add_f64x4(dst[0], simd.splat_f64x4(0.3)));
                assert_eq!(src, unsafe {
                    interleave_fallback::<f64, f64x4, [f64x4; 4]>(dst)
                });
            }
        }
    }
}