// pulp/core_arch/mod.rs

/// Private alias module that glob re-exports the `core::arch` intrinsics of the
/// architecture being compiled, so the rest of this module can name them as
/// `arch::<intrinsic>` regardless of the target.
mod arch {
	// Re-exports gated on the target architecture alone.
	#[cfg(target_arch = "x86")]
	pub use core::arch::x86::*;
	#[cfg(target_arch = "x86_64")]
	pub use core::arch::x86_64::*;
	#[cfg(target_arch = "aarch64")]
	pub use core::arch::aarch64::*;
	#[cfg(target_arch = "wasm32")]
	pub use core::arch::wasm32::*;

	// Re-exports that additionally require the crate's `nightly` feature.
	#[cfg(all(target_arch = "arm", feature = "nightly"))]
	pub use core::arch::arm::*;
	#[cfg(all(target_arch = "mips", feature = "nightly"))]
	pub use core::arch::mips::*;
	#[cfg(all(target_arch = "mips64", feature = "nightly"))]
	pub use core::arch::mips64::*;
	#[cfg(all(target_arch = "powerpc", feature = "nightly"))]
	pub use core::arch::powerpc::*;
	#[cfg(all(target_arch = "powerpc64", feature = "nightly"))]
	pub use core::arch::powerpc64::*;
	#[cfg(all(target_arch = "riscv32", feature = "nightly"))]
	pub use core::arch::riscv32::*;
	#[cfg(all(target_arch = "riscv64", feature = "nightly"))]
	pub use core::arch::riscv64::*;
	#[cfg(all(target_arch = "wasm64", feature = "nightly"))]
	pub use core::arch::wasm64::*;
}
28
/// Generates `pub` forwarding methods for a list of intrinsic signatures.
///
/// The input is a brace-delimited list of `;`-terminated signatures written
/// without a receiver. Each signature may carry attributes, an `unsafe`
/// qualifier and `const` generic parameters. For every signature a method with
/// the same name is emitted that takes `self` by value followed by the listed
/// arguments, and calls `arch::<name>` with those arguments (and generics).
///
/// The generated methods are `#[inline(always)]`; the call into `arch` is always
/// wrapped in an `unsafe` block, whether or not the method itself is `unsafe`.
#[allow(unused_macros)]
macro_rules! delegate {
    ({
        $(
            $(#[$meta: meta])*
            // The optional lifetime fragment gives the `unsafe` repetition a
            // metavariable to repeat over in the expansion; presumably it is
            // never actually supplied by callers.
            $(unsafe $($marker: lifetime)?)?
            fn $name: ident
            $(<$(const $cparam: ident: $cty: ty),* $(,)?>)?
            ($($param: ident: $pty: ty),* $(,)?)
            $(-> $out: ty)?;
        )*
    }) => {
        $(
            $(#[$meta])*
            #[allow(clippy::missing_safety_doc)]
            #[inline(always)]
            pub $(unsafe $($marker)?)? fn $name $(<$(const $cparam: $cty,)*>)?(
                self,
                $($param: $pty,)*
            ) $(-> $out)? {
                unsafe { arch::$name $(::<$($cparam,)*>)?($($param,)*) }
            }
        )*
    };
}
48
/// Expands to a check for the given x86/x86_64 CPU feature by forwarding the
/// feature token to `std`'s `is_x86_feature_detected!`.
///
/// This variant is compiled when the `std` feature is enabled on an x86 or
/// x86_64 target.
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), feature = "std"))]
#[macro_export]
macro_rules! feature_detected {
	($name: tt) => {
		::std::is_x86_feature_detected!($name)
	};
}
56
/// Expands to a check for the given aarch64 CPU feature by forwarding the
/// feature token to `std::arch::is_aarch64_feature_detected!`.
///
/// This variant is compiled when the `std` feature is enabled on an aarch64
/// target.
#[cfg(all(target_arch = "aarch64", feature = "std"))]
#[macro_export]
macro_rules! feature_detected {
	($name: tt) => {
		::std::arch::is_aarch64_feature_detected!($name)
	};
}
64
/// Fallback feature check: expands to `cfg!(target_feature = ...)`, i.e. a
/// constant decided at compile time from the enabled target features.
///
/// This variant is compiled whenever the `std` feature is disabled, or the
/// target is none of x86, x86_64 and aarch64.
#[cfg(not(all(
	feature = "std",
	any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")
)))]
#[macro_export]
macro_rules! feature_detected {
	($name: tt) => {
		cfg!(target_feature = $name)
	};
}
75
/// Maps an x86/x86_64 target-feature string literal to the token type of the
/// same name in `$crate::core_arch::x86`.
///
/// Used by `simd_type!` to resolve the type of each feature field. An
/// unrecognized literal matches no arm and is rejected at expansion time.
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[doc(hidden)]
#[rustfmt::skip]
#[macro_export]
macro_rules! __impl_type {
    ("aes") => { $crate::core_arch::x86::Aes };
    ("pclmulqdq") => { $crate::core_arch::x86::Pclmulqdq };
    ("rdrand") => { $crate::core_arch::x86::Rdrand };
    ("rdseed") => { $crate::core_arch::x86::Rdseed };
    ("tsc") => { $crate::core_arch::x86::Tsc };
    ("mmx") => { $crate::core_arch::x86::Mmx };
    ("sse") => { $crate::core_arch::x86::Sse };
    ("sse2") => { $crate::core_arch::x86::Sse2 };
    ("sse3") => { $crate::core_arch::x86::Sse3 };
    ("ssse3") => { $crate::core_arch::x86::Ssse3 };
    ("sse4.1") => { $crate::core_arch::x86::Sse4_1 };
    ("sse4.2") => { $crate::core_arch::x86::Sse4_2 };
    ("sse4a") => { $crate::core_arch::x86::Sse4a };
    ("sha") => { $crate::core_arch::x86::Sha };
    ("avx") => { $crate::core_arch::x86::Avx };
    ("avx2") => { $crate::core_arch::x86::Avx2 };
    ("gfni") => { $crate::core_arch::x86::Gfni };
    ("vaes") => { $crate::core_arch::x86::Vaes };
    ("vpclmulqdq") => { $crate::core_arch::x86::Vpclmulqdq };
    ("avx512f") => { $crate::core_arch::x86::Avx512f };
    ("avx512cd") => { $crate::core_arch::x86::Avx512cd };
    ("avx512er") => { $crate::core_arch::x86::Avx512er };
    ("avx512pf") => { $crate::core_arch::x86::Avx512pf };
    ("avx512bw") => { $crate::core_arch::x86::Avx512bw };
    ("avx512dq") => { $crate::core_arch::x86::Avx512dq };
    // Must resolve to its own token type: mapping this to `Avx512f` would let a
    // token for "avx512vl" be validated by the "avx512f" availability check.
    ("avx512vl") => { $crate::core_arch::x86::Avx512vl };
    ("avx512ifma") => { $crate::core_arch::x86::Avx512ifma };
    ("avx512vbmi") => { $crate::core_arch::x86::Avx512vbmi };
    ("avx512vpopcntdq") => { $crate::core_arch::x86::Avx512vpopcntdq };
    ("avx512vbmi2") => { $crate::core_arch::x86::Avx512vbmi2 };
    ("avx512gfni") => { $crate::core_arch::x86::Avx512gfni };
    ("avx512vaes") => { $crate::core_arch::x86::Avx512vaes };
    ("avx512vpclmulqdq") => { $crate::core_arch::x86::Avx512vpclmulqdq };
    ("avx512vnni") => { $crate::core_arch::x86::Avx512vnni };
    ("avx512bitalg") => { $crate::core_arch::x86::Avx512bitalg };
    ("avx512bf16") => { $crate::core_arch::x86::Avx512bf16 };
    ("avx512vp2intersect") => { $crate::core_arch::x86::Avx512vp2intersect };
    ("f16c") => { $crate::core_arch::x86::F16c };
    ("fma") => { $crate::core_arch::x86::Fma };
    ("bmi1") => { $crate::core_arch::x86::Bmi1 };
    ("bmi2") => { $crate::core_arch::x86::Bmi2 };
    ("lzcnt") => { $crate::core_arch::x86::Lzcnt };
    ("tbm") => { $crate::core_arch::x86::Tbm };
    ("popcnt") => { $crate::core_arch::x86::Popcnt };
    ("fxsr") => { $crate::core_arch::x86::Fxsr };
    ("xsave") => { $crate::core_arch::x86::Xsave };
    ("xsaveopt") => { $crate::core_arch::x86::Xsaveopt };
    ("xsaves") => { $crate::core_arch::x86::Xsaves };
    ("xsavec") => { $crate::core_arch::x86::Xsavec };
    ("cmpxchg16b") => { $crate::core_arch::x86::Cmpxchg16b };
    ("adx") => { $crate::core_arch::x86::Adx };
    ("rtm") => { $crate::core_arch::x86::Rtm };
    ("abm") => { $crate::core_arch::x86::Abm };
}
135
/// Maps an aarch64 target-feature string literal to the token type of the same
/// name in `$crate::core_arch::aarch64`.
///
/// Arms are listed alphabetically by feature name; the literals are disjoint,
/// so the order has no effect on matching. An unrecognized literal matches no
/// arm and is rejected at expansion time.
#[cfg(target_arch = "aarch64")]
#[doc(hidden)]
#[rustfmt::skip]
#[macro_export]
macro_rules! __impl_type {
    ("aes") => { $crate::core_arch::aarch64::Aes };
    ("asimd") => { $crate::core_arch::aarch64::Asimd };
    ("bf16") => { $crate::core_arch::aarch64::Bf16 };
    ("bti") => { $crate::core_arch::aarch64::Bti };
    ("crc") => { $crate::core_arch::aarch64::Crc };
    ("dit") => { $crate::core_arch::aarch64::Dit };
    ("dotprod") => { $crate::core_arch::aarch64::Dotprod };
    ("dpb") => { $crate::core_arch::aarch64::Dpb };
    ("dpb2") => { $crate::core_arch::aarch64::Dpb2 };
    ("f32mm") => { $crate::core_arch::aarch64::F32mm };
    ("f64mm") => { $crate::core_arch::aarch64::F64mm };
    ("fcma") => { $crate::core_arch::aarch64::Fcma };
    ("fhm") => { $crate::core_arch::aarch64::Fhm };
    ("flagm") => { $crate::core_arch::aarch64::Flagm };
    ("fp") => { $crate::core_arch::aarch64::Fp };
    ("fp16") => { $crate::core_arch::aarch64::Fp16 };
    ("frintts") => { $crate::core_arch::aarch64::Frintts };
    ("i8mm") => { $crate::core_arch::aarch64::I8mm };
    ("jsconv") => { $crate::core_arch::aarch64::Jsconv };
    ("lse") => { $crate::core_arch::aarch64::Lse };
    ("lse2") => { $crate::core_arch::aarch64::Lse2 };
    ("mte") => { $crate::core_arch::aarch64::Mte };
    ("neon") => { $crate::core_arch::aarch64::Neon };
    ("paca") => { $crate::core_arch::aarch64::Paca };
    ("pacg") => { $crate::core_arch::aarch64::Pacg };
    ("pmull") => { $crate::core_arch::aarch64::Pmull };
    ("rand") => { $crate::core_arch::aarch64::Rand };
    ("rcpc") => { $crate::core_arch::aarch64::Rcpc };
    ("rcpc2") => { $crate::core_arch::aarch64::Rcpc2 };
    ("rdm") => { $crate::core_arch::aarch64::Rdm };
    ("sb") => { $crate::core_arch::aarch64::Sb };
    ("sha2") => { $crate::core_arch::aarch64::Sha2 };
    ("sha3") => { $crate::core_arch::aarch64::Sha3 };
    ("sm4") => { $crate::core_arch::aarch64::Sm4 };
    ("ssbs") => { $crate::core_arch::aarch64::Ssbs };
    ("sve") => { $crate::core_arch::aarch64::Sve };
    ("sve2") => { $crate::core_arch::aarch64::Sve2 };
    ("sve2-aes") => { $crate::core_arch::aarch64::Sve2Aes };
    ("sve2-bitperm") => { $crate::core_arch::aarch64::Sve2Bitperm };
    ("sve2-sha3") => { $crate::core_arch::aarch64::Sve2Sha3 };
    ("sve2-sm4") => { $crate::core_arch::aarch64::Sve2Sm4 };
    ("tme") => { $crate::core_arch::aarch64::Tme };
}
184
/// Variant compiled on targets other than x86, x86_64 and aarch64: any
/// invocation expands to a `compile_error!`, so using a feature token type on
/// such a target fails the build.
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
#[doc(hidden)]
#[rustfmt::skip]
#[macro_export]
macro_rules! __impl_type {
    ($feature: tt) => { compile_error!("unsupported arch") };
}
192
/// Declares SIMD token structs whose fields are per-feature token types.
///
/// Two input forms are accepted:
/// - a plain list of structs whose fields are written `name: "feature"`;
/// - the same list wrapped in braces, with fields written
///   `name: some_ident!("feature")` (the identifier before `!` is ignored).
///
/// The first form is rewritten into the second and expanded by it. For every
/// struct, each field's type is resolved through `__impl_type!`, the struct
/// derives `Clone`, `Copy` and `Debug`, and an inherent impl provides
/// `new_unchecked`, `try_new`, `is_available`, `vectorize` and `to_ref`.
#[macro_export]
macro_rules! simd_type {
    // Unbraced form: wrap every feature literal in a placeholder macro call and
    // recurse into the braced arm below.
    (
        $(
            $(#[$attr: meta])*
            $vis: vis struct $name: ident {
                $($feature_vis: vis $ident: ident: $feature: tt),* $(,)?
            }
        )*
    ) => {
        $crate::simd_type!({$(
            $(#[$attr])*
            $vis struct $name {
                $($feature_vis $ident: target_feature!($feature),)*
            }
        )*});
    };

    // Braced form: emits the struct and its inherent impl.
    ({
        $(
            $(#[$attr: meta])*
            $vis: vis struct $name: ident {
                $($feature_vis: vis $ident: ident: $_: ident!($feature: tt)),* $(,)?
            }
        )*
    }) => {
        $(
            #[allow(dead_code)]
            $(#[$attr])*
            #[derive(Clone, Copy, Debug)]
            $vis struct $name{
                $($feature_vis $ident : $crate::core_arch::__impl_type!($feature),)*
            }

            // The user attributes are repeated here so that the impl is subject
            // to the same attributes as the struct.
            #[allow(dead_code)]
            $(#[$attr])*
            impl $name {
                /// Returns a SIMD token type without checking if the required CPU features for
                /// this type are available.
                ///
                /// # Safety
                /// - the required CPU features must be available.
                #[inline]
                pub unsafe fn new_unchecked() -> Self {
                    // SAFETY: the caller guarantees that every required feature
                    // is available, which is the contract of each field's own
                    // `new_unchecked`.
                    unsafe{Self{
                        $($ident: <$crate::core_arch::__impl_type!($feature)>::new_unchecked(),)*
                    }}
                }

                /// Returns a SIMD token type if the required CPU features for this type are
                /// available, otherwise returns `None`.
                #[inline]
                pub fn try_new() -> Option<Self> {
                    if Self::is_available() {
                        // SAFETY: `is_available` returned `true`, which means
                        // every field's feature type reported itself available.
                        Some(unsafe { Self::new_unchecked() })
                    } else {
                        None
                    }
                }

                // Per-struct cache cell for the availability check. `u8::MAX`
                // means "not detected yet"; afterwards it holds 0 or 1.
                #[inline(always)]
                fn __static_available() -> &'static ::core::sync::atomic::AtomicU8 {
                    static AVAILABLE: ::core::sync::atomic::AtomicU8 = ::core::sync::atomic::AtomicU8::new(u8::MAX);
                    &AVAILABLE
                }

                /// Returns `true` if the required CPU features for this type are available,
                /// otherwise returns `false`.
                #[inline]
                pub fn is_available() -> bool {
                    // Only relaxed accesses are used, so concurrent first calls
                    // may each run the detection; they all store the result of
                    // the same conjunction.
                    let mut available = Self::__static_available().load(::core::sync::atomic::Ordering::Relaxed);
                    if available == u8::MAX {
                        available = Self::__detect_is_available() as u8;
                    }

                    available != 0
                }

                // Slow path of `is_available`: checks every field's feature type
                // and caches the combined answer. Kept out of line.
                #[inline(never)]
                fn __detect_is_available() -> bool {
                    let out = true $(&& <$crate::core_arch::__impl_type!($feature)>::is_available())*;
                    Self::__static_available().store(out as u8, ::core::sync::atomic::Ordering::Relaxed);
                    out
                }

                /// Vectorizes the given function as if the CPU features for this type were applied
                /// to it.
                ///
                /// # Note
                /// For the vectorization to work properly, the given function must be inlined.
                /// Consider marking it as `#[inline(always)]`
                #[inline(always)]
                pub fn vectorize<F: $crate::NullaryFnOnce>(self, f: F) -> F::Output {
                    // Variant for callables that fit in eight machine words: the
                    // callable arrives as eight word-sized arguments and is
                    // reassembled inside the feature-enabled function.
                    // NOTE(review): presumably this keeps small callables out of
                    // memory when crossing the `target_feature` boundary; confirm.
                    $(#[target_feature(enable = $feature)])*
                    #[inline]
                    #[allow(clippy::too_many_arguments)]
                    unsafe fn imp_fastcall<F: $crate::NullaryFnOnce>(
                        f0: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f1: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f2: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f3: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f4: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f5: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f6: ::core::mem::MaybeUninit<::core::primitive::usize>,
                        f7: ::core::mem::MaybeUninit<::core::primitive::usize>,
                    ) -> F::Output {
                        // SAFETY: the only call site passes the leading bytes of
                        // a valid `F` and has checked that `F` is no larger than
                        // the eight-word array being read from.
                        let f: F = ::core::mem::transmute_copy(&[f0, f1, f2, f3, f4, f5, f6, f7]);
                        f.call()
                    }

                    // Variant for larger callables: passed as-is.
                    $(#[target_feature(enable = $feature)])*
                    #[inline]
                    unsafe fn imp<F: $crate::NullaryFnOnce>(f: F) -> F::Output {
                        f.call()
                    }

                    if $crate::try_const! { ::core::mem::size_of::<F>() <= 8 * ::core::mem::size_of::<::core::primitive::usize>() } {
                        // The `__u` member makes the union at least eight words
                        // large and word-aligned, so eight word reads from its
                        // address stay in bounds. Bytes past `F` may be
                        // uninitialized, hence `MaybeUninit`.
                        union Pad<T> {
                            t: ::core::mem::ManuallyDrop<T>,
                            __u: ::core::mem::MaybeUninit<[::core::primitive::usize; 8]>,
                        }

                        // The callable is moved into the union, which never
                        // drops it; ownership continues in the bitwise copy
                        // rebuilt by `imp_fastcall`.
                        let f = Pad {
                            t: ::core::mem::ManuallyDrop::new(f),
                        };
                        let p = (&f) as *const _ as *const ::core::mem::MaybeUninit<::core::primitive::usize>;

                        // SAFETY: `p` points to the start of `f`, which spans at
                        // least eight words (see `Pad`). `self` is taken as
                        // evidence that the enabled target features are present.
                        unsafe {
                            imp_fastcall::<F>(
                                *p.add(0),
                                *p.add(1),
                                *p.add(2),
                                *p.add(3),
                                *p.add(4),
                                *p.add(5),
                                *p.add(6),
                                *p.add(7),
                            )
                        }
                    } else {
                        // SAFETY: `self` is taken as evidence that the enabled
                        // target features are present.
                        unsafe { imp(f) }
                    }
                }

                /// Takes a proof of the existence of this SIMD token (`self`), and returns a
                /// persistent reference to it.
                #[inline]
                pub fn to_ref(self) -> &'static Self {
                    // Compile-time guard: the dangling reference below is only
                    // valid if the token occupies no memory.
                    const __ASSERT_ZST: () = {
                        assert!(::core::mem::size_of::<$name>() == 0);
                    };
                    // SAFETY: the type is zero-sized (asserted above), so a
                    // non-null, aligned dangling pointer is a valid reference.
                    unsafe { &*::core::ptr::NonNull::dangling().as_ptr() }
                }
            }
        )*
    };
}
352
353#[rustfmt::skip]
354#[doc(hidden)]
355pub use __impl_type as __impl_type;
356
357#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
358#[cfg_attr(docsrs, doc(cfg(any(target_arch = "x86", target_arch = "x86_64"))))]
359pub mod x86;
360
361#[cfg(target_arch = "aarch64")]
362#[cfg_attr(docsrs, doc(cfg(target_arch = "aarch64")))]
363pub mod aarch64;