qd/
lib.rs

1#![cfg_attr(not(test), no_std)]
2
3#[cfg(feature = "std")]
4extern crate std;
5
6use core::cmp::Ordering;
7use core::ops::*;
8
9use bytemuck::Pod;
10use bytemuck::Zeroable;
11
12mod const_fma;
13mod const_imp;
14
15#[cfg(not(feature = "std"))]
16macro_rules! pick {
17    ($libm: expr, $std: expr) => {
18        $libm
19    };
20}
21
22#[cfg(feature = "std")]
23macro_rules! pick {
24    ($libm: expr, $std: expr) => {
25        $std
26    };
27}
28
29mod imp {
30    use super::*;
31
32    pub use super::const_imp::*;
33
34    pub fn fma(a: f64, b: f64, c: f64) -> f64 {
35        pick!(libm::fma, f64::mul_add)(a, b, c)
36    }
37
38    pub fn sqrt(a: f64) -> f64 {
39        pick!(libm::sqrt, f64::sqrt)(a)
40    }
41
42    pub fn two_prod(a: f64, b: f64) -> Quad {
43        let p = a * b;
44        let e = fma(a, b, -p);
45        Quad(p, e)
46    }
47
48    pub fn mul(a: Quad, b: Quad) -> Quad {
49        let Quad(p0, e1) = two_prod(a.0, b.0);
50        let e1 = fma(a.0, b.1, e1);
51        let e1 = fma(a.1, b.0, e1);
52
53        const_imp::quick_two_sum(p0, e1)
54    }
55}
56
57pub mod simd;
58
59/// extended precision floating point type.
60///
61/// math operations assume that `self.1.abs() < self.0.abs() * ulp / 2.0`
62#[derive(Copy, Clone, Debug, PartialEq, PartialOrd)]
63#[repr(C)]
64pub struct Quad<T = f64>(pub T, pub T);
65
66impl Quad {
67    pub const RADIX: u32 = 2;
68    pub const MANTISSA_DIGITS: u32 = 105;
69    pub const DIGITS: u32 = 31;
70
71    pub const ZERO: Self = Self(0.0, 0.0);
72    pub const ONE: Self = Self(1.0, 0.0);
73    pub const INFINITY: Self = Self(f64::INFINITY, 0.0);
74    pub const NEG_INFINITY: Self = Self(f64::NEG_INFINITY, 0.0);
75    pub const NAN: Self = Self(f64::NAN, f64::NAN);
76
77    pub const EPSILON: Self = Self(f64::EPSILON * f64::EPSILON, 0.0);
78    pub const MAX: Self = Self(f64::MAX, f64::MAX * f64::EPSILON / 2.0);
79    pub const MIN: Self = Self(-Self::MAX.0, -Self::MAX.1);
80    pub const MIN_POSITIVE: Self = Self(f64::MIN_POSITIVE, 0.0);
81
82    pub const MIN_EXP: i32 = f64::MIN_EXP;
83    pub const MAX_EXP: i32 = f64::MAX_EXP;
84
85    pub const LN_2: Self = Self(0.6931471805599453, 2.3190468138462996e-17);
86    pub const FRAC_1_LN_2: Self = Self(1.4426950408889634, 2.0355273740931033e-17);
87    pub const LN_10: Self = Self(2.302585092994046, -2.1707562233822494e-16);
88    pub const FRAC_1_LN_10: Self = Self(0.4342944819032518, 1.098319650216765e-17);
89    pub const PI: Self = Self(3.141592653589793, 1.2246467991473532e-16);
90
91    #[inline(always)]
92    pub const fn from_f64(value: f64) -> Self {
93        Quad(value, 0.0)
94    }
95
96    pub const fn add_estimate(self, rhs: Self) -> Self {
97        const_imp::add_estimate(self, rhs)
98    }
99
100    pub const fn add_accurate(self, rhs: Self) -> Self {
101        const_imp::add_accurate(self, rhs)
102    }
103
104    pub const fn sub_estimate(self, rhs: Self) -> Self {
105        const_imp::add_estimate(self, rhs.neg())
106    }
107
108    pub const fn sub_accurate(self, rhs: Self) -> Self {
109        const_imp::add_accurate(self, rhs.neg())
110    }
111
112    pub const fn neg(self) -> Self {
113        Quad(-self.0, -self.1)
114    }
115
116    pub const fn abs(self) -> Self {
117        const_imp::abs(self)
118    }
119
120    pub fn mul(self, rhs: Self) -> Self {
121        imp::mul(self, rhs)
122    }
123
124    pub const fn const_mul(self, rhs: Self) -> Self {
125        const_imp::mul(self, rhs)
126    }
127
128    pub const fn const_div(self, rhs: Self) -> Self {
129        let mut quotient = Quad(self.0 / rhs.0, 0.0);
130        let mut r = self.sub_accurate(rhs.const_mul(quotient));
131        quotient.1 = r.0 / rhs.0;
132        r = r.sub_accurate(rhs.const_mul(Quad(quotient.1, 0.0)));
133
134        let update = r.0 / rhs.0;
135
136        quotient = const_imp::quick_two_sum(quotient.0, quotient.1);
137        quotient = quotient.add_accurate(Quad(update, 0.0));
138        quotient
139    }
140
141    pub const fn const_recip(self) -> Self {
142        Self::ONE.const_div(self)
143    }
144
145    pub fn recip(self) -> Self {
146        Self::ONE.div(self)
147    }
148
149    pub fn div(self, rhs: Self) -> Self {
150        let mut quotient = Quad(self.0 / rhs.0, 0.0);
151        let mut r = self.sub_accurate(rhs.mul(quotient));
152        quotient.1 = r.0 / rhs.0;
153        r = r.sub_accurate(rhs.mul(Quad(quotient.1, 0.0)));
154
155        let update = r.0 / rhs.0;
156
157        quotient = imp::quick_two_sum(quotient.0, quotient.1);
158        quotient = quotient.add_accurate(Quad(update, 0.0));
159        quotient
160    }
161
162    pub const fn eq(self, rhs: Self) -> bool {
163        self.0 == rhs.0 && self.1 == rhs.1
164    }
165
166    pub const fn ne(self, rhs: Self) -> bool {
167        self.0 != rhs.0 || self.1 != rhs.1
168    }
169
170    pub const fn is_nan(self) -> bool {
171        self.0.is_nan() || self.1.is_nan()
172    }
173
174    pub const fn is_finite(self) -> bool {
175        self.0.is_finite() && self.1.is_finite()
176    }
177
178    pub const fn partial_cmp(self, rhs: Self) -> Option<Ordering> {
179        if self.is_nan() || rhs.is_nan() {
180            return None;
181        }
182
183        if self.0 < rhs.0 {
184            Some(Ordering::Less)
185        } else if self.0 > rhs.0 {
186            Some(Ordering::Greater)
187        } else {
188            if self.1 < rhs.1 {
189                Some(Ordering::Less)
190            } else if self.1 > rhs.1 {
191                Some(Ordering::Greater)
192            } else {
193                Some(Ordering::Equal)
194            }
195        }
196    }
197
198    pub const fn lt(self, rhs: Self) -> bool {
199        self.0 < rhs.0 || (self.0 == rhs.0 && self.1 < rhs.1)
200    }
201
202    pub const fn le(self, rhs: Self) -> bool {
203        self.0 <= rhs.0 || (self.0 == rhs.0 && self.1 <= rhs.1)
204    }
205
206    pub const fn gt(self, rhs: Self) -> bool {
207        rhs.lt(self)
208    }
209
210    pub const fn ge(self, rhs: Self) -> bool {
211        rhs.le(self)
212    }
213
214    pub const fn const_sqrt(self) -> Self {
215        if self.0 == 0.0 {
216            return Self::ZERO;
217        }
218
219        let mut iterate;
220        {
221            let inv_sqrt = 1.0 / const_imp::sqrt(self.0);
222            let left = self.0 * inv_sqrt;
223            let right = self.sub_accurate(const_imp::two_prod(left, left)).0 * (inv_sqrt / 2.0);
224            iterate = const_imp::two_sum(left, right);
225        }
226        {
227            iterate = Self::ONE.const_div(iterate);
228            let left = self.0 * iterate.0;
229            let right = self.sub_accurate(const_imp::two_prod(left, left)).0 * (iterate.0 / 2.0);
230            iterate = const_imp::two_sum(left, right);
231        }
232        iterate
233    }
234
235    pub fn sqrt(self) -> Self {
236        if self.0 == 0.0 {
237            return Self::ZERO;
238        }
239
240        let mut iterate;
241        {
242            let inv_sqrt = 1.0 / imp::sqrt(self.0);
243            let left = self.0 * inv_sqrt;
244            let right = self.sub_accurate(imp::two_prod(left, left)).0 * (inv_sqrt / 2.0);
245            iterate = imp::two_sum(left, right);
246        }
247        {
248            iterate = Self::ONE.div(iterate);
249            let left = self.0 * iterate.0;
250            let right = self.sub_accurate(imp::two_prod(left, left)).0 * (iterate.0 / 2.0);
251            iterate = imp::two_sum(left, right);
252        }
253        iterate
254    }
255
256    pub fn exp(self) -> Self {
257        let value = self;
258        let exp_max = 709.0;
259        if value.0 <= -exp_max {
260            return Self(0.0, 0.0);
261        }
262        if value.0 >= exp_max {
263            return Self::INFINITY;
264        }
265        if value.0 == 0.0 {
266            return Self(1.0, 0.0);
267        }
268
269        let shift = pick!(libm::floor, f64::floor)(value.0 / Self::LN_2.0 + 0.5);
270
271        let num_squares = 9;
272        let num_terms = 9;
273
274        let scale = (1u32 << num_squares) as f64;
275        let inv_scale = scale.recip();
276
277        let r = (value - Self::LN_2 * Self(shift, 0.0)) * Self::from_f64(inv_scale);
278
279        let mut r_power = r * r;
280        let mut iterate = r + r_power * Self::from_f64(0.5);
281
282        r_power = r_power * r;
283
284        let mut coefficient = Self(6.0, 0.0).recip();
285        let mut term = coefficient * r_power;
286
287        iterate = iterate + term;
288        let tolerance = Self::EPSILON.0 * inv_scale;
289
290        for j in 4..num_terms {
291            r_power = r_power * r;
292            coefficient = coefficient / Self(j as f64, 0.0);
293            term = coefficient * r_power;
294            iterate = iterate + term;
295
296            if const_imp::fabs(term.0) <= tolerance {
297                break;
298            }
299        }
300
301        for _ in 0..num_squares {
302            iterate = iterate * iterate + iterate * Self::from_f64(2.0);
303        }
304
305        iterate = iterate + Self(1.0, 0.0);
306        let shift = pick!(libm::pow, f64::powi)(2.0f64, shift as _);
307
308        iterate * Self::from_f64(shift)
309    }
310
311    pub fn ln(self) -> Self {
312        let value = self;
313        if value.0 < 0.0 {
314            return Self::NAN;
315        }
316        if value.0 == 0.0 {
317            return Self::NEG_INFINITY;
318        }
319
320        let mut x = Self(pick!(libm::log, f64::ln)(self.0), 0.0);
321
322        x = x + value * (-x).exp();
323        x = x - Self(1.0, 0.0);
324
325        x
326    }
327
328    pub fn log2(self) -> Self {
329        self.ln() * Self::FRAC_1_LN_2
330    }
331
332    pub fn log10(self) -> Self {
333        self.ln() * Self::FRAC_1_LN_10
334    }
335
336    pub fn trunc(self) -> Self {
337        let trunc = pick!(libm::trunc, f64::trunc);
338        let trunc = Self(trunc(self.0), trunc(self.1));
339
340        if trunc.0 == self.0 {
341            trunc
342        } else {
343            Quad(trunc.0, 0.0)
344        }
345    }
346}
347
348impl Add for Quad {
349    type Output = Quad;
350
351    fn add(self, rhs: Self) -> Self::Output {
352        self.add_accurate(rhs)
353    }
354}
355impl Add for &Quad {
356    type Output = Quad;
357
358    fn add(self, rhs: Self) -> Self::Output {
359        self.add_accurate(*rhs)
360    }
361}
362
363impl Sub for Quad {
364    type Output = Quad;
365
366    fn sub(self, rhs: Self) -> Self::Output {
367        self.sub_accurate(rhs)
368    }
369}
370impl Sub for &Quad {
371    type Output = Quad;
372
373    fn sub(self, rhs: Self) -> Self::Output {
374        self.sub_accurate(*rhs)
375    }
376}
377
378impl Mul for Quad {
379    type Output = Quad;
380
381    fn mul(self, rhs: Self) -> Self::Output {
382        self.mul(rhs)
383    }
384}
385
386impl Mul for &Quad {
387    type Output = Quad;
388
389    fn mul(self, rhs: Self) -> Self::Output {
390        (*self).mul(*rhs)
391    }
392}
393
394impl Div for Quad {
395    type Output = Quad;
396
397    fn div(self, rhs: Self) -> Self::Output {
398        self.div(rhs)
399    }
400}
401
402impl Div for &Quad {
403    type Output = Quad;
404
405    fn div(self, rhs: Self) -> Self::Output {
406        (*self).div(*rhs)
407    }
408}
409
410impl Rem for Quad {
411    type Output = Quad;
412
413    fn rem(self, rhs: Self) -> Self::Output {
414        self - (self / rhs).trunc() * rhs
415    }
416}
417
418impl Rem for &Quad {
419    type Output = Quad;
420
421    fn rem(self, rhs: Self) -> Self::Output {
422        (*self).rem(*rhs)
423    }
424}
425
426impl Neg for Quad {
427    type Output = Quad;
428
429    fn neg(self) -> Self::Output {
430        self.neg()
431    }
432}
433impl Neg for &Quad {
434    type Output = Quad;
435
436    fn neg(self) -> Self::Output {
437        (*self).neg()
438    }
439}
440
441impl AddAssign for Quad {
442    fn add_assign(&mut self, rhs: Self) {
443        *self = *self + rhs
444    }
445}
446impl AddAssign<&Quad> for Quad {
447    fn add_assign(&mut self, rhs: &Quad) {
448        *self = *self + *rhs
449    }
450}
451
452impl SubAssign for Quad {
453    fn sub_assign(&mut self, rhs: Self) {
454        *self = *self - rhs
455    }
456}
457impl SubAssign<&Quad> for Quad {
458    fn sub_assign(&mut self, rhs: &Quad) {
459        *self = *self - *rhs
460    }
461}
462
463impl MulAssign for Quad {
464    fn mul_assign(&mut self, rhs: Self) {
465        *self = *self * rhs
466    }
467}
468impl MulAssign<&Quad> for Quad {
469    fn mul_assign(&mut self, rhs: &Quad) {
470        *self = *self * *rhs
471    }
472}
473
474impl DivAssign for Quad {
475    fn div_assign(&mut self, rhs: Self) {
476        *self = *self / rhs
477    }
478}
479impl DivAssign<&Quad> for Quad {
480    fn div_assign(&mut self, rhs: &Quad) {
481        *self = *self / *rhs
482    }
483}
484
485impl RemAssign for Quad {
486    fn rem_assign(&mut self, rhs: Self) {
487        *self = *self % rhs
488    }
489}
490impl RemAssign<&Quad> for Quad {
491    fn rem_assign(&mut self, rhs: &Quad) {
492        *self = *self % *rhs
493    }
494}
495impl From<f64> for Quad {
496    #[inline(always)]
497    fn from(value: f64) -> Self {
498        Quad(value, 0.0)
499    }
500}
501
502impl num_traits::Zero for Quad {
503    fn zero() -> Self {
504        Self::ZERO
505    }
506
507    fn is_zero(&self) -> bool {
508        *self == Self::ZERO
509    }
510}
511
512impl num_traits::One for Quad {
513    fn one() -> Self {
514        Self::ONE
515    }
516
517    fn is_one(&self) -> bool {
518        *self == Self::ONE
519    }
520}
521impl num_traits::Num for Quad {
522    type FromStrRadixErr = num_traits::ParseFloatError;
523
524    fn from_str_radix(str: &str, radix: u32) -> Result<Self, Self::FromStrRadixErr> {
525        let _ = str;
526        let _ = radix;
527
528        Err(num_traits::ParseFloatError {
529            kind: num_traits::FloatErrorKind::Invalid,
530        })
531    }
532}
533
534#[cfg(test)]
535mod tests {
536    use super::*;
537
538    #[test]
539    fn test_sqrt() {
540        let x = Quad(3.5, 0.0);
541
542        let y = x.sqrt();
543        assert!(y * y - x < Quad::from(1e-30));
544
545        const {
546            let x = Quad(2.0, 0.0);
547            let y = x.const_sqrt();
548            assert!((y.const_mul(y).sub_accurate(x).abs()).lt(Quad::from_f64(1e-30)));
549        };
550    }
551
552    #[test]
553    fn test_rem() {
554        let x = Quad::from(36.0) / Quad::from(10.0);
555        assert!((x % Quad::from(0.5) - Quad::from(10.0).recip()).abs() < Quad::from(1e-30));
556    }
557}
558
559unsafe impl<T: Zeroable> Zeroable for Quad<T> {}
560unsafe impl<T: Pod> Pod for Quad<T> {}
qd/lib.rs

qd/
lib.rs