qd/simd/
mod.rs

1use super::*;
2use pulp::Simd;
3
4pub extern crate pulp;
5
6#[inline(always)]
7fn quick_two_sum<S: Simd>(simd: S, large: S::f64s, small: S::f64s) -> Quad<S::f64s> {
8    let s = simd.add_f64s(large, small);
9    let e = simd.sub_f64s(small, simd.sub_f64s(s, large));
10    Quad(s, e)
11}
12
13#[inline(always)]
14fn two_sum<S: Simd>(simd: S, a: S::f64s, b: S::f64s) -> Quad<S::f64s> {
15    let gt = simd.greater_than_f64s(simd.abs_f64s(a), simd.abs_f64s(b));
16    let large = simd.select_f64s_m64s(gt, a, b);
17    let small = simd.select_f64s_m64s(gt, b, a);
18    quick_two_sum(simd, large, small)
19}
20
21#[inline(always)]
22fn two_prod<S: Simd>(simd: S, a: S::f64s, b: S::f64s) -> Quad<S::f64s> {
23    let p = simd.mul_f64s(a, b);
24    let e = simd.mul_add_f64s(a, b, simd.neg_f64s(p));
25
26    Quad(p, e)
27}
28
29#[inline(always)]
30pub fn eq<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
31    simd.and_m64s(simd.equal_f64s(a.0, b.0), simd.equal_f64s(a.1, b.1))
32}
33
34#[inline(always)]
35pub fn ne<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
36    simd.not_m64s(eq(simd, a, b))
37}
38
39#[inline(always)]
40pub fn less_than<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
41    let lt0 = simd.less_than_f64s(a.0, b.0);
42    let eq0 = simd.equal_f64s(a.0, b.0);
43    let lt1 = simd.less_than_f64s(a.1, b.1);
44
45    simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
46}
47
48#[inline(always)]
49pub fn greater_than<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
50    let lt0 = simd.greater_than_f64s(a.0, b.0);
51    let eq0 = simd.equal_f64s(a.0, b.0);
52    let lt1 = simd.greater_than_f64s(a.1, b.1);
53
54    simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
55}
56
57#[inline(always)]
58pub fn less_than_or_equal<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
59    let lt0 = simd.less_than_or_equal_f64s(a.0, b.0);
60    let eq0 = simd.equal_f64s(a.0, b.0);
61    let lt1 = simd.less_than_or_equal_f64s(a.1, b.1);
62
63    simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
64}
65
66#[inline(always)]
67pub fn greater_than_or_equal<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
68    let lt0 = simd.greater_than_or_equal_f64s(a.0, b.0);
69    let eq0 = simd.equal_f64s(a.0, b.0);
70    let lt1 = simd.greater_than_or_equal_f64s(a.1, b.1);
71
72    simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
73}
74
75#[inline(always)]
76pub fn neg<S: Simd>(simd: S, a: Quad<S::f64s>) -> Quad<S::f64s> {
77    Quad(simd.neg_f64s(a.0), simd.neg_f64s(a.1))
78}
79
80#[inline(always)]
81pub fn add_accurate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
82    let Quad(s0, e1) = two_sum(simd, a.0, b.0);
83    let Quad(s1, e2) = two_sum(simd, a.1, b.1);
84
85    let e1 = simd.add_f64s(e1, s1);
86
87    let Quad(s0, e1) = quick_two_sum(simd, s0, e1);
88    let e1 = simd.add_f64s(e1, e2);
89
90    quick_two_sum(simd, s0, e1)
91}
92
93#[inline(always)]
94pub fn add_estimate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
95    let Quad(s0, e1) = two_sum(simd, a.0, b.0);
96    let s1 = simd.add_f64s(a.1, b.1);
97    let e1 = simd.add_f64s(e1, s1);
98
99    quick_two_sum(simd, s0, e1)
100}
101
102#[inline(always)]
103pub fn sub_accurate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
104    add_accurate(simd, a, neg(simd, b))
105}
106
107#[inline(always)]
108pub fn sub_estimate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
109    add_estimate(simd, a, neg(simd, b))
110}
111
112#[inline(always)]
113pub fn mul<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
114    let Quad(p0, e1) = two_prod(simd, a.0, b.0);
115    let e1 = simd.mul_add_f64s(a.0, b.1, e1);
116    let e1 = simd.mul_add_f64s(a.1, b.0, e1);
117    quick_two_sum(simd, p0, e1)
118}
119
120#[inline(always)]
121pub fn select<S: Simd>(
122    simd: S,
123    mask: S::m64s,
124    if_true: Quad<S::f64s>,
125    if_false: Quad<S::f64s>,
126) -> Quad<S::f64s> {
127    Quad(
128        simd.select_f64s_m64s(mask, if_true.0, if_false.0),
129        simd.select_f64s_m64s(mask, if_true.1, if_false.1),
130    )
131}
132
133#[inline(always)]
134pub fn splat<S: Simd>(simd: S, a: Quad) -> Quad<S::f64s> {
135    Quad(simd.splat_f64s(a.0), simd.splat_f64s(a.1))
136}
137
138#[inline(always)]
139pub fn abs<S: Simd>(simd: S, a: Quad<S::f64s>) -> Quad<S::f64s> {
140    let sign_bit = simd.and_f64s(a.0, simd.splat_f64s(-0.0));
141
142    Quad(simd.xor_f64s(a.0, sign_bit), simd.xor_f64s(a.1, sign_bit))
143}