1use super::*;
2use pulp::Simd;
3
4pub extern crate pulp;
5
6#[inline(always)]
7fn quick_two_sum<S: Simd>(simd: S, large: S::f64s, small: S::f64s) -> Quad<S::f64s> {
8 let s = simd.add_f64s(large, small);
9 let e = simd.sub_f64s(small, simd.sub_f64s(s, large));
10 Quad(s, e)
11}
12
13#[inline(always)]
14fn two_sum<S: Simd>(simd: S, a: S::f64s, b: S::f64s) -> Quad<S::f64s> {
15 let gt = simd.greater_than_f64s(simd.abs_f64s(a), simd.abs_f64s(b));
16 let large = simd.select_f64s_m64s(gt, a, b);
17 let small = simd.select_f64s_m64s(gt, b, a);
18 quick_two_sum(simd, large, small)
19}
20
21#[inline(always)]
22fn two_prod<S: Simd>(simd: S, a: S::f64s, b: S::f64s) -> Quad<S::f64s> {
23 let p = simd.mul_f64s(a, b);
24 let e = simd.mul_add_f64s(a, b, simd.neg_f64s(p));
25
26 Quad(p, e)
27}
28
29#[inline(always)]
30pub fn eq<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
31 simd.and_m64s(simd.equal_f64s(a.0, b.0), simd.equal_f64s(a.1, b.1))
32}
33
34#[inline(always)]
35pub fn ne<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
36 simd.not_m64s(eq(simd, a, b))
37}
38
39#[inline(always)]
40pub fn less_than<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
41 let lt0 = simd.less_than_f64s(a.0, b.0);
42 let eq0 = simd.equal_f64s(a.0, b.0);
43 let lt1 = simd.less_than_f64s(a.1, b.1);
44
45 simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
46}
47
48#[inline(always)]
49pub fn greater_than<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
50 let lt0 = simd.greater_than_f64s(a.0, b.0);
51 let eq0 = simd.equal_f64s(a.0, b.0);
52 let lt1 = simd.greater_than_f64s(a.1, b.1);
53
54 simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
55}
56
57#[inline(always)]
58pub fn less_than_or_equal<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
59 let lt0 = simd.less_than_or_equal_f64s(a.0, b.0);
60 let eq0 = simd.equal_f64s(a.0, b.0);
61 let lt1 = simd.less_than_or_equal_f64s(a.1, b.1);
62
63 simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
64}
65
66#[inline(always)]
67pub fn greater_than_or_equal<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> S::m64s {
68 let lt0 = simd.greater_than_or_equal_f64s(a.0, b.0);
69 let eq0 = simd.equal_f64s(a.0, b.0);
70 let lt1 = simd.greater_than_or_equal_f64s(a.1, b.1);
71
72 simd.or_m64s(lt0, simd.and_m64s(eq0, lt1))
73}
74
75#[inline(always)]
76pub fn neg<S: Simd>(simd: S, a: Quad<S::f64s>) -> Quad<S::f64s> {
77 Quad(simd.neg_f64s(a.0), simd.neg_f64s(a.1))
78}
79
80#[inline(always)]
81pub fn add_accurate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
82 let Quad(s0, e1) = two_sum(simd, a.0, b.0);
83 let Quad(s1, e2) = two_sum(simd, a.1, b.1);
84
85 let e1 = simd.add_f64s(e1, s1);
86
87 let Quad(s0, e1) = quick_two_sum(simd, s0, e1);
88 let e1 = simd.add_f64s(e1, e2);
89
90 quick_two_sum(simd, s0, e1)
91}
92
93#[inline(always)]
94pub fn add_estimate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
95 let Quad(s0, e1) = two_sum(simd, a.0, b.0);
96 let s1 = simd.add_f64s(a.1, b.1);
97 let e1 = simd.add_f64s(e1, s1);
98
99 quick_two_sum(simd, s0, e1)
100}
101
102#[inline(always)]
103pub fn sub_accurate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
104 add_accurate(simd, a, neg(simd, b))
105}
106
107#[inline(always)]
108pub fn sub_estimate<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
109 add_estimate(simd, a, neg(simd, b))
110}
111
112#[inline(always)]
113pub fn mul<S: Simd>(simd: S, a: Quad<S::f64s>, b: Quad<S::f64s>) -> Quad<S::f64s> {
114 let Quad(p0, e1) = two_prod(simd, a.0, b.0);
115 let e1 = simd.mul_add_f64s(a.0, b.1, e1);
116 let e1 = simd.mul_add_f64s(a.1, b.0, e1);
117 quick_two_sum(simd, p0, e1)
118}
119
120#[inline(always)]
121pub fn select<S: Simd>(
122 simd: S,
123 mask: S::m64s,
124 if_true: Quad<S::f64s>,
125 if_false: Quad<S::f64s>,
126) -> Quad<S::f64s> {
127 Quad(
128 simd.select_f64s_m64s(mask, if_true.0, if_false.0),
129 simd.select_f64s_m64s(mask, if_true.1, if_false.1),
130 )
131}
132
133#[inline(always)]
134pub fn splat<S: Simd>(simd: S, a: Quad) -> Quad<S::f64s> {
135 Quad(simd.splat_f64s(a.0), simd.splat_f64s(a.1))
136}
137
138#[inline(always)]
139pub fn abs<S: Simd>(simd: S, a: Quad<S::f64s>) -> Quad<S::f64s> {
140 let sign_bit = simd.and_f64s(a.0, simd.splat_f64s(-0.0));
141
142 Quad(simd.xor_f64s(a.0, sign_bit), simd.xor_f64s(a.1, sign_bit))
143}