faer/sparse/linalg/
amd.rs

1//! approximate minimum degree ordering.
2
3// AMD, Copyright (c), 1996-2022, Timothy A. Davis,
4// Patrick R. Amestoy, and Iain S. Duff.  All Rights Reserved.
5//
6// Availability:
7//
8//     http://www.suitesparse.com
9//
10// -------------------------------------------------------------------------------
11// AMD License: BSD 3-clause:
12// -------------------------------------------------------------------------------
13//
14//     Redistribution and use in source and binary forms, with or without
15//     modification, are permitted provided that the following conditions are met:
16//         * Redistributions of source code must retain the above copyright notice, this list of
17//           conditions and the following disclaimer.
18//         * Redistributions in binary form must reproduce the above copyright notice, this list of
19//           conditions and the following disclaimer in the documentation and/or other materials
20//           provided with the distribution.
21//         * Neither the name of the organizations to which the authors are affiliated, nor the
22//           names of its contributors may be used to endorse or promote products derived from this
23//           software without specific prior written permission.
24//
25//     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26//     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27//     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28//     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
29//     DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
30//     (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
31//     SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
32//     CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33//     LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34//     OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
35//     DAMAGE.
36
37use crate::internal_prelude_sp::*;
38use crate::{assert, debug_assert};
39
40#[inline]
41fn post_tree<'n, I: Index>(
42	root: Idx<'n, usize>,
43	k: usize,
44	child: &mut Array<'n, MaybeIdx<'n, I>>,
45	sibling: &Array<'n, MaybeIdx<'n, I>>,
46	order: &mut Array<'n, I::Signed>,
47	stack: &mut Array<'n, I::Signed>,
48) -> usize {
49	let N = child.len();
50	let I = I::Signed::truncate;
51	let mut top = 1usize;
52	stack[N.check(0)] = I(*root);
53
54	let mut k = k;
55
56	while top > 0 {
57		let i = N.check(stack[N.check(top - 1)].zx());
58		if let Some(child_) = child[i].idx() {
59			let mut f = child_.zx();
60			loop {
61				top += 1;
62				if let Some(new_f) = sibling[f].idx() {
63					f = new_f.zx();
64				} else {
65					break;
66				}
67			}
68
69			let mut t = top;
70			let mut f = child_.zx();
71			loop {
72				t -= 1;
73				stack[N.check(t)] = I(*f);
74				if let Some(new_f) = sibling[f].idx() {
75					f = new_f.zx();
76				} else {
77					break;
78				}
79			}
80			child[i] = MaybeIdx::none();
81		} else {
82			top -= 1;
83			order[i] = I(k);
84			k += 1;
85		}
86	}
87
88	k
89}
90
91#[inline]
92fn postorder<'n, 'out, I: Index>(
93	order: &'out mut Array<'n, I::Signed>,
94	etree: &Array<'n, MaybeIdx<'n, I>>,
95	nv: &Array<'n, I::Signed>,
96	f_size: &Array<'n, I::Signed>,
97	stack: &mut MemStack,
98) {
99	let N = order.len();
100	let n = *N;
101	if n == 0 {
102		return;
103	}
104
105	let I = I::Signed::truncate;
106	let zero = I(0);
107	let none = I(NONE);
108
109	let (ref mut child, stack) = stack.collect(repeat_n!(MaybeIdx::<'_, I>::none(), n));
110	let (ref mut sibling, stack) = stack.collect(repeat_n!(MaybeIdx::<'_, I>::none(), n));
111	let (ref mut stack, _) = stack.collect(repeat_n!(I(0), n));
112
113	let child = Array::from_mut(child, N);
114	let sibling = Array::from_mut(sibling, N);
115	let stack = Array::from_mut(stack, N);
116
117	for j in N.indices().rev() {
118		if nv[j] > zero {
119			if let Some(parent) = etree[j].idx() {
120				let parent = parent.zx();
121				sibling[j] = child[parent];
122				child[parent] = MaybeIdx::from_index(j.truncate());
123			}
124		}
125	}
126
127	for i in N.indices() {
128		if nv[i] > zero {
129			if let Some(child_) = child[i].idx() {
130				let child_ = child_.zx();
131
132				let mut fprev = MaybeIdx::<'n>::none();
133				let mut bigfprev = MaybeIdx::<'n>::none();
134				let mut bigf = MaybeIdx::<'n>::none();
135				let mut maxfrsize = none;
136
137				let mut f = MaybeIdx::from_index(child_);
138				while let Some(f_) = f.idx() {
139					let frsize = f_size[f_];
140					if frsize >= maxfrsize {
141						maxfrsize = frsize;
142						bigfprev = fprev;
143						bigf = f;
144					}
145					fprev = f;
146					f = sibling[f_].sx();
147				}
148
149				let bigf = bigf.idx().unwrap();
150				let fnext = sibling[bigf];
151				if let Some(fnext) = fnext.idx() {
152					if let Some(bigfprev) = bigfprev.idx() {
153						sibling[bigfprev] = MaybeIdx::from_index(fnext);
154					} else {
155						child[i] = MaybeIdx::from_index(fnext);
156					}
157
158					let fprev = fprev.idx().unwrap();
159					sibling[bigf] = MaybeIdx::none();
160					sibling[fprev] = MaybeIdx::from_index(bigf.truncate());
161				}
162			}
163		}
164	}
165
166	order.as_mut().fill(none);
167
168	let mut k = 0usize;
169	for i in N.indices() {
170		if etree[i].idx().is_none() && nv[i] > zero {
171			k = post_tree(i, k, child, sibling, order, stack);
172		}
173	}
174}
175
176#[inline(always)]
177fn flip<I: SignedIndex>(i: I) -> I {
178	-I::truncate(2) - i
179}
180
181#[inline]
182fn clear_flag<I: SignedIndex>(wflg: I, wbig: I, w: &mut [I]) -> I {
183	let I = I::truncate;
184	let zero = I(0);
185	let one = I(1);
186
187	if wflg < I(2) || wflg >= wbig {
188		for x in w {
189			if *x != zero {
190				*x = one;
191			}
192		}
193		return I(2);
194	}
195	wflg
196}
197
198#[allow(clippy::comparison_chain)]
199fn amd_2<I: Index>(
200	pe: &mut [I::Signed],  // input/output
201	iw: &mut [I::Signed],  // input/modified (undefined on output)
202	len: &mut [I::Signed], // input/modified (undefined on output)
203	pfree: usize,
204	next: &mut [I::Signed],
205	last: &mut [I::Signed],
206	control: Control,
207	stack: &mut MemStack,
208) -> FlopCount {
209	let n = pe.len();
210	assert!(n < I::Signed::MAX.zx());
211
212	let mut pfree = pfree;
213	let iwlen = iw.len();
214
215	let I = I::Signed::truncate;
216	let none = I(NONE);
217	let zero = I(0);
218	let one = I(1);
219
220	let alpha = control.dense;
221	let aggressive = control.aggressive;
222
223	let mut mindeg = 0usize;
224	let mut ncmpa = 0usize;
225	let mut lemax = 0usize;
226
227	let mut ndiv = 0.0;
228	let mut nms_lu = 0.0;
229	let mut nms_ldl = 0.0;
230
231	let mut nel = 0usize;
232	let mut me = none;
233
234	let dense = if alpha < 0.0 {
235		n - 2
236	} else {
237		(alpha * faer_traits::math_utils::sqrt(&(n as f64))) as usize
238	};
239
240	let dense = Ord::max(dense, 16);
241	let dense = Ord::min(dense, n);
242
243	let (mut w, stack) = stack.collect(repeat_n!(one, n));
244	let (mut nv, stack) = stack.collect(repeat_n!(one, n));
245	let (mut elen, stack) = stack.collect(repeat_n!(zero, n));
246
247	let nv = &mut *nv;
248	let elen = &mut *elen;
249	let w = &mut *w;
250
251	let wbig = I::Signed::MAX - I(n);
252	let mut wflg = I(0);
253	let mut ndense = zero;
254
255	{
256		let (mut head, stack) = stack.collect(repeat_n!(none, n));
257		let (mut degree, _) = stack.collect(repeat_n!(none, n));
258
259		let head = &mut *head;
260		let degree = &mut *degree;
261
262		last.fill(none);
263		head.fill(none);
264		next.fill(none);
265		degree.copy_from_slice(len);
266
267		for i in 0..n {
268			let deg = degree[i].zx();
269			if deg == 0 {
270				elen[i] = flip(one);
271				nel += 1;
272				pe[i] = none;
273				w[i] = zero;
274			} else if deg > dense {
275				ndense += one;
276				nv[i] = zero;
277				elen[i] = none;
278				pe[i] = none;
279				nel += 1;
280			} else {
281				let inext = head[deg];
282				if inext != none {
283					last[inext.zx()] = I(i);
284				}
285				next[i] = inext;
286				head[deg] = I(i);
287			}
288		}
289
290		while nel < n {
291			let mut deg = mindeg;
292			while deg < n {
293				me = head[deg];
294				if me != none {
295					break;
296				}
297				deg += 1;
298			}
299			mindeg = deg;
300
301			let me = me.zx();
302			let inext = next[me];
303			if inext != none {
304				last[inext.zx()] = none;
305			}
306			head[deg] = inext;
307
308			let elenme = elen[me];
309			let mut nvpiv = nv[me];
310			nel += nvpiv.zx();
311
312			nv[me] = -nvpiv;
313			let mut degme = 0usize;
314			let mut pme1;
315			let mut pme2;
316			if elenme == zero {
317				pme1 = pe[me];
318				pme2 = pme1 - one;
319
320				for p in pme1.zx()..(pme1 + len[me]).zx() {
321					let i = iw[p].zx();
322					let nvi = nv[i];
323					if nvi > zero {
324						degme += nvi.zx();
325						nv[i] = -nvi;
326						pme2 += one;
327						iw[pme2.zx()] = I(i);
328
329						let ilast = last[i];
330						let inext = next[i];
331
332						if inext != none {
333							last[inext.zx()] = ilast;
334						}
335						if ilast != none {
336							next[ilast.zx()] = inext;
337						} else {
338							head[degree[i].zx()] = inext;
339						}
340					}
341				}
342			} else {
343				let mut p = pe[me].zx();
344				pme1 = I(pfree);
345				let slenme = (len[me] - elenme).zx();
346
347				for knt1 in 1..elenme.zx() + 2 {
348					let e;
349					let mut pj;
350					let ln;
351					if I(knt1) > elenme {
352						e = me;
353						pj = I(p);
354						ln = slenme;
355					} else {
356						e = iw[p].zx();
357						p += 1;
358						pj = pe[e];
359						ln = len[e].zx();
360					}
361
362					for knt2 in 1..ln + 1 {
363						let i = iw[pj.zx()].zx();
364						pj += one;
365						let nvi = nv[i];
366
367						if nvi > zero {
368							if pfree >= iwlen {
369								pe[me] = I(p);
370								len[me] -= I(knt1);
371								if len[me] == zero {
372									pe[me] = none;
373								}
374
375								pe[e] = pj;
376								len[e] = I(ln - knt2);
377								if len[e] == zero {
378									pe[e] = none;
379								}
380
381								ncmpa += 1;
382								for (j, pe) in pe.iter_mut().enumerate() {
383									let pn = *pe;
384									if pn >= zero {
385										let pn = pn.zx();
386										*pe = iw[pn];
387										iw[pn] = flip(I(j));
388									}
389								}
390
391								let mut psrc = 0usize;
392								let mut pdst = 0usize;
393								let pend = pme1.zx();
394
395								while psrc < pend {
396									let j = flip(iw[psrc]);
397									psrc += 1;
398									if j >= zero {
399										let j = j.zx();
400										iw[pdst] = pe[j];
401										pe[j] = I(pdst);
402										pdst += 1;
403										let lenj = len[j].zx();
404
405										if lenj > 0 {
406											iw.copy_within(psrc..psrc + lenj - 1, pdst);
407											psrc += lenj - 1;
408											pdst += lenj - 1;
409										}
410									}
411								}
412
413								let p1 = pdst;
414								iw.copy_within(pme1.zx()..pfree, pdst);
415								pdst += pfree - pme1.zx();
416
417								pme1 = I(p1);
418								pfree = pdst;
419								pj = pe[e];
420								p = pe[me].zx();
421							}
422
423							degme += nvi.zx();
424							nv[i] = -nvi;
425							iw[pfree] = I(i);
426							pfree += 1;
427
428							let ilast = last[i];
429							let inext = next[i];
430
431							if inext != none {
432								last[inext.zx()] = ilast;
433							}
434							if ilast != none {
435								next[ilast.zx()] = inext;
436							} else {
437								head[degree[i].zx()] = inext;
438							}
439						}
440					}
441					if e != me {
442						pe[e] = flip(I(me));
443						w[e] = zero;
444					}
445				}
446				pme2 = I(pfree - 1);
447			}
448
449			degree[me] = I(degme);
450			pe[me] = pme1;
451			len[me] = pme2 - pme1 + one;
452			elen[me] = flip(nvpiv + I(degme));
453
454			wflg = clear_flag(wflg, wbig, w);
455			assert!(pme1 >= zero);
456			//assert!(pme2 >= zero);
457			for pme in pme1.zx()..(pme2 + one).zx() {
458				let i = iw[pme].zx();
459				let eln = elen[i];
460				if eln > zero {
461					let nvi = -nv[i];
462					let wnvi = wflg - nvi;
463					for iw in iw[pe[i].zx()..][..eln.zx()].iter() {
464						let e = iw.zx();
465						let mut we = w[e];
466						if we >= wflg {
467							we -= nvi;
468						} else if we != zero {
469							we = degree[e] + wnvi;
470						}
471						w[e] = we;
472					}
473				}
474			}
475
476			for pme in pme1.zx()..(pme2 + one).zx() {
477				let i = iw[pme].zx();
478				let p1 = pe[i].zx();
479				let p2 = p1 + elen[i].zx();
480				let mut pn = p1;
481
482				let mut hash = 0usize;
483				deg = 0usize;
484
485				if aggressive {
486					for p in p1..p2 {
487						let e = iw[p].zx();
488						let we = w[e];
489						if we != zero {
490							let dext = we - wflg;
491							if dext > zero {
492								deg += dext.zx();
493								iw[pn] = I(e);
494								pn += 1;
495								hash = hash.wrapping_add(e);
496							} else {
497								pe[e] = flip(I(me));
498								w[e] = zero;
499							}
500						}
501					}
502				} else {
503					for p in p1..p2 {
504						let e = iw[p].zx();
505						let we = w[e];
506						if we != zero {
507							let dext = (we - wflg).zx();
508							deg += dext;
509							iw[pn] = I(e);
510							pn += 1;
511							hash = hash.wrapping_add(e);
512						}
513					}
514				}
515
516				elen[i] = I(pn - p1 + 1);
517				let p3 = pn;
518				let p4 = p1 + len[i].zx();
519				for p in p2..p4 {
520					let j = iw[p].zx();
521					let nvj = nv[j];
522					if nvj > zero {
523						deg += nvj.zx();
524						iw[pn] = I(j);
525						pn += 1;
526						hash = hash.wrapping_add(j);
527					}
528				}
529
530				if elen[i] == one && p3 == pn {
531					pe[i] = flip(I(me));
532					let nvi = -nv[i];
533					assert!(nvi >= zero);
534					degme -= nvi.zx();
535					nvpiv += nvi;
536					nel += nvi.zx();
537					nv[i] = zero;
538					elen[i] = none;
539				} else {
540					degree[i] = Ord::min(degree[i], I(deg));
541					iw[pn] = iw[p3];
542					iw[p3] = iw[p1];
543					iw[p1] = I(me);
544					len[i] = I(pn - p1 + 1);
545					hash %= n;
546
547					let j = head[hash];
548					if j <= none {
549						next[i] = flip(j);
550						head[hash] = flip(I(i));
551					} else {
552						next[i] = last[j.zx()];
553						last[j.zx()] = I(i);
554					}
555					last[i] = I(hash);
556				}
557			}
558
559			degree[me] = I(degme);
560			lemax = Ord::max(lemax, degme);
561			wflg += I(lemax);
562			wflg = clear_flag(wflg, wbig, w);
563
564			for pme in pme1.zx()..(pme2 + one).zx() {
565				let mut i = iw[pme].zx();
566				if nv[i] < zero {
567					let hash = last[i].zx();
568					let j = head[hash];
569
570					if j == none {
571						i = NONE;
572					} else if j < none {
573						i = flip(j).zx();
574						head[hash] = none;
575					} else {
576						i = last[j.zx()].sx();
577						last[j.zx()] = none;
578					}
579
580					while i != NONE && next[i] != none {
581						let ln = len[i];
582						let eln = elen[i];
583
584						for p in (pe[i] + one).zx()..(pe[i] + ln).zx() {
585							w[iw[p].zx()] = wflg;
586						}
587						let mut jlast = i;
588						let mut j = next[i].sx();
589						while j != NONE {
590							let mut ok = len[j] == ln && elen[j] == eln;
591							for p in (pe[j] + one).zx()..(pe[j] + ln).zx() {
592								if w[iw[p].zx()] != wflg {
593									ok = false;
594								}
595							}
596
597							if ok {
598								pe[j] = flip(I(i));
599								nv[i] += nv[j];
600								nv[j] = zero;
601								elen[j] = none;
602								j = next[j].sx();
603								next[jlast] = I(j);
604							} else {
605								jlast = j;
606								j = next[j].sx();
607							}
608						}
609
610						wflg += one;
611						i = next[i].sx();
612					}
613				}
614			}
615
616			let mut p = pme1.zx();
617			let nleft = n - nel;
618			for pme in pme1.zx()..(pme2 + one).zx() {
619				let i = iw[pme].zx();
620				let nvi = -nv[i];
621				if nvi > zero {
622					nv[i] = nvi;
623					deg = degree[i].zx() + degme - nvi.zx();
624					deg = Ord::min(deg, nleft - nvi.zx());
625
626					let inext = head[deg];
627					if inext != none {
628						last[inext.zx()] = I(i);
629					}
630					next[i] = inext;
631					last[i] = none;
632					head[deg] = I(i);
633
634					mindeg = Ord::min(mindeg, deg);
635					degree[i] = I(deg);
636					iw[p] = I(i);
637					p += 1;
638				}
639			}
640			nv[me] = nvpiv;
641			len[me] = I(p) - pme1;
642			if len[me] == zero {
643				pe[me] = none;
644				w[me] = zero;
645			}
646			if elenme != zero {
647				pfree = p;
648			}
649
650			{
651				let f = nvpiv.sx() as isize as f64;
652				let r = degme as f64 + ndense.sx() as isize as f64;
653				let lnzme = f * r + (f - 1.0) * f / 2.0;
654				ndiv += lnzme;
655				let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
656				nms_lu += s;
657				nms_ldl += (s + lnzme) / 2.0;
658			}
659		}
660
661		{
662			let f = ndense.sx() as isize as f64;
663			let lnzme = (f - 1.0) * f / 2.0;
664			ndiv += lnzme;
665			let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
666			nms_lu += s;
667			nms_ldl += (s + lnzme) / 2.0;
668		}
669
670		for pe in pe.iter_mut() {
671			*pe = flip(*pe);
672		}
673		for elen in elen.iter_mut() {
674			*elen = flip(*elen);
675		}
676
677		for i in 0..n {
678			if nv[i] == zero {
679				let mut j = pe[i].sx();
680				if j == NONE {
681					continue;
682				}
683
684				while nv[j] == zero {
685					j = pe[j].zx();
686				}
687				let e = I(j);
688				let mut j = i;
689				while nv[j] == zero {
690					let jnext = pe[j];
691					pe[j] = e;
692					j = jnext.zx();
693				}
694			}
695		}
696	}
697
698	with_dim!(N, n);
699	postorder(
700		Array::from_mut(w, N),
701		Array::from_ref(MaybeIdx::<'_, I>::from_slice_ref_checked(pe, N), N),
702		Array::from_ref(nv, N),
703		Array::from_ref(elen, N),
704		stack,
705	);
706
707	let (ref mut head, _) = stack.collect(repeat_n!(none, n));
708	next.fill(none);
709
710	for (e, &k) in w.iter().enumerate() {
711		if k != none {
712			head[k.zx()] = I(e);
713		}
714	}
715	nel = 0;
716	for &e in head.iter() {
717		if e == none {
718			break;
719		}
720		let e = e.zx();
721		next[e] = I(nel);
722		nel += nv[e].zx();
723	}
724	assert!(nel == n - ndense.zx());
725
726	for i in 0..n {
727		if nv[i] == zero {
728			let e = pe[i];
729			if e != none {
730				let e = e.zx();
731				next[i] = next[e];
732				next[e] += one;
733			} else {
734				next[i] = I(nel);
735				nel += 1;
736			}
737		}
738	}
739	assert!(nel == n);
740	for i in 0..n {
741		last[next[i].zx()] = I(i);
742	}
743
744	let _ = ncmpa;
745	FlopCount {
746		n_div: ndiv,
747		n_mult_subs_ldl: nms_ldl,
748		n_mult_subs_lu: nms_lu,
749	}
750}
751
752#[allow(clippy::comparison_chain)]
753fn amd_1<I: Index>(
754	perm: &mut [I::Signed],
755	perm_inv: &mut [I::Signed],
756	A: SymbolicSparseColMatRef<'_, I>,
757	len: &mut [I::Signed],
758	iwlen: usize,
759	control: Control,
760	stack: &mut MemStack,
761) -> FlopCount {
762	let n = perm.len();
763	let I = I::Signed::truncate;
764
765	let zero = I(0);
766	let one = I(1);
767
768	let (p_e, stack) = unsafe { stack.make_raw::<I::Signed>(n) };
769	let (s_p, stack) = unsafe { stack.make_raw::<I::Signed>(n) };
770	let (i_w, stack) = unsafe { stack.make_raw::<I::Signed>(iwlen) };
771
772	// Construct the pointers for A+A'.
773
774	let mut pfree = zero;
775	for j in 0..n {
776		p_e[j] = pfree;
777		s_p[j] = pfree;
778		pfree += len[j];
779	}
780	let pfree = pfree.zx();
781
782	// Note that this restriction on iwlen is slightly more restrictive than
783	// what is strictly required in amd_2. amd_2 can operate with no elbow
784	// room at all, but it will be very slow. For better performance, at
785	// least size-n elbow room is enforced.
786	assert!(iwlen >= pfree + n);
787
788	{
789		with_dim!(N, n);
790		let (t_p, _) = unsafe { stack.make_raw::<I::Signed>(n) };
791
792		let A = A.as_shape(N, N);
793		let s_p = Array::from_mut(s_p, N);
794		let t_p = Array::from_mut(t_p, N);
795
796		for k in N.indices() {
797			// Construct A+A'.
798			let mut seen = zero;
799			let mut j_prev = usize::MAX;
800			for j in A.row_idx_of_col(k) {
801				if j_prev == *j {
802					j_prev = *j;
803					continue;
804				}
805				j_prev = *j;
806
807				if j < k {
808					// Entry A(j,k) in the strictly upper triangular part.
809					i_w[s_p[j].zx()] = I(*k);
810					s_p[j] += one;
811
812					i_w[s_p[k].zx()] = I(*j);
813					s_p[k] += one;
814
815					seen += one;
816				} else {
817					if j == k {
818						seen += one;
819					}
820					break;
821				}
822
823				// Scan lower triangular part of A, in column j until reaching
824				// row k. Start where last scan left off.
825				let mut seen_j = zero;
826				let mut i_prev = usize::MAX;
827				for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
828					if i_prev == *i.zx() {
829						i_prev = *i.zx();
830						continue;
831					}
832					i_prev = *i.zx();
833
834					let i = i.zx();
835					if i < k {
836						// A (i,j) is only in the lower part, not in upper.
837
838						i_w[s_p[i].zx()] = I(*j);
839						s_p[i] += one;
840
841						i_w[s_p[j].zx()] = I(*i);
842						s_p[j] += one;
843
844						seen_j += one;
845					} else {
846						if i == k {
847							seen_j += one;
848						}
849						break;
850					}
851				}
852				t_p[j] += seen_j;
853			}
854			t_p[k] = seen;
855		}
856
857		// Clean up, for remaining mismatched entries.
858		for j in N.indices() {
859			let mut i_prev = usize::MAX;
860			for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
861				if i_prev == *i.zx() {
862					i_prev = *i.zx();
863					continue;
864				}
865				i_prev = *i.zx();
866
867				let i = i.zx();
868				i_w[s_p[i].zx()] = I(*j);
869				s_p[i] += one;
870
871				i_w[s_p[j].zx()] = I(*i);
872				s_p[j] += one;
873			}
874		}
875	}
876
877	debug_assert!(s_p[n - 1] == I(pfree));
878
879	amd_2::<I>(p_e, i_w, len, pfree, perm_inv, perm, control, stack)
880}
881
882fn preprocess<'out, I: Index>(
883	new_col_ptr: &'out mut [I],
884	new_row_idx: &'out mut [I],
885	A: SymbolicSparseColMatRef<'_, I>,
886	stack: &mut MemStack,
887) -> SymbolicSparseColMatRef<'out, I> {
888	let n = A.nrows();
889
890	with_dim!(N, n);
891	let I = I::Signed::truncate;
892	let zero = I(0);
893	let one = I(1);
894
895	let (ref mut w, stack) = stack.collect(repeat_n!(I(0), n));
896	let (ref mut flag, _) = stack.collect(repeat_n!(I(NONE), n));
897
898	let w = Array::from_mut(w, N);
899	let flag = Array::from_mut(flag, N);
900	let A = A.as_shape(N, N);
901
902	for j in N.indices() {
903		let j_ = I(*j);
904		for i in A.row_idx_of_col(j) {
905			if flag[i] != j_ {
906				w[i] += one;
907				flag[i] = j_;
908			}
909		}
910	}
911
912	new_col_ptr[0] = I::from_signed(zero);
913	for (i, [r, r_next]) in iter::zip(N.indices(), windows2(Cell::as_slice_of_cells(Cell::from_mut(new_col_ptr)))) {
914		r_next.set(r.get() + I::from_signed(w[i]));
915	}
916
917	w.as_mut().copy_from_slice(bytemuck::cast_slice(&new_col_ptr[..n]));
918	flag.as_mut().fill(I(NONE));
919
920	for j in N.indices() {
921		let j_ = I(*j);
922		for i in A.row_idx_of_col(j) {
923			if flag[i] != j_ {
924				new_row_idx[w[i].zx()] = I::from_signed(j_);
925				w[i] += one;
926				flag[i] = j_;
927			}
928		}
929	}
930	unsafe { SymbolicSparseColMatRef::new_unchecked(n, n, &*new_col_ptr, None, &new_row_idx[..new_col_ptr[n].zx()]) }
931}
932
933#[allow(clippy::comparison_chain)]
934fn aat<I: Index>(len: &mut [I::Signed], A: SymbolicSparseColMatRef<'_, I>, stack: &mut MemStack) -> Result<usize, FaerError> {
935	{
936		with_dim!(N, A.nrows());
937		let I = I::Signed::truncate;
938		let zero = I(0);
939		let one = I(1);
940		let A = A.as_shape(N, N);
941
942		let n = *N;
943
944		let t_p = &mut *unsafe { stack.make_raw::<I::Signed>(n).0 }; // local workspace
945
946		let len = Array::from_mut(len, N);
947		let t_p = Array::from_mut(t_p, N);
948
949		for k in N.indices() {
950			let mut seen = zero;
951
952			let mut j_prev = usize::MAX;
953			for j in A.row_idx_of_col(k) {
954				if j_prev == *j {
955					j_prev = *j;
956					continue;
957				}
958				j_prev = *j;
959
960				if j < k {
961					seen += one;
962					len[j] += one;
963					len[k] += one;
964				} else {
965					if j == k {
966						seen += one;
967					}
968					break;
969				}
970
971				let mut seen_j = zero;
972				let mut i_prev = usize::MAX;
973				for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
974					if i_prev == *i.zx() {
975						i_prev = *i.zx();
976						continue;
977					}
978					i_prev = *i.zx();
979					let i = i.zx();
980					if i < k {
981						len[i] += one;
982						len[j] += one;
983						seen_j += one;
984					} else {
985						if i == k {
986							seen_j += one;
987						}
988						break;
989					}
990				}
991				t_p[j] += seen_j;
992			}
993			t_p[k] = seen;
994		}
995
996		for j in N.indices() {
997			let mut i_prev = usize::MAX;
998			for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
999				if i_prev == *i.zx() {
1000					i_prev = *i.zx();
1001					continue;
1002				}
1003				i_prev = *i.zx();
1004				len[i.zx()] += one;
1005				len[j] += one;
1006			}
1007		}
1008	}
1009	let nzaat = I::Signed::sum_nonnegative(len).map(I::from_signed);
1010	nzaat.ok_or(FaerError::IndexOverflow).map(I::zx)
1011}
1012
1013/// computes the size and alignment of required workspace for computing the amd ordering of a sorted
1014/// matrix
1015pub fn order_scratch<I: Index>(n: usize, nnz_upper: usize) -> StackReq {
1016	let n_scratch = StackReq::new::<I>(n);
1017	let nzaat = nnz_upper.checked_mul(2).unwrap_or(usize::MAX);
1018	StackReq::all_of(&[
1019		// len
1020		n_scratch,
1021		// A+A.T plus elbow room
1022		StackReq::new::<I>(nzaat.checked_add(nzaat / 5).unwrap_or(usize::MAX)),
1023		n_scratch,
1024		// p_e
1025		n_scratch,
1026		// s_p
1027		n_scratch,
1028		// i_w
1029		n_scratch,
1030		// w
1031		n_scratch,
1032		// nv
1033		n_scratch,
1034		// elen
1035		n_scratch,
1036		// child
1037		n_scratch,
1038		// sibling
1039		n_scratch,
1040		// stack
1041		n_scratch,
1042	])
1043}
1044
1045/// computes the size and alignment of required workspace for computing the amd ordering of an
1046/// unsorted matrix
1047pub fn order_maybe_unsorted_scratch<I: Index>(n: usize, nnz_upper: usize) -> StackReq {
1048	StackReq::all_of(&[order_scratch::<I>(n, nnz_upper), StackReq::new::<I>(n + 1), StackReq::new::<I>(nnz_upper)])
1049}
1050
1051/// computes the approximate minimum degree ordering for reducing the fill-in during the sparse
1052/// cholesky factorization of a matrix with the sparsity pattern of $A + A^\top$
1053pub fn order<I: Index>(
1054	perm: &mut [I],
1055	perm_inv: &mut [I],
1056	A: SymbolicSparseColMatRef<'_, I>,
1057	control: Control,
1058	stack: &mut MemStack,
1059) -> Result<FlopCount, FaerError> {
1060	let n = perm.len();
1061
1062	if n == 0 {
1063		return Ok(FlopCount {
1064			n_div: 0.0,
1065			n_mult_subs_ldl: 0.0,
1066			n_mult_subs_lu: 0.0,
1067		});
1068	}
1069
1070	let (ref mut len, stack) = stack.collect(repeat_n!(I::Signed::truncate(0), n));
1071	let nzaat = aat(len, A, stack)?;
1072	let iwlen = nzaat
1073		.checked_add(nzaat / 5)
1074		.and_then(|x| x.checked_add(n))
1075		.ok_or(FaerError::IndexOverflow)?;
1076	Ok(amd_1::<I>(
1077		bytemuck::cast_slice_mut(perm),
1078		bytemuck::cast_slice_mut(perm_inv),
1079		A,
1080		len,
1081		iwlen,
1082		control,
1083		stack,
1084	))
1085}
1086
1087/// computes the approximate minimum degree ordering for reducing the fill-in during the sparse
1088/// cholesky factorization of a matrix with the sparsity pattern of $A + A^\top$
1089///
1090/// # note
1091/// allows unsorted matrices
1092pub fn order_maybe_unsorted<I: Index>(
1093	perm: &mut [I],
1094	perm_inv: &mut [I],
1095	A: SymbolicSparseColMatRef<'_, I>,
1096	control: Control,
1097	stack: &mut MemStack,
1098) -> Result<FlopCount, FaerError> {
1099	let n = perm.len();
1100
1101	if n == 0 {
1102		return Ok(FlopCount {
1103			n_div: 0.0,
1104			n_mult_subs_ldl: 0.0,
1105			n_mult_subs_lu: 0.0,
1106		});
1107	}
1108	let nnz = A.compute_nnz();
1109	let (new_col_ptr, stack) = unsafe { stack.make_raw::<I>(n + 1) };
1110	let (new_row_idx, stack) = unsafe { stack.make_raw::<I>(nnz) };
1111	let A = preprocess(new_col_ptr, new_row_idx, A, stack);
1112	order(perm, perm_inv, A, control, stack)
1113}
1114
1115/// tuning parameters for the amd implementation
1116#[derive(Debug, Copy, Clone, PartialEq)]
1117pub struct Control {
1118	/// "dense" if degree > dense * sqrt(n)
1119	pub dense: f64,
1120	/// do aggressive absorption
1121	pub aggressive: bool,
1122}
1123
1124impl Default for Control {
1125	#[inline]
1126	fn default() -> Self {
1127		Self {
1128			dense: 10.0,
1129			aggressive: true,
1130		}
1131	}
1132}
1133
1134/// flop count of the ldlt and lu factorizations if the provided ordering is used
1135#[derive(Default, Debug, Copy, Clone, PartialEq)]
1136pub struct FlopCount {
1137	/// number of division
1138	pub n_div: f64,
1139	/// number of multiplications and subtractions for the ldlt factorization
1140	pub n_mult_subs_ldl: f64,
1141	/// number of multiplications and subtractions for the lu factorization
1142	pub n_mult_subs_lu: f64,
1143}