1use crate::internal_prelude_sp::*;
38use crate::{assert, debug_assert};
39
40#[inline]
41fn post_tree<'n, I: Index>(
42 root: Idx<'n, usize>,
43 k: usize,
44 child: &mut Array<'n, MaybeIdx<'n, I>>,
45 sibling: &Array<'n, MaybeIdx<'n, I>>,
46 order: &mut Array<'n, I::Signed>,
47 stack: &mut Array<'n, I::Signed>,
48) -> usize {
49 let N = child.len();
50 let I = I::Signed::truncate;
51 let mut top = 1usize;
52 stack[N.check(0)] = I(*root);
53
54 let mut k = k;
55
56 while top > 0 {
57 let i = N.check(stack[N.check(top - 1)].zx());
58 if let Some(child_) = child[i].idx() {
59 let mut f = child_.zx();
60 loop {
61 top += 1;
62 if let Some(new_f) = sibling[f].idx() {
63 f = new_f.zx();
64 } else {
65 break;
66 }
67 }
68
69 let mut t = top;
70 let mut f = child_.zx();
71 loop {
72 t -= 1;
73 stack[N.check(t)] = I(*f);
74 if let Some(new_f) = sibling[f].idx() {
75 f = new_f.zx();
76 } else {
77 break;
78 }
79 }
80 child[i] = MaybeIdx::none();
81 } else {
82 top -= 1;
83 order[i] = I(k);
84 k += 1;
85 }
86 }
87
88 k
89}
90
91#[inline]
92fn postorder<'n, 'out, I: Index>(
93 order: &'out mut Array<'n, I::Signed>,
94 etree: &Array<'n, MaybeIdx<'n, I>>,
95 nv: &Array<'n, I::Signed>,
96 f_size: &Array<'n, I::Signed>,
97 stack: &mut MemStack,
98) {
99 let N = order.len();
100 let n = *N;
101 if n == 0 {
102 return;
103 }
104
105 let I = I::Signed::truncate;
106 let zero = I(0);
107 let none = I(NONE);
108
109 let (ref mut child, stack) = stack.collect(repeat_n!(MaybeIdx::<'_, I>::none(), n));
110 let (ref mut sibling, stack) = stack.collect(repeat_n!(MaybeIdx::<'_, I>::none(), n));
111 let (ref mut stack, _) = stack.collect(repeat_n!(I(0), n));
112
113 let child = Array::from_mut(child, N);
114 let sibling = Array::from_mut(sibling, N);
115 let stack = Array::from_mut(stack, N);
116
117 for j in N.indices().rev() {
118 if nv[j] > zero {
119 if let Some(parent) = etree[j].idx() {
120 let parent = parent.zx();
121 sibling[j] = child[parent];
122 child[parent] = MaybeIdx::from_index(j.truncate());
123 }
124 }
125 }
126
127 for i in N.indices() {
128 if nv[i] > zero {
129 if let Some(child_) = child[i].idx() {
130 let child_ = child_.zx();
131
132 let mut fprev = MaybeIdx::<'n>::none();
133 let mut bigfprev = MaybeIdx::<'n>::none();
134 let mut bigf = MaybeIdx::<'n>::none();
135 let mut maxfrsize = none;
136
137 let mut f = MaybeIdx::from_index(child_);
138 while let Some(f_) = f.idx() {
139 let frsize = f_size[f_];
140 if frsize >= maxfrsize {
141 maxfrsize = frsize;
142 bigfprev = fprev;
143 bigf = f;
144 }
145 fprev = f;
146 f = sibling[f_].sx();
147 }
148
149 let bigf = bigf.idx().unwrap();
150 let fnext = sibling[bigf];
151 if let Some(fnext) = fnext.idx() {
152 if let Some(bigfprev) = bigfprev.idx() {
153 sibling[bigfprev] = MaybeIdx::from_index(fnext);
154 } else {
155 child[i] = MaybeIdx::from_index(fnext);
156 }
157
158 let fprev = fprev.idx().unwrap();
159 sibling[bigf] = MaybeIdx::none();
160 sibling[fprev] = MaybeIdx::from_index(bigf.truncate());
161 }
162 }
163 }
164 }
165
166 order.as_mut().fill(none);
167
168 let mut k = 0usize;
169 for i in N.indices() {
170 if etree[i].idx().is_none() && nv[i] > zero {
171 k = post_tree(i, k, child, sibling, order, stack);
172 }
173 }
174}
175
176#[inline(always)]
177fn flip<I: SignedIndex>(i: I) -> I {
178 -I::truncate(2) - i
179}
180
181#[inline]
182fn clear_flag<I: SignedIndex>(wflg: I, wbig: I, w: &mut [I]) -> I {
183 let I = I::truncate;
184 let zero = I(0);
185 let one = I(1);
186
187 if wflg < I(2) || wflg >= wbig {
188 for x in w {
189 if *x != zero {
190 *x = one;
191 }
192 }
193 return I(2);
194 }
195 wflg
196}
197
198#[allow(clippy::comparison_chain)]
199fn amd_2<I: Index>(
200 pe: &mut [I::Signed], iw: &mut [I::Signed], len: &mut [I::Signed], pfree: usize,
204 next: &mut [I::Signed],
205 last: &mut [I::Signed],
206 control: Control,
207 stack: &mut MemStack,
208) -> FlopCount {
209 let n = pe.len();
210 assert!(n < I::Signed::MAX.zx());
211
212 let mut pfree = pfree;
213 let iwlen = iw.len();
214
215 let I = I::Signed::truncate;
216 let none = I(NONE);
217 let zero = I(0);
218 let one = I(1);
219
220 let alpha = control.dense;
221 let aggressive = control.aggressive;
222
223 let mut mindeg = 0usize;
224 let mut ncmpa = 0usize;
225 let mut lemax = 0usize;
226
227 let mut ndiv = 0.0;
228 let mut nms_lu = 0.0;
229 let mut nms_ldl = 0.0;
230
231 let mut nel = 0usize;
232 let mut me = none;
233
234 let dense = if alpha < 0.0 {
235 n - 2
236 } else {
237 (alpha * faer_traits::math_utils::sqrt(&(n as f64))) as usize
238 };
239
240 let dense = Ord::max(dense, 16);
241 let dense = Ord::min(dense, n);
242
243 let (mut w, stack) = stack.collect(repeat_n!(one, n));
244 let (mut nv, stack) = stack.collect(repeat_n!(one, n));
245 let (mut elen, stack) = stack.collect(repeat_n!(zero, n));
246
247 let nv = &mut *nv;
248 let elen = &mut *elen;
249 let w = &mut *w;
250
251 let wbig = I::Signed::MAX - I(n);
252 let mut wflg = I(0);
253 let mut ndense = zero;
254
255 {
256 let (mut head, stack) = stack.collect(repeat_n!(none, n));
257 let (mut degree, _) = stack.collect(repeat_n!(none, n));
258
259 let head = &mut *head;
260 let degree = &mut *degree;
261
262 last.fill(none);
263 head.fill(none);
264 next.fill(none);
265 degree.copy_from_slice(len);
266
267 for i in 0..n {
268 let deg = degree[i].zx();
269 if deg == 0 {
270 elen[i] = flip(one);
271 nel += 1;
272 pe[i] = none;
273 w[i] = zero;
274 } else if deg > dense {
275 ndense += one;
276 nv[i] = zero;
277 elen[i] = none;
278 pe[i] = none;
279 nel += 1;
280 } else {
281 let inext = head[deg];
282 if inext != none {
283 last[inext.zx()] = I(i);
284 }
285 next[i] = inext;
286 head[deg] = I(i);
287 }
288 }
289
290 while nel < n {
291 let mut deg = mindeg;
292 while deg < n {
293 me = head[deg];
294 if me != none {
295 break;
296 }
297 deg += 1;
298 }
299 mindeg = deg;
300
301 let me = me.zx();
302 let inext = next[me];
303 if inext != none {
304 last[inext.zx()] = none;
305 }
306 head[deg] = inext;
307
308 let elenme = elen[me];
309 let mut nvpiv = nv[me];
310 nel += nvpiv.zx();
311
312 nv[me] = -nvpiv;
313 let mut degme = 0usize;
314 let mut pme1;
315 let mut pme2;
316 if elenme == zero {
317 pme1 = pe[me];
318 pme2 = pme1 - one;
319
320 for p in pme1.zx()..(pme1 + len[me]).zx() {
321 let i = iw[p].zx();
322 let nvi = nv[i];
323 if nvi > zero {
324 degme += nvi.zx();
325 nv[i] = -nvi;
326 pme2 += one;
327 iw[pme2.zx()] = I(i);
328
329 let ilast = last[i];
330 let inext = next[i];
331
332 if inext != none {
333 last[inext.zx()] = ilast;
334 }
335 if ilast != none {
336 next[ilast.zx()] = inext;
337 } else {
338 head[degree[i].zx()] = inext;
339 }
340 }
341 }
342 } else {
343 let mut p = pe[me].zx();
344 pme1 = I(pfree);
345 let slenme = (len[me] - elenme).zx();
346
347 for knt1 in 1..elenme.zx() + 2 {
348 let e;
349 let mut pj;
350 let ln;
351 if I(knt1) > elenme {
352 e = me;
353 pj = I(p);
354 ln = slenme;
355 } else {
356 e = iw[p].zx();
357 p += 1;
358 pj = pe[e];
359 ln = len[e].zx();
360 }
361
362 for knt2 in 1..ln + 1 {
363 let i = iw[pj.zx()].zx();
364 pj += one;
365 let nvi = nv[i];
366
367 if nvi > zero {
368 if pfree >= iwlen {
369 pe[me] = I(p);
370 len[me] -= I(knt1);
371 if len[me] == zero {
372 pe[me] = none;
373 }
374
375 pe[e] = pj;
376 len[e] = I(ln - knt2);
377 if len[e] == zero {
378 pe[e] = none;
379 }
380
381 ncmpa += 1;
382 for (j, pe) in pe.iter_mut().enumerate() {
383 let pn = *pe;
384 if pn >= zero {
385 let pn = pn.zx();
386 *pe = iw[pn];
387 iw[pn] = flip(I(j));
388 }
389 }
390
391 let mut psrc = 0usize;
392 let mut pdst = 0usize;
393 let pend = pme1.zx();
394
395 while psrc < pend {
396 let j = flip(iw[psrc]);
397 psrc += 1;
398 if j >= zero {
399 let j = j.zx();
400 iw[pdst] = pe[j];
401 pe[j] = I(pdst);
402 pdst += 1;
403 let lenj = len[j].zx();
404
405 if lenj > 0 {
406 iw.copy_within(psrc..psrc + lenj - 1, pdst);
407 psrc += lenj - 1;
408 pdst += lenj - 1;
409 }
410 }
411 }
412
413 let p1 = pdst;
414 iw.copy_within(pme1.zx()..pfree, pdst);
415 pdst += pfree - pme1.zx();
416
417 pme1 = I(p1);
418 pfree = pdst;
419 pj = pe[e];
420 p = pe[me].zx();
421 }
422
423 degme += nvi.zx();
424 nv[i] = -nvi;
425 iw[pfree] = I(i);
426 pfree += 1;
427
428 let ilast = last[i];
429 let inext = next[i];
430
431 if inext != none {
432 last[inext.zx()] = ilast;
433 }
434 if ilast != none {
435 next[ilast.zx()] = inext;
436 } else {
437 head[degree[i].zx()] = inext;
438 }
439 }
440 }
441 if e != me {
442 pe[e] = flip(I(me));
443 w[e] = zero;
444 }
445 }
446 pme2 = I(pfree - 1);
447 }
448
449 degree[me] = I(degme);
450 pe[me] = pme1;
451 len[me] = pme2 - pme1 + one;
452 elen[me] = flip(nvpiv + I(degme));
453
454 wflg = clear_flag(wflg, wbig, w);
455 assert!(pme1 >= zero);
456 for pme in pme1.zx()..(pme2 + one).zx() {
458 let i = iw[pme].zx();
459 let eln = elen[i];
460 if eln > zero {
461 let nvi = -nv[i];
462 let wnvi = wflg - nvi;
463 for iw in iw[pe[i].zx()..][..eln.zx()].iter() {
464 let e = iw.zx();
465 let mut we = w[e];
466 if we >= wflg {
467 we -= nvi;
468 } else if we != zero {
469 we = degree[e] + wnvi;
470 }
471 w[e] = we;
472 }
473 }
474 }
475
476 for pme in pme1.zx()..(pme2 + one).zx() {
477 let i = iw[pme].zx();
478 let p1 = pe[i].zx();
479 let p2 = p1 + elen[i].zx();
480 let mut pn = p1;
481
482 let mut hash = 0usize;
483 deg = 0usize;
484
485 if aggressive {
486 for p in p1..p2 {
487 let e = iw[p].zx();
488 let we = w[e];
489 if we != zero {
490 let dext = we - wflg;
491 if dext > zero {
492 deg += dext.zx();
493 iw[pn] = I(e);
494 pn += 1;
495 hash = hash.wrapping_add(e);
496 } else {
497 pe[e] = flip(I(me));
498 w[e] = zero;
499 }
500 }
501 }
502 } else {
503 for p in p1..p2 {
504 let e = iw[p].zx();
505 let we = w[e];
506 if we != zero {
507 let dext = (we - wflg).zx();
508 deg += dext;
509 iw[pn] = I(e);
510 pn += 1;
511 hash = hash.wrapping_add(e);
512 }
513 }
514 }
515
516 elen[i] = I(pn - p1 + 1);
517 let p3 = pn;
518 let p4 = p1 + len[i].zx();
519 for p in p2..p4 {
520 let j = iw[p].zx();
521 let nvj = nv[j];
522 if nvj > zero {
523 deg += nvj.zx();
524 iw[pn] = I(j);
525 pn += 1;
526 hash = hash.wrapping_add(j);
527 }
528 }
529
530 if elen[i] == one && p3 == pn {
531 pe[i] = flip(I(me));
532 let nvi = -nv[i];
533 assert!(nvi >= zero);
534 degme -= nvi.zx();
535 nvpiv += nvi;
536 nel += nvi.zx();
537 nv[i] = zero;
538 elen[i] = none;
539 } else {
540 degree[i] = Ord::min(degree[i], I(deg));
541 iw[pn] = iw[p3];
542 iw[p3] = iw[p1];
543 iw[p1] = I(me);
544 len[i] = I(pn - p1 + 1);
545 hash %= n;
546
547 let j = head[hash];
548 if j <= none {
549 next[i] = flip(j);
550 head[hash] = flip(I(i));
551 } else {
552 next[i] = last[j.zx()];
553 last[j.zx()] = I(i);
554 }
555 last[i] = I(hash);
556 }
557 }
558
559 degree[me] = I(degme);
560 lemax = Ord::max(lemax, degme);
561 wflg += I(lemax);
562 wflg = clear_flag(wflg, wbig, w);
563
564 for pme in pme1.zx()..(pme2 + one).zx() {
565 let mut i = iw[pme].zx();
566 if nv[i] < zero {
567 let hash = last[i].zx();
568 let j = head[hash];
569
570 if j == none {
571 i = NONE;
572 } else if j < none {
573 i = flip(j).zx();
574 head[hash] = none;
575 } else {
576 i = last[j.zx()].sx();
577 last[j.zx()] = none;
578 }
579
580 while i != NONE && next[i] != none {
581 let ln = len[i];
582 let eln = elen[i];
583
584 for p in (pe[i] + one).zx()..(pe[i] + ln).zx() {
585 w[iw[p].zx()] = wflg;
586 }
587 let mut jlast = i;
588 let mut j = next[i].sx();
589 while j != NONE {
590 let mut ok = len[j] == ln && elen[j] == eln;
591 for p in (pe[j] + one).zx()..(pe[j] + ln).zx() {
592 if w[iw[p].zx()] != wflg {
593 ok = false;
594 }
595 }
596
597 if ok {
598 pe[j] = flip(I(i));
599 nv[i] += nv[j];
600 nv[j] = zero;
601 elen[j] = none;
602 j = next[j].sx();
603 next[jlast] = I(j);
604 } else {
605 jlast = j;
606 j = next[j].sx();
607 }
608 }
609
610 wflg += one;
611 i = next[i].sx();
612 }
613 }
614 }
615
616 let mut p = pme1.zx();
617 let nleft = n - nel;
618 for pme in pme1.zx()..(pme2 + one).zx() {
619 let i = iw[pme].zx();
620 let nvi = -nv[i];
621 if nvi > zero {
622 nv[i] = nvi;
623 deg = degree[i].zx() + degme - nvi.zx();
624 deg = Ord::min(deg, nleft - nvi.zx());
625
626 let inext = head[deg];
627 if inext != none {
628 last[inext.zx()] = I(i);
629 }
630 next[i] = inext;
631 last[i] = none;
632 head[deg] = I(i);
633
634 mindeg = Ord::min(mindeg, deg);
635 degree[i] = I(deg);
636 iw[p] = I(i);
637 p += 1;
638 }
639 }
640 nv[me] = nvpiv;
641 len[me] = I(p) - pme1;
642 if len[me] == zero {
643 pe[me] = none;
644 w[me] = zero;
645 }
646 if elenme != zero {
647 pfree = p;
648 }
649
650 {
651 let f = nvpiv.sx() as isize as f64;
652 let r = degme as f64 + ndense.sx() as isize as f64;
653 let lnzme = f * r + (f - 1.0) * f / 2.0;
654 ndiv += lnzme;
655 let s = f * r * r + r * (f - 1.0) * f + (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
656 nms_lu += s;
657 nms_ldl += (s + lnzme) / 2.0;
658 }
659 }
660
661 {
662 let f = ndense.sx() as isize as f64;
663 let lnzme = (f - 1.0) * f / 2.0;
664 ndiv += lnzme;
665 let s = (f - 1.0) * f * (2.0 * f - 1.0) / 6.0;
666 nms_lu += s;
667 nms_ldl += (s + lnzme) / 2.0;
668 }
669
670 for pe in pe.iter_mut() {
671 *pe = flip(*pe);
672 }
673 for elen in elen.iter_mut() {
674 *elen = flip(*elen);
675 }
676
677 for i in 0..n {
678 if nv[i] == zero {
679 let mut j = pe[i].sx();
680 if j == NONE {
681 continue;
682 }
683
684 while nv[j] == zero {
685 j = pe[j].zx();
686 }
687 let e = I(j);
688 let mut j = i;
689 while nv[j] == zero {
690 let jnext = pe[j];
691 pe[j] = e;
692 j = jnext.zx();
693 }
694 }
695 }
696 }
697
698 with_dim!(N, n);
699 postorder(
700 Array::from_mut(w, N),
701 Array::from_ref(MaybeIdx::<'_, I>::from_slice_ref_checked(pe, N), N),
702 Array::from_ref(nv, N),
703 Array::from_ref(elen, N),
704 stack,
705 );
706
707 let (ref mut head, _) = stack.collect(repeat_n!(none, n));
708 next.fill(none);
709
710 for (e, &k) in w.iter().enumerate() {
711 if k != none {
712 head[k.zx()] = I(e);
713 }
714 }
715 nel = 0;
716 for &e in head.iter() {
717 if e == none {
718 break;
719 }
720 let e = e.zx();
721 next[e] = I(nel);
722 nel += nv[e].zx();
723 }
724 assert!(nel == n - ndense.zx());
725
726 for i in 0..n {
727 if nv[i] == zero {
728 let e = pe[i];
729 if e != none {
730 let e = e.zx();
731 next[i] = next[e];
732 next[e] += one;
733 } else {
734 next[i] = I(nel);
735 nel += 1;
736 }
737 }
738 }
739 assert!(nel == n);
740 for i in 0..n {
741 last[next[i].zx()] = I(i);
742 }
743
744 let _ = ncmpa;
745 FlopCount {
746 n_div: ndiv,
747 n_mult_subs_ldl: nms_ldl,
748 n_mult_subs_lu: nms_lu,
749 }
750}
751
752#[allow(clippy::comparison_chain)]
753fn amd_1<I: Index>(
754 perm: &mut [I::Signed],
755 perm_inv: &mut [I::Signed],
756 A: SymbolicSparseColMatRef<'_, I>,
757 len: &mut [I::Signed],
758 iwlen: usize,
759 control: Control,
760 stack: &mut MemStack,
761) -> FlopCount {
762 let n = perm.len();
763 let I = I::Signed::truncate;
764
765 let zero = I(0);
766 let one = I(1);
767
768 let (p_e, stack) = unsafe { stack.make_raw::<I::Signed>(n) };
769 let (s_p, stack) = unsafe { stack.make_raw::<I::Signed>(n) };
770 let (i_w, stack) = unsafe { stack.make_raw::<I::Signed>(iwlen) };
771
772 let mut pfree = zero;
775 for j in 0..n {
776 p_e[j] = pfree;
777 s_p[j] = pfree;
778 pfree += len[j];
779 }
780 let pfree = pfree.zx();
781
782 assert!(iwlen >= pfree + n);
787
788 {
789 with_dim!(N, n);
790 let (t_p, _) = unsafe { stack.make_raw::<I::Signed>(n) };
791
792 let A = A.as_shape(N, N);
793 let s_p = Array::from_mut(s_p, N);
794 let t_p = Array::from_mut(t_p, N);
795
796 for k in N.indices() {
797 let mut seen = zero;
799 let mut j_prev = usize::MAX;
800 for j in A.row_idx_of_col(k) {
801 if j_prev == *j {
802 j_prev = *j;
803 continue;
804 }
805 j_prev = *j;
806
807 if j < k {
808 i_w[s_p[j].zx()] = I(*k);
810 s_p[j] += one;
811
812 i_w[s_p[k].zx()] = I(*j);
813 s_p[k] += one;
814
815 seen += one;
816 } else {
817 if j == k {
818 seen += one;
819 }
820 break;
821 }
822
823 let mut seen_j = zero;
826 let mut i_prev = usize::MAX;
827 for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
828 if i_prev == *i.zx() {
829 i_prev = *i.zx();
830 continue;
831 }
832 i_prev = *i.zx();
833
834 let i = i.zx();
835 if i < k {
836 i_w[s_p[i].zx()] = I(*j);
839 s_p[i] += one;
840
841 i_w[s_p[j].zx()] = I(*i);
842 s_p[j] += one;
843
844 seen_j += one;
845 } else {
846 if i == k {
847 seen_j += one;
848 }
849 break;
850 }
851 }
852 t_p[j] += seen_j;
853 }
854 t_p[k] = seen;
855 }
856
857 for j in N.indices() {
859 let mut i_prev = usize::MAX;
860 for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
861 if i_prev == *i.zx() {
862 i_prev = *i.zx();
863 continue;
864 }
865 i_prev = *i.zx();
866
867 let i = i.zx();
868 i_w[s_p[i].zx()] = I(*j);
869 s_p[i] += one;
870
871 i_w[s_p[j].zx()] = I(*i);
872 s_p[j] += one;
873 }
874 }
875 }
876
877 debug_assert!(s_p[n - 1] == I(pfree));
878
879 amd_2::<I>(p_e, i_w, len, pfree, perm_inv, perm, control, stack)
880}
881
882fn preprocess<'out, I: Index>(
883 new_col_ptr: &'out mut [I],
884 new_row_idx: &'out mut [I],
885 A: SymbolicSparseColMatRef<'_, I>,
886 stack: &mut MemStack,
887) -> SymbolicSparseColMatRef<'out, I> {
888 let n = A.nrows();
889
890 with_dim!(N, n);
891 let I = I::Signed::truncate;
892 let zero = I(0);
893 let one = I(1);
894
895 let (ref mut w, stack) = stack.collect(repeat_n!(I(0), n));
896 let (ref mut flag, _) = stack.collect(repeat_n!(I(NONE), n));
897
898 let w = Array::from_mut(w, N);
899 let flag = Array::from_mut(flag, N);
900 let A = A.as_shape(N, N);
901
902 for j in N.indices() {
903 let j_ = I(*j);
904 for i in A.row_idx_of_col(j) {
905 if flag[i] != j_ {
906 w[i] += one;
907 flag[i] = j_;
908 }
909 }
910 }
911
912 new_col_ptr[0] = I::from_signed(zero);
913 for (i, [r, r_next]) in iter::zip(N.indices(), windows2(Cell::as_slice_of_cells(Cell::from_mut(new_col_ptr)))) {
914 r_next.set(r.get() + I::from_signed(w[i]));
915 }
916
917 w.as_mut().copy_from_slice(bytemuck::cast_slice(&new_col_ptr[..n]));
918 flag.as_mut().fill(I(NONE));
919
920 for j in N.indices() {
921 let j_ = I(*j);
922 for i in A.row_idx_of_col(j) {
923 if flag[i] != j_ {
924 new_row_idx[w[i].zx()] = I::from_signed(j_);
925 w[i] += one;
926 flag[i] = j_;
927 }
928 }
929 }
930 unsafe { SymbolicSparseColMatRef::new_unchecked(n, n, &*new_col_ptr, None, &new_row_idx[..new_col_ptr[n].zx()]) }
931}
932
933#[allow(clippy::comparison_chain)]
934fn aat<I: Index>(len: &mut [I::Signed], A: SymbolicSparseColMatRef<'_, I>, stack: &mut MemStack) -> Result<usize, FaerError> {
935 {
936 with_dim!(N, A.nrows());
937 let I = I::Signed::truncate;
938 let zero = I(0);
939 let one = I(1);
940 let A = A.as_shape(N, N);
941
942 let n = *N;
943
944 let t_p = &mut *unsafe { stack.make_raw::<I::Signed>(n).0 }; let len = Array::from_mut(len, N);
947 let t_p = Array::from_mut(t_p, N);
948
949 for k in N.indices() {
950 let mut seen = zero;
951
952 let mut j_prev = usize::MAX;
953 for j in A.row_idx_of_col(k) {
954 if j_prev == *j {
955 j_prev = *j;
956 continue;
957 }
958 j_prev = *j;
959
960 if j < k {
961 seen += one;
962 len[j] += one;
963 len[k] += one;
964 } else {
965 if j == k {
966 seen += one;
967 }
968 break;
969 }
970
971 let mut seen_j = zero;
972 let mut i_prev = usize::MAX;
973 for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
974 if i_prev == *i.zx() {
975 i_prev = *i.zx();
976 continue;
977 }
978 i_prev = *i.zx();
979 let i = i.zx();
980 if i < k {
981 len[i] += one;
982 len[j] += one;
983 seen_j += one;
984 } else {
985 if i == k {
986 seen_j += one;
987 }
988 break;
989 }
990 }
991 t_p[j] += seen_j;
992 }
993 t_p[k] = seen;
994 }
995
996 for j in N.indices() {
997 let mut i_prev = usize::MAX;
998 for i in &A.row_idx_of_col_raw(j)[t_p[j].zx()..] {
999 if i_prev == *i.zx() {
1000 i_prev = *i.zx();
1001 continue;
1002 }
1003 i_prev = *i.zx();
1004 len[i.zx()] += one;
1005 len[j] += one;
1006 }
1007 }
1008 }
1009 let nzaat = I::Signed::sum_nonnegative(len).map(I::from_signed);
1010 nzaat.ok_or(FaerError::IndexOverflow).map(I::zx)
1011}
1012
1013pub fn order_scratch<I: Index>(n: usize, nnz_upper: usize) -> StackReq {
1016 let n_scratch = StackReq::new::<I>(n);
1017 let nzaat = nnz_upper.checked_mul(2).unwrap_or(usize::MAX);
1018 StackReq::all_of(&[
1019 n_scratch,
1021 StackReq::new::<I>(nzaat.checked_add(nzaat / 5).unwrap_or(usize::MAX)),
1023 n_scratch,
1024 n_scratch,
1026 n_scratch,
1028 n_scratch,
1030 n_scratch,
1032 n_scratch,
1034 n_scratch,
1036 n_scratch,
1038 n_scratch,
1040 n_scratch,
1042 ])
1043}
1044
1045pub fn order_maybe_unsorted_scratch<I: Index>(n: usize, nnz_upper: usize) -> StackReq {
1048 StackReq::all_of(&[order_scratch::<I>(n, nnz_upper), StackReq::new::<I>(n + 1), StackReq::new::<I>(nnz_upper)])
1049}
1050
1051pub fn order<I: Index>(
1054 perm: &mut [I],
1055 perm_inv: &mut [I],
1056 A: SymbolicSparseColMatRef<'_, I>,
1057 control: Control,
1058 stack: &mut MemStack,
1059) -> Result<FlopCount, FaerError> {
1060 let n = perm.len();
1061
1062 if n == 0 {
1063 return Ok(FlopCount {
1064 n_div: 0.0,
1065 n_mult_subs_ldl: 0.0,
1066 n_mult_subs_lu: 0.0,
1067 });
1068 }
1069
1070 let (ref mut len, stack) = stack.collect(repeat_n!(I::Signed::truncate(0), n));
1071 let nzaat = aat(len, A, stack)?;
1072 let iwlen = nzaat
1073 .checked_add(nzaat / 5)
1074 .and_then(|x| x.checked_add(n))
1075 .ok_or(FaerError::IndexOverflow)?;
1076 Ok(amd_1::<I>(
1077 bytemuck::cast_slice_mut(perm),
1078 bytemuck::cast_slice_mut(perm_inv),
1079 A,
1080 len,
1081 iwlen,
1082 control,
1083 stack,
1084 ))
1085}
1086
1087pub fn order_maybe_unsorted<I: Index>(
1093 perm: &mut [I],
1094 perm_inv: &mut [I],
1095 A: SymbolicSparseColMatRef<'_, I>,
1096 control: Control,
1097 stack: &mut MemStack,
1098) -> Result<FlopCount, FaerError> {
1099 let n = perm.len();
1100
1101 if n == 0 {
1102 return Ok(FlopCount {
1103 n_div: 0.0,
1104 n_mult_subs_ldl: 0.0,
1105 n_mult_subs_lu: 0.0,
1106 });
1107 }
1108 let nnz = A.compute_nnz();
1109 let (new_col_ptr, stack) = unsafe { stack.make_raw::<I>(n + 1) };
1110 let (new_row_idx, stack) = unsafe { stack.make_raw::<I>(nnz) };
1111 let A = preprocess(new_col_ptr, new_row_idx, A, stack);
1112 order(perm, perm_inv, A, control, stack)
1113}
1114
1115#[derive(Debug, Copy, Clone, PartialEq)]
1117pub struct Control {
1118 pub dense: f64,
1120 pub aggressive: bool,
1122}
1123
1124impl Default for Control {
1125 #[inline]
1126 fn default() -> Self {
1127 Self {
1128 dense: 10.0,
1129 aggressive: true,
1130 }
1131 }
1132}
1133
1134#[derive(Default, Debug, Copy, Clone, PartialEq)]
1136pub struct FlopCount {
1137 pub n_div: f64,
1139 pub n_mult_subs_ldl: f64,
1141 pub n_mult_subs_lu: f64,
1143}