pub fn transpose_dedup_scratch<I: Index>(nrows: usize, ncols: usize) -> StackReq
Computes the workspace size and alignment required to transpose a matrix and deduplicate the output elements.