Skip to content

Commit 85dce1b

Browse files
fredrikekre authored and ViralBShah committed
move cor, cov, std, stdm, var, varm and linreg to StatsBase (#27152)
fix JuliaLang/julia#25571 (comment) (included in JuliaStats/StatsBase.jl#379) fix #23769 (included in JuliaStats/StatsBase.jl#379) fix #27140
1 parent e253cda commit 85dce1b

File tree

3 files changed

+2
-145
lines changed

3 files changed

+2
-145
lines changed

src/linalg.jl

-24
Original file line numberDiff line numberDiff line change
@@ -1010,27 +1010,3 @@ end
10101010

10111011
chol(A::SparseMatrixCSC) = error("Use cholesky() instead of chol() for sparse matrices.")
10121012
eigen(A::SparseMatrixCSC) = error("Use IterativeEigensolvers.eigs() instead of eigen() for sparse matrices.")
1013-
1014-
function Base.cov(X::SparseMatrixCSC; dims::Int=1, corrected::Bool=true)
1015-
vardim = dims
1016-
a, b = size(X)
1017-
n, p = vardim == 1 ? (a, b) : (b, a)
1018-
1019-
# The covariance can be decomposed into two terms
1020-
# 1/(n - 1) ∑ (x_i - x̄)*(x_i - x̄)' = 1/(n - 1) (∑ x_i*x_i' - n*x̄*x̄')
1021-
# which can be evaluated via a sparse matrix-matrix product
1022-
1023-
# Compute ∑ x_i*x_i' = X'X using sparse matrix-matrix product
1024-
out = Matrix(Base.unscaled_covzm(X, vardim))
1025-
1026-
# Compute x̄
1027-
x̄ᵀ = mean(X, dims=vardim)
1028-
1029-
# Subtract n*x̄*x̄' from X'X
1030-
@inbounds for j in 1:p, i in 1:p
1031-
out[i,j] -= x̄ᵀ[i] * x̄ᵀ[j]' * n
1032-
end
1033-
1034-
# scale with the sample size n or the corrected sample size n - 1
1035-
return rmul!(out, inv(n - corrected))
1036-
end

src/sparsematrix.jl

-61
Original file line numberDiff line numberDiff line change
@@ -3489,67 +3489,6 @@ function hash(A::SparseMatrixCSC{T}, h::UInt) where T
34893489
hashrun(0, length(A)-lastidx, h) # Hash zeros at end
34903490
end
34913491

3492-
## Statistics
3493-
3494-
# This is the function that does the reduction underlying var/std
3495-
function Base.centralize_sumabs2!(R::AbstractArray{S}, A::SparseMatrixCSC{Tv,Ti}, means::AbstractArray) where {S,Tv,Ti}
3496-
lsiz = Base.check_reducedims(R,A)
3497-
size(means) == size(R) || error("size of means must match size of R")
3498-
isempty(R) || fill!(R, zero(S))
3499-
isempty(A) && return R
3500-
3501-
colptr = A.colptr
3502-
rowval = A.rowval
3503-
nzval = A.nzval
3504-
m = size(A, 1)
3505-
n = size(A, 2)
3506-
3507-
if size(R, 1) == size(R, 2) == 1
3508-
# Reduction along both columns and rows
3509-
R[1, 1] = Base.centralize_sumabs2(A, means[1])
3510-
elseif size(R, 1) == 1
3511-
# Reduction along rows
3512-
@inbounds for col = 1:n
3513-
mu = means[col]
3514-
r = convert(S, (m-colptr[col+1]+colptr[col])*abs2(mu))
3515-
@simd for j = colptr[col]:colptr[col+1]-1
3516-
r += abs2(nzval[j] - mu)
3517-
end
3518-
R[1, col] = r
3519-
end
3520-
elseif size(R, 2) == 1
3521-
# Reduction along columns
3522-
rownz = fill(convert(Ti, n), m)
3523-
@inbounds for col = 1:n
3524-
@simd for j = colptr[col]:colptr[col+1]-1
3525-
row = rowval[j]
3526-
R[row, 1] += abs2(nzval[j] - means[row])
3527-
rownz[row] -= 1
3528-
end
3529-
end
3530-
for i = 1:m
3531-
R[i, 1] += rownz[i]*abs2(means[i])
3532-
end
3533-
else
3534-
# Reduction along a dimension > 2
3535-
@inbounds for col = 1:n
3536-
lastrow = 0
3537-
@simd for j = colptr[col]:colptr[col+1]-1
3538-
row = rowval[j]
3539-
for i = lastrow+1:row-1
3540-
R[i, col] = abs2(means[i, col])
3541-
end
3542-
R[row, col] = abs2(nzval[j] - means[row, col])
3543-
lastrow = row
3544-
end
3545-
for i = lastrow+1:m
3546-
R[i, col] = abs2(means[i, col])
3547-
end
3548-
end
3549-
end
3550-
return R
3551-
end
3552-
35533492
## Uniform matrix arithmetic
35543493

35553494
(+)(A::SparseMatrixCSC, J::UniformScaling) = A + sparse(J, size(A)...)

test/sparse.jl

+2 -60
Original file line numberDiff line numberDiff line change
@@ -489,7 +489,7 @@ end
489489
pA = sparse(rand(3, 7))
490490

491491
for arr in (se33, sA, pA)
492-
for f in (sum, prod, minimum, maximum, var)
492+
for f in (sum, prod, minimum, maximum)
493493
farr = Array(arr)
494494
@test f(arr) ≈ f(farr)
495495
@test f(arr, dims=1) ≈ f(farr, dims=1)
@@ -518,9 +518,8 @@ end
518518
@test prod(sparse(Int[])) === 1
519519
@test_throws ArgumentError minimum(sparse(Int[]))
520520
@test_throws ArgumentError maximum(sparse(Int[]))
521-
@test var(sparse(Int[])) === NaN
522521

523-
for f in (sum, prod, var)
522+
for f in (sum, prod)
524523
@test isequal(f(spzeros(0, 1), dims=1), f(Matrix{Int}(I, 0, 1), dims=1))
525524
@test isequal(f(spzeros(0, 1), dims=2), f(Matrix{Int}(I, 0, 1), dims=2))
526525
@test isequal(f(spzeros(0, 1), dims=(1, 2)), f(Matrix{Int}(I, 0, 1), dims=(1, 2)))
@@ -2033,63 +2032,6 @@ end
20332032
@test issymmetric(B)
20342033
end
20352034

2036-
# Faster covariance function for sparse matrices
2037-
# Prevents densifying the input matrix when subtracting the mean
2038-
# Test against dense implementation
2039-
# PR https://github.com/JuliaLang/julia/pull/22735
2040-
# Part of this test needed to be hacked due to the treatment
2041-
# of Inf in sparse matrix algebra
2042-
# https://github.com/JuliaLang/julia/issues/22921
2043-
# The issue will be resolved in
2044-
# https://github.com/JuliaLang/julia/issues/22733
2045-
@testset "optimizing sparse $elty covariance" for elty in (Float64, Complex{Float64})
2046-
n = 10
2047-
p = 5
2048-
np2 = div(n*p, 2)
2049-
nzvals, x_sparse = guardsrand(1) do
2050-
if elty <: Real
2051-
nzvals = randn(np2)
2052-
else
2053-
nzvals = complex.(randn(np2), randn(np2))
2054-
end
2055-
nzvals, sparse(rand(1:n, np2), rand(1:p, np2), nzvals, n, p)
2056-
end
2057-
x_dense = convert(Matrix{elty}, x_sparse)
2058-
@testset "Test with no Infs and NaNs, vardim=$vardim, corrected=$corrected" for vardim in (1, 2),
2059-
corrected in (true, false)
2060-
@test cov(x_sparse, dims=vardim, corrected=corrected) ≈
2061-
cov(x_dense , dims=vardim, corrected=corrected)
2062-
end
2063-
2064-
@testset "Test with $x11, vardim=$vardim, corrected=$corrected" for x11 in (NaN, Inf),
2065-
vardim in (1, 2),
2066-
corrected in (true, false)
2067-
x_sparse[1,1] = x11
2068-
x_dense[1 ,1] = x11
2069-
2070-
cov_sparse = cov(x_sparse, dims=vardim, corrected=corrected)
2071-
cov_dense = cov(x_dense , dims=vardim, corrected=corrected)
2072-
@test cov_sparse[2:end, 2:end] ≈ cov_dense[2:end, 2:end]
2073-
@test isfinite.(cov_sparse) == isfinite.(cov_dense)
2074-
@test isfinite.(cov_sparse) == isfinite.(cov_dense)
2075-
end
2076-
2077-
@testset "Test with NaN and Inf, vardim=$vardim, corrected=$corrected" for vardim in (1, 2),
2078-
corrected in (true, false)
2079-
x_sparse[1,1] = Inf
2080-
x_dense[1 ,1] = Inf
2081-
x_sparse[2,1] = NaN
2082-
x_dense[2 ,1] = NaN
2083-
2084-
cov_sparse = cov(x_sparse, dims=vardim, corrected=corrected)
2085-
cov_dense = cov(x_dense , dims=vardim, corrected=corrected)
2086-
@test cov_sparse[(1 + vardim):end, (1 + vardim):end] ≈
2087-
cov_dense[ (1 + vardim):end, (1 + vardim):end]
2088-
@test isfinite.(cov_sparse) == isfinite.(cov_dense)
2089-
@test isfinite.(cov_sparse) == isfinite.(cov_dense)
2090-
end
2091-
end
2092-
20932035
@testset "similar should not alias the input sparse array" begin
20942036
a = sparse(rand(3,3) .+ 0.1)
20952037
b = similar(a, Float32, Int32)

0 commit comments

Comments
 (0)