JuliaLang · nolta · Mar 14, 2013 · Mar 1, 2013 · Mar 14, 2013 · Mar 14, 2013
diff --git a/base/exports.jl b/base/exports.jl
@@ -798,7 +798,9 @@ export
     median,
     quantile,
     std,
+    stdm,
     var,
+    varm,
 
 # signal processing
     bfft,

diff --git a/base/statistics.jl b/base/statistics.jl
@@ -12,64 +12,49 @@ function mean(iterable)
     end
     return total/count
 end
+mean(v::AbstractArray, region) = sum(v, region) / prod(size(v)[region])
 
 function median!{T<:Real}(v::AbstractVector{T})
     isempty(v) && error("median of an empty array is undefined")
     sort!(v) # TODO: do something more efficient, e.g. select but detect NaNs
     isnan(v[end]) && error("median is undefined in presence of NaNs")
     isodd(length(v)) ? float(v[div(end+1,2)]) : (v[div(end,2)]+v[div(end,2)+1])/2
 end
-median{T<:Real}(v::AbstractArray{T}) = median!(copy(reshape(v, length(v))))
+median{T<:Real}(v::AbstractArray{T}) = median!(copy(vec(v)))
 
 ## variance with known mean
-function var(v::AbstractVector, m::Number, corrected::Bool)
+function varm(v::AbstractVector, m::Number)
     n = length(v)
-    if n == 0 || (n == 1 && corrected)
+    if n == 0 || n == 1
         return NaN
     end
     x = v - m
-    return dot(x, x) / (n - (corrected ? 1 : 0))
+    return dot(x, x) / (n - 1)
 end
-var(v::AbstractVector, m::Number) = var(v, m, true)
-var(v::AbstractArray, m::Number, corrected::Bool) = var(reshape(v, length(v)), m, corrected)
-var(v::AbstractArray, m::Number) = var(v, m, true)
-function var(v::Ranges, m::Number, corrected::Bool)
-    f = first(v) - m
-    s = step(v)
-    l = length(v)
-    if l == 0 || (l == 1 && corrected)
-        return NaN
-    end
-    if corrected
-        return f^2 * l / (l - 1) + f * s * l + s^2 * l * (2 * l - 1) / 6
-    else
-        return f^2 + f * s * (l - 1) + s^2 * (l - 1) * (2 * l - 1) / 6
-    end
-end
-var(v::Ranges, m::Number) = var(v, m, true)
+varm(v::AbstractArray, m::Number) = varm(vec(v), m)
+varm(v::Ranges, m::Number) = var(v)
 
 ## variance
-function var(v::Ranges, corrected::Bool)
+function var(v::Ranges)
     s = step(v)
     l = length(v)
-    if l == 0 || (l == 1 && corrected)
+    if l == 0 || l == 1
         return NaN
     end
-    return abs2(s) * (l + 1) * (corrected ? l : (l - 1)) / 12
+    return abs2(s) * (l + 1) * l / 12
+end
+var(v::AbstractArray) = varm(v, mean(v))
+function var(v::AbstractArray, region)
+    x = bsxfun(-, v, mean(v, region))
+    return sum(x.^2, region) / (prod(size(v)[region]) - 1)
 end
-var(v::AbstractVector, corrected::Bool) = var(v, mean(v), corrected)
-var(v::AbstractArray, corrected::Bool) = var(reshape(v, length(v)), corrected)
-var(v::AbstractArray) = var(v, true)
 
 ## standard deviation with known mean
-std(v::AbstractArray, m::Number, corrected::Bool) = sqrt(var(v, m, corrected))
-std(v::AbstractArray, m::Number) = std(v, m, true)
+stdm(v, m::Number) = sqrt(varm(v, m))
 
 ## standard deviation
-std(v::AbstractArray, corrected::Bool) = std(v, mean(v), corrected)
-std(v::AbstractArray) = std(v, true)
-std(v::Ranges, corrected::Bool) = sqrt(var(v, corrected))
-std(v::Ranges) = std(v, true)
+std(v) = sqrt(var(v))
+std(v, region) = sqrt(var(v, region))
 
 ## hist ##
 
@@ -155,35 +140,35 @@ function center(x::AbstractVector)
     res
 end
 
-function cov(x::AbstractVecOrMat, y::AbstractVecOrMat, corrected::Bool)
+function cov(x::AbstractVecOrMat, y::AbstractVecOrMat)
     if size(x, 1) != size(y, 1)
         error("incompatible matrices")
     end
     n = size(x, 1)
     xc = center(x)
     yc = center(y)
-    conj(xc' * yc / (n - (corrected ? 1 : 0)))
+    conj(xc' * yc / (n - 1))
 end
-cov(x::AbstractVector, y::AbstractVector, corrected::Bool) = cov(x'', y, corrected)[1]
+cov(x::AbstractVector, y::AbstractVector) = cov(x'', y)[1]
 
-function cov(x::AbstractVecOrMat, corrected::Bool)
+function cov(x::AbstractVecOrMat)
     n = size(x, 1)
     xc = center(x)
-    conj(xc' * xc / (n - (corrected ? 1 : 0)))
+    conj(xc' * xc / (n - 1))
 end
-cov(x::AbstractVector, corrected::Bool) = cov(x'', corrected)[1]
+cov(x::AbstractVector) = cov(x'')[1]
 
-function cor(x::AbstractVecOrMat, y::AbstractVecOrMat, corrected::Bool)
-    z = cov(x, y, corrected)
+function cor(x::AbstractVecOrMat, y::AbstractVecOrMat)
+    z = cov(x, y)
     scale = Base.amap(std, x, 2) * Base.amap(std, y, 2)'
     z ./ scale
 end
-cor(x::AbstractVector, y::AbstractVector, corrected::Bool) =
-    cov(x, y, corrected) / std(x) / std(y)
+cor(x::AbstractVector, y::AbstractVector) =
+    cov(x, y) / std(x) / std(y)
 
 
-function cor(x::AbstractVecOrMat, corrected::Bool)
-    res = cov(x, corrected)
+function cor(x::AbstractVecOrMat)
+    res = cov(x)
     n = size(res, 1)
     scale = 1 / sqrt(diag(res))
     for j in 1:n
@@ -195,13 +180,7 @@ function cor(x::AbstractVecOrMat, corrected::Bool)
     end
     res 
 end
-cor(x::AbstractVector, corrected::Bool) = cor(x'', corrected)[1]
-
-cov(x::AbstractVecOrMat) = cov(x, true)
-cov(x::AbstractVecOrMat, y::AbstractVecOrMat) = cov(x, y, true)
-cor(x::AbstractVecOrMat) = cor(x, true)
-cor(x::AbstractVecOrMat, y::AbstractVecOrMat) = cor(x, y, true)
-
+cor(x::AbstractVector) = cor(x'')[1]
 
 ## quantiles ##
 

diff --git a/doc/stdlib/base.rst b/doc/stdlib/base.rst
@@ -2672,37 +2672,37 @@ Combinatorics
 Statistics
 ----------
 
-.. function:: mean(v, [dim])
+.. function:: mean(v[, region])
 
-   Compute the mean of whole array ``v``, or optionally along dimension ``dim``
+   Compute the mean of whole array ``v``, or optionally along the dimensions in ``region``.
 
-.. function:: std(v, [corrected])
+.. function:: std(v[, region])
 
-   Compute the sample standard deviation of a vector ``v``. If the optional argument ``corrected`` is either left unspecified or is explicitly set to the default value of ``true``, then the algorithm will return an estimator of the generative distribution's standard deviation under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sqrt(sum((v .- mean(v)).^2) / (length(v) - 1))`` and involves an implicit correction term sometimes called the Bessel correction which insures that the estimator of the variance is unbiased. If, instead, the optional argument ``corrected`` is set to ``false``, then the algorithm will produce the equivalent of ``sqrt(sum((v .- mean(v)).^2) / length(v))``, which is the empirical standard deviation of the sample.
+   Compute the sample standard deviation of a vector or array``v``, optionally along dimensions in ``region``. The algorithm returns an estimator of the generative distribution's standard deviation under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sqrt(sum((v - mean(v)).^2) / (length(v) - 1))``.
 
-.. function:: std(v, m, [corrected])
+.. function:: stdm(v, m)
 
-   Compute the sample standard deviation of a vector ``v`` with known mean ``m``. If the optional argument ``corrected`` is either left unspecified or is explicitly set to the default value of ``true``, then the algorithm will return an estimator of the generative distribution's standard deviation under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sqrt(sum((v .- m).^2) / (length(v) - 1))`` and involves an implicit correction term sometimes called the Bessel correction which insures that the estimator of the variance is unbiased. If, instead, the optional argument ``corrected`` is set to ``false``, then the algorithm will produce the equivalent of ``sqrt(sum((v .- m).^2) / length(v))``, which is the empirical standard deviation of the sample.
+   Compute the sample standard deviation of a vector ``v`` with known mean ``m``.
 
-.. function:: var(v, [corrected])
+.. function:: var(v[, region])
 
-   Compute the sample variance of a vector ``v``. If the optional argument ``corrected`` is either left unspecified or is explicitly set to the default value of ``true``, then the algorithm will return an unbiased estimator of the generative distribution's variance under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sum((v .- mean(v)).^2) / (length(v) - 1)`` and involves an implicit correction term sometimes called the Bessel correction. If, instead, the optional argument ``corrected`` is set to ``false``, then the algorithm will produce the equivalent of ``sum((v .- mean(v)).^2) / length(v)``, which is the empirical variance of the sample.
+   Compute the sample variance of a vector or array``v``, optionally along dimensions in ``region``. The algorithm will return an estimator of the generative distribution's variance under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sum((v - mean(v)).^2) / (length(v) - 1)``.
 
-.. function:: var(v, m, [corrected])
+.. function:: varm(v, m)
 
-   Compute the sample variance of a vector ``v`` with known mean ``m``. If the optional argument ``corrected`` is either left unspecified or is explicitly set to the default value of ``true``, then the algorithm will return an unbiased estimator of the generative distribution's variance under the assumption that each entry of ``v`` is an IID draw from that generative distribution. This computation is equivalent to calculating ``sum((v .- m)).^2) / (length(v) - 1)`` and involves an implicit correction term sometimes called the Bessel correction. If, instead, the optional argument ``corrected`` is set to ``false``, then the algorithm will produce the equivalent of ``sum((v .- m)).^2) / length(v)``, which is the empirical variance of the sample.
+   Compute the sample variance of a vector ``v`` with known mean ``m``.
 
 .. function:: median(v)
 
-   Compute the median of a vector ``v``
+   Compute the median of a vector ``v``.
 
-.. function:: hist(v, [n])
+.. function:: hist(v[, n])
 
-   Compute the histogram of ``v``, optionally using ``n`` bins
+   Compute the histogram of ``v``, optionally using ``n`` bins.
 
 .. function:: hist(v, e)
 
-   Compute the histogram of ``v`` using a vector ``e`` as the edges for the bins
+   Compute the histogram of ``v`` using a vector ``e`` as the edges for the bins.
 
 .. function:: quantile(v, p)
 
@@ -2712,13 +2712,17 @@ Statistics
 
    Compute the quantiles of a vector ``v`` at the probability values ``[.0, .2, .4, .6, .8, 1.0]``.
 
-.. function:: cov(v)
+.. function:: cov(v1[, v2])
 
-   Compute the Pearson covariance between two vectors ``v1`` and ``v2``.
+   Compute the Pearson covariance between two vectors ``v1`` and ``v2``. If
+   called with a single element ``v``, then computes covariance of columns of
+   ``v``.
 
-.. function:: cor(v)
+.. function:: cor(v1[, v2])
 
-   Compute the Pearson correlation between two vectors ``v1`` and ``v2``.
+   Compute the Pearson correlation between two vectors ``v1`` and ``v2``. If
+   called with a single element ``v``, then computes correlation of columns of
+   ``v``.
 
 Signal Processing
 -----------------

diff --git a/test/statistics.jl b/test/statistics.jl
@@ -17,8 +17,13 @@
 @test_fails median([NaN,0.0])
 
 @test mean([1,2,3]) == 2.
+@test mean([0 1 2; 4 5 6], 1) == [2. 3. 4.]
 @test var([1,2,3]) == 1.
+@test var(1:8) == 6.
+@test var([1 2 3 4 5; 6 7 8 9 10], 2) == [2.5 2.5]'
+@test varm([1,2,3], 2) == 1.
 @test std([1,2,3]) == 1.
+@test stdm([1,2,3], 2) == 1.
 @test hist([1,2,3],10) == [1,0,0,0,0,1,0,0,0,1]
 @test hist([1,2,3],[0,2,4]) == [1,2,0]
-Original file line number
+Diff line change
@@ Expand Up / @@ -798,7 +798,9 @@ export @@
         median,
         quantile,
         std,
+        stdm,
         var,
+        varm,
     # signal processing
         bfft,
@@ Expand Down @@