Skip to content

Commit d33608c

Browse files
committed
Pairwise BLAS-based sumabs and sumabs2
1 parent 78898f7 commit d33608c

File tree

2 files changed

+11
-15
lines changed

2 files changed

+11
-15
lines changed

base/linalg/dense.jl

+9-13
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,18 @@ isposdef{T}(A::AbstractMatrix{T}, UL::Symbol) = (S = typeof(sqrt(one(T))); ispos
2727
isposdef{T}(A::AbstractMatrix{T}) = (S = typeof(sqrt(one(T))); isposdef!(S == T ? copy(A) : convert(AbstractMatrix{S}, A)))
2828
isposdef(x::Number) = imag(x)==0 && real(x) > 0
2929

30-
31-
Base.sumabs{T<:BlasFloat}(x::Union(Array{T},StridedVector{T})) =
32-
length(x) > 32 ? BLAS.asum(x) : Base._sumabs(x)
33-
3430
stride1(x::Array) = 1
3531
stride1(x::StridedVector) = stride(x, 1)::Int
3632

37-
function Base.sumabs2{T<:BlasFloat}(x::Union(Array{T},StridedVector{T}))
38-
n = length(x)
39-
if n < DOT_CUTOFF
40-
return Base._sumabs2(x)
41-
else
42-
px = pointer(x)
43-
incx = stride1(x)
44-
return BLAS.dot(n, px, incx, px, incx)
45-
end
33+
Base.sum_seq{T<:BlasFloat}(::Base.AbsFun, a::Array{T}, ifirst::Int, ilast::Int) =
34+
BLAS.asum(ilast-ifirst+1, pointer(a, ifirst), stride1(a))
35+
36+
# This appears to show a benefit from a larger block size
37+
Base.sum_pairwise_blocksize(::Base.Abs2Fun) = 4096
38+
function Base.sum_seq{T<:BlasFloat}(::Base.Abs2Fun, a::Array{T}, ifirst::Int, ilast::Int)
39+
px = pointer(a, ifirst)
40+
incx = stride1(a)
41+
BLAS.dot(ilast-ifirst+1, px, incx, px, incx)
4642
end
4743

4844
function norm{T<:BlasFloat, TI<:Integer}(x::StridedVector{T}, rx::Union(UnitRange{TI},Range{TI}))

base/reduce.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -250,7 +250,7 @@ sumabs(itr) = sum(AbsFun(), itr)
250250
sumabs2(itr) = sum(Abs2Fun(), itr)
251251

252252
# Note: sum_seq uses four accumulators, so each accumulator gets at most 256 numbers
253-
const PAIRWISE_SUM_BLOCKSIZE = 1024
253+
sum_pairwise_blocksize(f) = 1024
254254

255255
# a fast implementation of sum in sequential order (from left to right).
256256
# to allow type-stable loops, requires length > 1
@@ -311,7 +311,7 @@ end
311311
function sum_pairwise(f, a::AbstractArray, ifirst::Int, ilast::Int)
312312
# bsiz: maximum block size
313313

314-
if ifirst + PAIRWISE_SUM_BLOCKSIZE >= ilast
314+
if ifirst + sum_pairwise_blocksize(f) >= ilast
315315
sum_seq(f, a, ifirst, ilast)
316316
else
317317
imid = (ifirst + ilast) >>> 1

0 commit comments

Comments
 (0)