Skip to content
This repository was archived by the owner on May 4, 2019. It is now read-only.

Rewrite broadcast() and map() based on lift() #166

Merged
merged 19 commits into from
Jan 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
language: julia
julia:
- 0.4
- 0.5
- nightly
script:
Expand Down
4 changes: 2 additions & 2 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
julia 0.4
Compat 0.9.4
julia 0.5
Compat 0.13.0
Reexport
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
Expand Down
24 changes: 12 additions & 12 deletions perf/map.jl
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,33 @@ f(x::Float64) = 5 * x

function profile_map()

println("Method: map!(f, dest, src) (0 missing entries, lift=true)")
println("Method: map!(f, dest, src) (0 missing entries)")
print(" for Array{Float64}: ")
map!(f, C, A);
@time map!(f, C, A);
print(" for NullableArray{Float64}: ")
map!(f, Z, X; lift=true);
@time map!(f, Z, X; lift=true);
map!(f, Z, X);
@time map!(f, Z, X);
println()

println("Method: map!(f, dest, src) (~half missing entries, lift=true)")
println("Method: map!(f, dest, src) (~half missing entries)")
print(" for NullableArray{Float64}: ")
map!(f, Z, Y; lift=true);
@time map!(f, Z, Y; lift=true);
map!(f, Z, Y);
@time map!(f, Z, Y);
println()

println("Method: map(f, src) (0 missing entries, lift=true)")
println("Method: map(f, src) (0 missing entries)")
print(" for Array{Float64}: ")
map(f, A);
@time map(f, A);
print(" for NullableArray{Float64}: ")
map(f, X; lift=true);
@time map(f, X; lift=true);
map(f, X);
@time map(f, X);
println()

println("Method: map(f, src) (~half missing entries, lift=true)")
println("Method: map(f, src) (~half missing entries)")
print(" for NullableArray{Float64}: ")
map(f, Y; lift=true);
@time map(f, Y; lift=true);
map(f, Y);
@time map(f, Y);
println()
end
1 change: 1 addition & 0 deletions src/NullableArrays.jl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ include("typedefs.jl")
include("constructors.jl")
include("primitives.jl")
include("indexing.jl")
include("lift.jl")
include("map.jl")
include("nullablevector.jl")
include("operators.jl")
Expand Down
260 changes: 79 additions & 181 deletions src/broadcast.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using Base: promote_eltype
using Base.Cartesian
using Base: _default_eltype
using Compat

if VERSION >= v"0.6.0-dev.693"
using Base.Broadcast: check_broadcast_indices, broadcast_indices
else
Expand All @@ -8,199 +9,96 @@ else
const broadcast_indices = broadcast_shape
end

if VERSION >= v"0.5.0-dev+5189"
_to_shape(dims::Base.DimsOrInds) = map(_to_shape, dims)
_to_shape(r::Base.OneTo) = Int(last(r))
else
_to_shape(x) = x
end
if VERSION < v"0.6.0-dev" # Old approach needed for inference to work
ftype(f, A) = typeof(f)
ftype(f, A...) = typeof(a -> f(a...))
ftype(T::DataType, A) = Type{T}
ftype(T::DataType, A...) = Type{T}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The changes in JuliaLang/julia#19421 might be relevant here. Best!


if VERSION < v"0.5.0-dev+5434"
function gen_nullcheck(narrays::Int, nd::Int)
e_nullcheck = macroexpand(:( @nref $nd isnull_1 d->j_d_1 ))
for k = 2:narrays
isnull = Symbol("isnull_$k")
j_d_k = Symbol("j_d_$k")
e_isnull_k = macroexpand(:( @nref $nd $(isnull) d->$(j_d_k) ))
e_nullcheck = Expr(:||, e_nullcheck, e_isnull_k)
end
return e_nullcheck
if isdefined(Base, :Iterators)
using Base.Iterators: Zip2
else
using Base: Zip2
end
ziptype(A) = Tuple{eltype(A)}
ziptype(A, B) = Zip2{Tuple{eltype(A)}, Tuple{eltype(B)}}
@inline ziptype(A, B, C, D...) = Zip{Tuple{eltype(A)}, ziptype(B, C, D...)}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Likewise here, the changes in JuliaLang/julia#19421 might be relevant. Best!


function gen_broadcast_body(nd::Int, narrays::Int, f, lift::Bool)
F = Expr(:quote, f)
e_nullcheck = gen_nullcheck(narrays, nd)
if lift
return quote
# set up aliases to facilitate subsequent Base.Cartesian magic
B_isnull = B.isnull
@nexprs $narrays k->(values_k = A_k.values)
@nexprs $narrays k->(isnull_k = A_k.isnull)
# check size
@assert ndims(B) == $nd
@ncall $narrays check_broadcast_shape size(B) k->A_k
# main loops
@nloops($nd, i, B,
d->(@nexprs $narrays k->(j_d_k = size(A_k, d) == 1 ? 1 : i_d)), # pre
begin # body
if $e_nullcheck
@inbounds (@nref $nd B_isnull i) = true
else
@nexprs $narrays k->(@inbounds v_k = @nref $nd values_k d->j_d_k)
@inbounds (@nref $nd B i) = (@ncall $narrays $F v)
end
end
)
end
else
return Base.Broadcast.gen_broadcast_body_cartesian(nd, narrays, f)
end
end

function gen_broadcast_function(nd::Int, narrays::Int, f, lift::Bool)
As = [Symbol("A_"*string(i)) for i = 1:narrays]
body = gen_broadcast_body(nd, narrays, f, lift)
@eval let
local _F_
function _F_(B, $(As...))
$body
end
_F_
end
nullable_broadcast_eltype(f, As...) =
eltype(_default_eltype(Base.Generator{ziptype(As...), ftype(f, As...)}))
Copy link

@pabloferz pabloferz Jan 19, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm curious why is this needed?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is "this"?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry, the eltype you last added.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's useful because NullableArray{T} is parameterized on the eltype of Nullable rather than on the Nullable{T} itself.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But, wouldn't that warrants that you don't get a Nullable{T} here?

Given that eltype(NullableArray([1], [true])) == Int, that implies _default_eltype(+, NullableArray([1], [true]), NullableArray([1], [true])) == Int, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, eltype(NullableArray([1], [true])) == Nullable{Int}.

I don't want T = Nullable{Int} here since it's used to build a new array via NullableArray{T}(...). NullableArray{Nullable{Int}} would have a Nullable{Nullable{Int}} eltype.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem with this is that eltype(_default_eltype(*, ["a"], ["b"]) will give Char, so I guess you'd need a custom eltype here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But that method is only called on NullableArrays, so we're certain the element types are Nullable (in your case, Nullable{String}).

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, now I see, this is nullable_broadcast_eltype as opposed to broadcast_eltype. Sorry for the noise then.

else
Base.@pure nullable_eltypestuple(a) = Tuple{eltype(eltype(a))}
Base.@pure nullable_eltypestuple(T::Type) = Tuple{Type{eltype(T)}}
Base.@pure nullable_eltypestuple(a, b...) =
Tuple{nullable_eltypestuple(a).types..., nullable_eltypestuple(b...).types...}

Base.@pure function nullable_broadcast_eltype(f, As...)
T = Core.Inference.return_type(f, nullable_eltypestuple(As...))
T === Union{} ? Any : T
end
end

function Base.broadcast!(f, X::NullableArray; lift::Bool=false)
broadcast!(f, X, X; lift=lift)
end
invoke_broadcast!{F, N}(f::F, dest, As::Vararg{NullableArray, N}) =
invoke(broadcast!, Tuple{F, AbstractArray, Vararg{AbstractArray, N}}, f, dest, As...)

@eval let cache = Dict{Any, Dict{Bool, Dict{Int, Dict{Int, Any}}}}()
"""
broadcast!(f, B::NullableArray, As::NullableArray...; lift::Bool=false)

This method implements the same behavior as that of `broadcast!` when called on
regular `Array` arguments. It also includes the `lift` keyword argument, which
when set to true will lift `f` over the entries of the `As`.

Lifting is disabled by default. Note that this method's signature specifies
the destination `B` array as well as the source `As` arrays as all
`NullableArray`s. Thus, calling `broadcast!` on a arguments consisting
of both `Array`s and `NullableArray`s will fall back to the implementation
of `broadcast!` in `base/broadcast.jl`.
"""
function Base.broadcast!(f, B::NullableArray, As::NullableArray...; lift::Bool=false)
nd = ndims(B)
narrays = length(As)

cache_f = Base.@get! cache f Dict{Bool, Dict{Int, Dict{Int, Any}}}()
cache_lift = Base.@get! cache_f lift Dict{Int, Dict{Int, Any}}()
cache_f_na = Base.@get! cache_lift narrays Dict{Int, Any}()
func = Base.@get! cache_f_na nd gen_broadcast_function(nd, narrays, f, lift)

func(B, As...)
return B
end
end # let cache
else
using Base.Broadcast: newindexer, map_newindexer, newindex

function _nullcheck(nargs)
nullcheck = :(isnull_1[I_1])
for i in 2:nargs
sym_isnull = Symbol("isnull_$i")
sym_idx = Symbol("I_$i")
nullcheck = Expr(:||, :($sym_isnull[$sym_idx]), nullcheck)
end
# if 0 argument arrays, treat nullcheck as though it returns false
nargs >= 1 ? nullcheck : :(false)
end
"""
broadcast(f, As::NullableArray...)

@generated function Base.Broadcast._broadcast!{K,ID,XT,nargs}(f,
Z::NullableArray, keeps::K, Idefaults::ID, Xs::XT, ::Type{Val{nargs}}; lift=false)
nullcheck = _nullcheck(nargs)
quote
T = eltype(Z)
$(Expr(:meta, :noinline))
# destructure keeps and Xs tuples (common to both lifted and non-lifted broadcast)
@nexprs $nargs i->(keep_i = keeps[i])
@nexprs $nargs i->(Idefault_i = Idefaults[i])
if !lift
# destructure the keeps and As tuples
@nexprs $nargs i->(X_i = Xs[i])
@simd for I in CartesianRange(indices(Z))
# reverse-broadcast the indices
@nexprs $nargs i->(I_i = newindex(I, keep_i, Idefault_i))
# extract array values
@nexprs $nargs i->(@inbounds val_i = X_i[I_i])
# call the function and store the result
@inbounds Z[I] = @ncall $nargs f val
end
else
# destructure the indexmaps and Xs tuples
@nexprs $nargs i->(values_i = Xs[i].values)
@nexprs $nargs i->(isnull_i = Xs[i].isnull)
@simd for I in CartesianRange(indices(Z))
# reverse-broadcast the indices
@nexprs $nargs i->(I_i = newindex(I, keep_i, Idefault_i))
if $nullcheck
# if any args are null, store null
@inbounds Z.isnull[I] = true
else
# extract array values
@nexprs $nargs i->(@inbounds val_i = values_i[I_i])
# call the function and store the result
@inbounds Z[I] = @ncall $nargs f val
end
end
end
end
end
Call `broadcast` with nullable lifting semantics and return a `NullableArray`.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should revise this line in light of the of the behavior defined in line 62.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. Actually, since in this PR for now lift isn't supposed to be overloaded (so that lifting is always done), the description is correct. I've removed line 62. I hope we can improve this later in another PR.

Lifting means calling function `f` on the the values wrapped inside `Nullable` entries
of the input arrays, and returning null if any entry is missing.

"""
broadcast!(f, B::NullableArray, As::NullableArray...; lift::Bool=false)

This method implements the same behavior as that of `broadcast!` when called
on regular `Array` arguments. It also includes the `lift` keyword argument,
which when set to true will lift `f` over the entries of the `As`.

Lifting is disabled by default. Note that this method's signature specifies
the destination `B` array as well as the source `As` arrays as all
`NullableArray`s. Thus, calling `broadcast!` on a arguments consisting of
both `Array`s and `NullableArray`s will fall back to the implementation of
`broadcast!` in `base/broadcast.jl`.
"""
# Required to solve dispatch ambiguity between
# broadcast!(f, X::AbstractArray, x::Number...)
# broadcast!(f, Z::NullableArrays.NullableArray, Xs::NullableArrays.NullableArray...)
@inline Base.broadcast!(f, Z::NullableArray; lift=false) =
broadcast!(f, Z, Z; lift=lift)

@inline function Base.broadcast!(f, Z::NullableArray, Xs::NullableArray...;
lift=false)
nargs = length(Xs)
shape = indices(Z)
check_broadcast_indices(shape, Xs...)
keeps, Idefaults = map_newindexer(shape, Xs)
Base.Broadcast._broadcast!(f, Z, keeps, Idefaults, Xs, Val{nargs}; lift=lift)
return Z
end
Note that this method's signature specifies the source `As` arrays as all
`NullableArray`s. Thus, calling `broadcast` on arguments consisting
of both `Array`s and `NullableArray`s will fall back to the standard implementation
of `broadcast` (i.e. without lifting).
"""
function Base.broadcast{F}(f::F, As::NullableArray...)
# These definitions are needed to avoid allocation due to splatting
@inline f2(x1) = lift(f, (x1,))
@inline f2(x1, x2) = lift(f, (x1, x2))
@inline f2(x1, x2, x3) = lift(f, (x1, x2, x3))
@inline f2(x1, x2, x3, x4) = lift(f, (x1, x2, x3, x4))
@inline f2(x1, x2, x3, x4, x5) = lift(f, (x1, x2, x3, x4, x5))
@inline f2(x1, x2, x3, x4, x5, x6) = lift(f, (x1, x2, x3, x4, x5, x6))
@inline f2(x1, x2, x3, x4, x5, x6, x7) = lift(f, (x1, x2, x3, x4, x5, x6, x7))
@inline f2(x...) = lift(f, x)

T = nullable_broadcast_eltype(f, As...)
dest = similar(NullableArray{T}, broadcast_indices(As...))
invoke_broadcast!(f2, dest, As...)
end

"""
broadcast(f, As::NullableArray...;lift::Bool=false)
broadcast!(f, dest::NullableArray, As::NullableArray...)

This method implements the same behavior as that of `broadcast` when called on
regular `Array` arguments. It also includes the `lift` keyword argument, which
when set to true will lift `f` over the entries of the `As`.
Call `broadcast!` with nullable lifting semantics.
Lifting means calling function `f` on the the values wrapped inside `Nullable` entries
of the input arrays, and returning null if any entry is missing.

Lifting is disabled by default. Note that this method's signature specifies the
source `As` arrays as all `NullableArray`s. Thus, calling `broadcast!` on
arguments consisting of both `Array`s and `NullableArray`s will fall back to the
implementation of `broadcast` in `base/broadcast.jl`.
Note that this method's signature specifies the destination `dest` array as well as the
source `As` arrays as all `NullableArray`s. Thus, calling `broadcast!` on a arguments
consisting of both `Array`s and `NullableArray`s will fall back to the standard implementation
of `broadcast!` (i.e. without lifting).
"""
@inline function Base.broadcast(f, Xs::NullableArray...;lift::Bool=false)
return broadcast!(f, NullableArray(eltype(promote_eltype(Xs...)),
_to_shape(broadcast_indices(Xs...))),
Xs...; lift=lift)
function Base.broadcast!{F}(f::F, dest::NullableArray, As::NullableArray...)
# These definitions are needed to avoid allocation due to splatting
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Have you verified that allocations are avoided for f2 of more than two arguments? Also, did you try @inline decorations for f2 to avoid the splatting penalty?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are no allocations on Julia master, though beyond 2 arguments performance is lower. I think I've tried everything I could (given my skills at least), without success (except a generated function, but that's probably overkill for one line). Since there are several issues open about varargs being slow, my hope is that it's going to improve at some point.

@inline f2(x1) = lift(f, (x1,))
@inline f2(x1, x2) = lift(f, (x1, x2))
@inline f2(x1, x2, x3) = lift(f, (x1, x2, x3))
@inline f2(x1, x2, x3, x4) = lift(f, (x1, x2, x3, x4))
@inline f2(x1, x2, x3, x4, x5) = lift(f, (x1, x2, x3, x4, x5))
@inline f2(x1, x2, x3, x4, x5, x6) = lift(f, (x1, x2, x3, x4, x5, x6))
@inline f2(x1, x2, x3, x4, x5, x6, x7) = lift(f, (x1, x2, x3, x4, x5, x6, x7))
@inline f2(x...) = lift(f, x)

invoke_broadcast!(f2, dest, As...)
end

# To fix ambiguity
function Base.broadcast!{F}(f::F, dest::NullableArray)
f2() = lift(f)
invoke_broadcast!(f2, dest)
end

# broadcasted ops
Expand Down
37 changes: 37 additions & 0 deletions src/lift.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
eltype_nullable(x::Nullable) = eltype(x)
eltype_nullable(x) = typeof(x)
eltype_nullable{T<:Nullable}(::Type{T}) = eltype(T)
eltype_nullable{T}(::Type{T}) = T

eltypes(x) = Tuple{eltype_nullable(x)}
eltypes(x, xs...) = Tuple{eltype_nullable(x), eltypes(xs...).parameters...}

"""
lift(f, xs...)

Lift function `f`, passing it arguments `xs...`, using standard lifting semantics:
for a function call `f(xs...)`, return null if any `x` in `xs` is null; otherwise,
return `f` applied to values of `xs`.
"""
@inline @generated function lift{F, N, T}(f::F, xs::NTuple{N, T})
args = (:(unsafe_get(xs[$i])) for i in 1:N)
checknull = (:(!isnull(xs[$i])) for i in 1:N)
if null_safe_op(f.instance, map(eltype_nullable, xs.parameters)...)
return quote
val = f($(args...))
nonull = (&)($(checknull...))
@compat Nullable(val, nonull)
end
else
return quote
U = Core.Inference.return_type(f, eltypes(xs...))
if (&)($(checknull...))
return Nullable(f($(args...)))
else
return isleaftype(U) ? Nullable{U}() : Nullable()
end
end
end
end

lift(f) = Nullable(f())
Loading