Skip to content

Update to use NullableArrays and CategoricalArrays #145

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Sep 29, 2016
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
@@ -3,7 +3,6 @@ os:
- linux
- osx
julia:
- 0.4
- 0.5
- nightly
notifications:
11 changes: 6 additions & 5 deletions REQUIRE
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
julia 0.4
DataStructures
DataArrays 0.3.8
DataFrames 0.7.6
julia 0.5
DataStructures 0.4.3
DataFrames 0.8.3
NullableArrays 0.0.9
CategoricalArrays 0.0.6
Compat 0.8
@windows WinReg
@windows WinReg 0.2.0
2 changes: 0 additions & 2 deletions appveyor.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
environment:
matrix:
- JULIAVERSION: "julialang/bin/winnt/x86/0.4/julia-0.4-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.4/julia-0.4-latest-win64.exe"
- JULIAVERSION: "julialang/bin/winnt/x86/0.5/julia-0.5-latest-win32.exe"
- JULIAVERSION: "julialang/bin/winnt/x64/0.5/julia-0.5-latest-win64.exe"
- JULIAVERSION: "julianightlies/bin/winnt/x86/julia-latest-win32.exe"
6 changes: 3 additions & 3 deletions src/RCall.jl
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
__precompile__()
module RCall
using Compat, DataFrames, DataArrays
using Compat, DataFrames, NullableArrays, CategoricalArrays
import Compat.String

import DataStructures: OrderedDict

import Base: eltype, show, convert, isascii,
import Base: eltype, show, convert, isascii, isnull,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You shouldn't need `import` for this; `using` should be enough, AFAICT.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is because RCall also defines an `isnull` method.

length, size, getindex, setindex!, start, next, done, names

export RObject,
Sxp, NilSxp, StrSxp, CharSxp, LglSxp, IntSxp, RealSxp, CplxSxp,
ListSxp, VecSxp, EnvSxp, LangSxp, ClosSxp, S4Sxp,
getattrib, setattrib!, getnames, setnames!, getclass, setclass!, attributes,
globalEnv,
isna, anyna, isnull,
isna, anyna,
rcopy, rparse, rprint, reval, rcall, rlang,
@rimport, @rlibrary, @rput, @rget, @var_str, @R_str

1 change: 0 additions & 1 deletion src/convert-base.jl
Original file line number Diff line number Diff line change
@@ -30,7 +30,6 @@ sexp(::Type{Cint},x) = convert(Cint,x)
sexp(::Type{Float64},x) = convert(Float64,x)
sexp(::Type{Complex128},x) = convert(Complex128,x)


# NilSxp
# `nothing` (the only value of type `Void`) is converted via `Const.NilValue`.
function sexp(::Void)
    return sexp(Const.NilValue)
end

# A pointer to a `NilSxp` is copied back to Julia as `nothing`.
function rcopy(::Ptr{NilSxp})
    return nothing
end
92 changes: 69 additions & 23 deletions src/convert-data.jl
Original file line number Diff line number Diff line change
@@ -1,34 +1,63 @@
# conversion methods for DataArrays and DataFrames
# conversion methods for NullableArrays, CategoricalArrays and DataFrames

function rcopy{T,S<:VectorSxp}(::Type{DataArray{T}}, s::Ptr{S})
DataArray(rcopy(Array{T},s), isna(s))
# Copy an R object of length one to a `Nullable{T}`.
# The object is first copied as a `NullableArray{T}` and its single element is
# returned; any other length raises an error.
function rcopy{T,S<:Sxp}(::Type{Nullable{T}}, s::Ptr{S})
    if length(s) != 1
        error("length of $s must be 1.")
    end
    copied = rcopy(NullableArray{T}, s)
    return copied[1]
end
function rcopy{S<:VectorSxp}(::Type{DataArray}, s::Ptr{S})
DataArray(rcopy(Array,s), isna(s))

# Copy to an untyped `Nullable`: the element type is taken from `eltype(S)` and
# the work is delegated to the `Nullable{T}` method.
function rcopy{S<:VectorSxp}(::Type{Nullable}, s::Ptr{S})
    elty = eltype(S)
    return rcopy(Nullable{elty}, s)
end

# Character vectors are copied to `Nullable{Compat.String}` when an untyped
# `Nullable` is requested.
function rcopy{S<:StrSxp}(::Type{Nullable}, s::Ptr{S})
    target = Nullable{Compat.String}
    return rcopy(target, s)
end

function rcopy(::Type{DataArray}, s::Ptr{IntSxp})
function rcopy{T,S<:VectorSxp}(::Type{NullableArray{T}}, s::Ptr{S})
NullableArray(rcopy(Array{T},s), isna(s))
end
function rcopy{S<:VectorSxp}(::Type{NullableArray}, s::Ptr{S})
NullableArray(rcopy(Array,s), isna(s))
end

function rcopy(::Type{NullableArray}, s::Ptr{IntSxp})
isFactor(s) && error("$s is a R factor")
DataArray(rcopy(Array,s), isna(s))
NullableArray(rcopy(Array,s), isna(s))
end
function rcopy(::Type{PooledDataArray}, s::Ptr{IntSxp})
function rcopy(::Type{CategoricalArray}, s::Ptr{IntSxp})
isFactor(s) || error("$s is not a R factor")
refs = DataArrays.RefArray([isna(x) ? zero(Int32) : x for x in s])
compact(PooledDataArray(refs,rcopy(getattrib(s,Const.LevelsSymbol))))
refs = UInt32[x for x in s]
levels = rcopy(getattrib(s,Const.LevelsSymbol))
pool = CategoricalPool(levels, isOrdered(s))
CategoricalArray(refs, pool)
end
# Copy an R factor to a `NullableCategoricalArray`.
# NA codes are stored as reference 0; every other code is converted to `UInt32`
# unchanged. The pool is built from the factor's levels attribute and carries
# the result of `isOrdered(s)`. Errors if `s` is not a factor.
function rcopy(::Type{NullableCategoricalArray}, s::Ptr{IntSxp})
    if !isFactor(s)
        error("$s is not a R factor")
    end
    codes = Array{UInt32}(length(s))
    for (i, x) in enumerate(s)
        codes[i] = isna(x) ? zero(UInt32) : UInt32(x)
    end
    lvls = rcopy(getattrib(s,Const.LevelsSymbol))
    catpool = CategoricalPool(lvls, isOrdered(s))
    return NullableCategoricalArray(codes, catpool)
end

function rcopy(::Type{DataFrame}, s::Ptr{VecSxp})
isFrame(s) || error("s is not a R data frame")
DataFrame(Any[isFactor(c)? rcopy(PooledDataArray, c) : rcopy(DataArray, c) for c in s],
rcopy(Array{Symbol},getnames(s)))
DataFrame(Any[rcopy(c) for c in s], rcopy(Array{Symbol},getnames(s)))
end


## DataArray to sexp conversion.
function sexp(v::DataArray)
rv = protect(sexp(v.data))
# Nullable to sexp conversion.
# Convert a `Nullable` scalar: a null value is converted through the NA
# sentinel `natype(T)` for its element type, otherwise the wrapped value is
# converted directly.
function sexp{T}(x::Nullable{T})
    isnull(x) && return sexp(natype(T))
    return sexp(x.value)
end

## NullableArray to sexp conversion.
function sexp(v::NullableArray)
rv = protect(sexp(v.values))
try
for (i,isna) = enumerate(v.na)
for (i,isna) = enumerate(isnull(v))
if isna
rv[i] = naeltype(eltype(rv))
end
@@ -39,12 +68,29 @@ function sexp(v::DataArray)
rv
end

## PooledDataArray to sexp conversion.
function sexp{T<:Compat.String,R<:Integer}(v::PooledDataArray{T,R})
rv = sexp(v.refs)
setattrib!(rv, Const.LevelsSymbol, sexp(v.pool))
setattrib!(rv, Const.ClassSymbol, sexp("factor"))
rv
## CategoricalArray to sexp conversion.
# Generate one `sexp` method per categorical array type; the body is identical
# for both, so it is emitted through `@eval` with the type name spliced in.
# Only arrays whose levels are strings (`T<:Compat.String`) are covered.
for typ in [:NullableCategoricalArray, :CategoricalArray]
@eval begin
function sexp{T<:Compat.String,N,R<:Integer}(v::$typ{T,N,R})
# `rv` stays protected until the matching `unprotect(1)` in `finally`
rv = protect(sexp(v.refs))
try
# a reference code of 0 is written out as the NA value for `rv`'s element type
# (presumably 0 marks a null entry -- confirm against CategoricalArrays)
for (i,ref) = enumerate(v.refs)
if ref == 0
rv[i] = naeltype(eltype(rv))
end
end
# due to a bug in CategoricalArrays, we use index(v.pool) instead of index(v)
setattrib!(rv, Const.LevelsSymbol, sexp(CategoricalArrays.index(v.pool)))
setattrib!(rv, Const.ClassSymbol, sexp("factor"))
# for ordered arrays, `rv` is rebound to the result of calling R's `ordered`
# with the array's levels
if CategoricalArrays.ordered(v)
rv = rcall(:ordered, rv, CategoricalArrays.levels(v))
end
finally
unprotect(1)
end
rv
end
end
end

## DataFrame to sexp conversion.
16 changes: 10 additions & 6 deletions src/convert-default.jl
Original file line number Diff line number Diff line change
@@ -8,7 +8,7 @@ rcopy(s::CharSxpPtr) = rcopy(Compat.String,s)

function rcopy(s::StrSxpPtr)
if anyna(s)
rcopy(DataArray,s)
rcopy(NullableArray,s)
elseif length(s) == 1
rcopy(Compat.String,s)
else
@@ -17,7 +17,7 @@ function rcopy(s::StrSxpPtr)
end
function rcopy(s::RealSxpPtr)
if anyna(s)
rcopy(DataArray{Float64},s)
rcopy(NullableArray{Float64},s)
elseif length(s) == 1
rcopy(Float64,s)
else
@@ -26,7 +26,7 @@ function rcopy(s::RealSxpPtr)
end
function rcopy(s::CplxSxpPtr)
if anyna(s)
rcopy(DataArray{Complex128},s)
rcopy(NullableArray{Complex128},s)
elseif length(s) == 1
rcopy(Complex128,s)
else
@@ -35,7 +35,7 @@ function rcopy(s::CplxSxpPtr)
end
function rcopy(s::LglSxpPtr)
if anyna(s)
rcopy(DataArray{Bool},s)
rcopy(NullableArray{Bool},s)
elseif length(s) == 1
rcopy(Bool,s)
else
@@ -44,9 +44,13 @@ function rcopy(s::LglSxpPtr)
end
function rcopy(s::IntSxpPtr)
if isFactor(s)
rcopy(PooledDataArray,s)
if anyna(s)
rcopy(NullableCategoricalArray,s)
else
rcopy(CategoricalArray,s)
end
elseif anyna(s)
rcopy(DataArray{Int},s)
rcopy(NullableArray{Int},s)
elseif length(s) == 1
rcopy(Cint,s)
else
10 changes: 8 additions & 2 deletions src/methods.jl
Original file line number Diff line number Diff line change
@@ -140,7 +140,9 @@ start{S<:VectorSxp}(s::Ptr{S}) = 0
# Advance iteration over an R vector: the element after `state` is returned
# together with its index, which becomes the new state.
function next{S<:VectorSxp}(s::Ptr{S},state)
    idx = state + 1
    return (s[idx], idx)
end

# Iteration is finished once the state has reached the vector's length.
function done{S<:VectorSxp}(s::Ptr{S},state)
    return state >= length(s)
end


# Iterating an `RObject` that wraps a vector forwards every step of the
# iteration protocol to the wrapped pointer `s.p`.
function start{S<:VectorSxp}(s::RObject{S})
    return start(s.p)
end

function next{S<:VectorSxp}(s::RObject{S},state)
    return next(s.p, state)
end

function done{S<:VectorSxp}(s::RObject{S},state)
    return done(s.p, state)
end

# PairListSxps

@@ -298,6 +300,10 @@ naeltype(::Type{CplxSxp}) = complex(Const.NaReal,Const.NaReal)
naeltype(::Type{StrSxp}) = sexp(Const.NaString)
naeltype(::Type{VecSxp}) = sexp(LglSxp,Const.NaInt) # used for setting

# NA sentinel for a Julia element type, looked up by `sexp(::Nullable{T})`
# when the value is null.
natype{S<:Integer}(::Type{S}) = Const.NaInt
natype{S<:Real}(::Type{S}) = Const.NaReal
# Dispatch on every `Complex` subtype: `Type{Complex}` matches only the
# unparameterized `Complex` itself, so `natype(Complex128)` previously had no
# applicable method. `natype(Complex)` keeps working with this signature.
natype{S<:Complex}(::Type{S}) = complex(Const.NaReal,Const.NaReal)
natype{S<:Compat.String}(::Type{S}) = sexp(Const.NaString)

"""
Check if values correspond to R's sentinel NA values.
@@ -310,7 +316,7 @@ isna(s::CharSxpPtr) = s === sexp(Const.NaString)

# this doesn't allow us to check VecSxp s
function isna{S<:VectorSxp}(s::Ptr{S})
b = BitArray(size(s)...)
b = Array{Bool}(size(s)...)
for (i,e) in enumerate(s)
b[i] = isna(e)
end
61 changes: 41 additions & 20 deletions test/dataframe.jl
Original file line number Diff line number Diff line change
@@ -1,31 +1,52 @@
using DataArrays,DataFrames
using NullableArrays,CategoricalArrays,DataFrames

v110 = rcopy(DataArray,reval("x <- 1:10"))
@test isa(v110,DataVector)
@test eltype(v110) == Cint
# Scalar round trips through `Nullable`: integer, string, and a non-null value
# copied back with the default conversion.
@test isequal(rcopy(Nullable, RObject(1)), Nullable(1))
@test isequal(rcopy(Nullable, RObject("abc")), Nullable("abc"))
@test rcopy(RObject(Nullable(1))) == 1
# `Nullable(1, true)` is expected to come back null after the round trip
@test isnull(rcopy(Nullable, RObject(Nullable(1, true))))

# An integer vector containing NA is copied as a NullableVector of Int32
v110 = rcopy(NullableArray,reval("c(1L, NA)"))
@test isa(v110,NullableVector)
@test eltype(v110) == Nullable{Int32}
# the NA element stays null when sent to R and copied back
@test isnull(rcopy(NullableArray, RObject(v110[2]))[1])

attenu = rcopy(DataFrame,:attenu)
@test isa(attenu,DataFrame)
@test size(attenu) == (182,5)

dist = attenu[:dist]
@test isa(dist,DataArray{Float64})
@test isa(dist,Vector{Float64})
station = attenu[:station]
@test isa(station,NullableCategoricalArray)

@test rcopy(DataArray,"c(NA,TRUE)").na == @data([NA,true]).na
@test rcopy(DataArray,"c(NA,1)").na == @data([NA,1.0]).na
@test rcopy(DataArray,"c(NA,1+0i)").na == @data([NA,1.0+0.0*im]).na
@test rcopy(DataArray,"c(NA,1L)").na == @data([NA,one(Int32)]).na
@test rcopy(DataArray,"c(NA,'NA')").na == @data([NA,"NA"]).na
@test_throws ErrorException rcopy(DataArray,"as.factor(c('a','a','c'))")
@test rcopy(PooledDataArray,"as.factor(c('a','a','c'))").pool == ["a","c"]
# R vectors with a leading NA, one per element type (logical, double, complex,
# integer, character). The expected arrays are built from a values array plus
# a Bool mask whose first entry marks the NA position; the first value is a
# filler chosen so the literal promotes to the intended element type
# (presumably ignored by `isequal` for null slots -- confirm).
@test isequal(rcopy(NullableArray,"c(NA,TRUE)"), NullableArray([true,true], [true,false]))
@test isequal(rcopy(NullableArray,"c(NA,1)"), NullableArray([true,1.], [true,false]))
@test isequal(rcopy(NullableArray,"c(NA,1+0i)"), NullableArray([true,1.+0*im], [true,false]))
@test isequal(rcopy(NullableArray,"c(NA,1L)"), NullableArray([true,one(Int32)], [true,false]))
@test isequal(rcopy(NullableArray,"c(NA,'NA')"), NullableArray(["", "NA"], [true,false]))
# a factor must not be copied as a plain NullableArray
@test_throws ErrorException rcopy(NullableArray,"as.factor(c('a','a','c'))")
# factors: levels and orderedness are carried over, with and without NA
@test CategoricalArrays.levels(rcopy(CategoricalArray,"factor(c('a','a','c'))")) == ["a","c"]
@test CategoricalArrays.levels(rcopy(NullableCategoricalArray,"factor(c('a',NA,'c'))")) == ["a","c"]
@test CategoricalArrays.ordered(rcopy(CategoricalArray,"ordered(c('a','a','c'))"))
@test CategoricalArrays.ordered(rcopy(NullableCategoricalArray,"ordered(c('a',NA,'c'))"))

@test rcopy(DataArray,RObject(@data([NA,true]))).na == @data([NA,true]).na
@test rcopy(DataArray,RObject(@data([NA,1]))).na == @data([NA,1]).na
@test rcopy(DataArray,RObject(@data([NA,1.]))).na == @data([NA,1.]).na
@test rcopy(DataArray,RObject(@data([NA,1.+0*im]))).na == @data([NA,1.+0*im]).na
@test rcopy(DataArray,RObject(@data([NA,NA,"a","b"]))).na == @data([NA,NA,"a","b"]).na
pda = PooledDataArray(repeat(["a", "b"], inner = [5]))
@test rcopy(PooledDataArray,RObject(pda)).refs == repeat([1,2], inner = [5])
# Julia -> R -> Julia round trips. `v` is rebound for each case and compared
# with `isequal` after converting to an RObject and copying back.
# NullableArray cases: Bool, Int, Float64, Complex and String, each with the
# first element marked null by the mask.
v = NullableArray([true,true], [true,false])
@test isequal(rcopy(NullableArray,RObject(v)), v)
v = NullableArray([1,2], [true,false])
@test isequal(rcopy(NullableArray,RObject(v)), v)
v = NullableArray([1.,2.], [true,false])
@test isequal(rcopy(NullableArray,RObject(v)), v)
v = NullableArray([0,1.+0*im], [true,false])
@test isequal(rcopy(NullableArray,RObject(v)), v)
v = NullableArray(["","abc"], [true,false])
@test isequal(rcopy(NullableArray,RObject(v)), v)
# Categorical cases: unordered first, then the same data with `ordered=true`;
# the nullable variants alternate null / non-null entries.
v = CategoricalArray(repeat(["a", "b"], inner = 5))
@test isequal(rcopy(CategoricalArray,RObject(v)), v)
v = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5))
@test isequal(rcopy(NullableCategoricalArray,RObject(v)), v)
v = CategoricalArray(repeat(["a", "b"], inner = 5), ordered=true)
@test isequal(rcopy(CategoricalArray,RObject(v)), v)
v = NullableCategoricalArray(repeat(["a", "b"], inner = 5), repeat([true, false], outer = 5), ordered=true)
@test isequal(rcopy(NullableCategoricalArray,RObject(v)), v)

@test rcopy(rcall(:dim,RObject(attenu))) == [182,5]