Skip to content

Commit 7c574c7

Browse files
committed
Create Index directly from Index(es) where applicable
1 parent 45827f6 commit 7c574c7

File tree

6 files changed

+49
-39
lines changed

6 files changed

+49
-39
lines changed

src/DataFrames.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,10 @@ export @~,
9999
##
100100
##############################################################################
101101

102-
include(joinpath("other", "index.jl"))
103-
include(joinpath("abstractdataframe", "abstractdataframe.jl"))
104102
include(joinpath("other", "utils.jl"))
103+
include(joinpath("other", "index.jl"))
105104

105+
include(joinpath("abstractdataframe", "abstractdataframe.jl"))
106106
include(joinpath("dataframe", "dataframe.jl"))
107107
include(joinpath("subdataframe", "subdataframe.jl"))
108108
include(joinpath("groupeddataframe", "grouping.jl"))

src/abstractdataframe/abstractdataframe.jl

+2-2
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@ Base.ndims(::AbstractDataFrame) = 2
7474
##############################################################################
7575

7676
Base.similar(df::AbstractDataFrame, dims::Int) =
77-
DataFrame([similar(x, dims) for x in columns(df)], _names(df))
77+
DataFrame([similar(x, dims) for x in columns(df)], copy(index(df)))
7878

7979
nas{T}(dv::AbstractArray{T}, dims::Union(Int, (Int...))) = # TODO move to datavector.jl?
8080
DataArray(Array(T, dims), trues(dims))
@@ -83,7 +83,7 @@ nas{T,R}(dv::PooledDataArray{T,R}, dims::Union(Int, (Int...))) =
8383
PooledDataArray(DataArrays.RefArray(zeros(R, dims)), dv.pool)
8484

8585
nas(df::AbstractDataFrame, dims::Int) =
86-
DataFrame(Any[nas(x, dims) for x in columns(df)], _names(df))
86+
DataFrame(Any[nas(x, dims) for x in columns(df)], copy(index(df)))
8787

8888
##############################################################################
8989
##

src/abstractdataframe/join.jl

+1-1
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,6 @@ function crossjoin(df1::AbstractDataFrame, df2::AbstractDataFrame)
182182
r1, r2 = size(df1, 1), size(df2, 1)
183183
cols = [[rep(c, 1, r2) for c in columns(df1)],
184184
[rep(c, r1, 1) for c in columns(df2)]]
185-
colindex = Index([_names(df1), _names(df2)])
185+
colindex = merge(index(df1), index(df2))
186186
DataFrame(cols, colindex)
187187
end

src/dataframe/dataframe.jl

+4-4
Original file line numberDiff line numberDiff line change
@@ -211,7 +211,7 @@ end
211211
function Base.getindex{T <: ColumnIndex}(df::DataFrame, col_inds::AbstractVector{T})
212212
selected_columns = index(df)[col_inds]
213213
new_columns = df.columns[selected_columns]
214-
return DataFrame(new_columns, Index(index(df).names[selected_columns]))
214+
return DataFrame(new_columns, Index(_names(df)[selected_columns]))
215215
end
216216

217217
# df[SingleRowIndex, SingleColumnIndex] => Scalar
@@ -224,7 +224,7 @@ end
224224
function Base.getindex{T <: ColumnIndex}(df::DataFrame, row_ind::Real, col_inds::AbstractVector{T})
225225
selected_columns = index(df)[col_inds]
226226
new_columns = Any[dv[[row_ind]] for dv in df.columns[selected_columns]]
227-
return DataFrame(new_columns, Index(index(df).names[selected_columns]))
227+
return DataFrame(new_columns, Index(_names(df)[selected_columns]))
228228
end
229229

230230
# df[MultiRowIndex, SingleColumnIndex] => (Sub)?AbstractDataVector
@@ -237,7 +237,7 @@ end
237237
function Base.getindex{R <: Real, T <: ColumnIndex}(df::DataFrame, row_inds::AbstractVector{R}, col_inds::AbstractVector{T})
238238
selected_columns = index(df)[col_inds]
239239
new_columns = Any[dv[row_inds] for dv in df.columns[selected_columns]]
240-
return DataFrame(new_columns, Index(index(df).names[selected_columns]))
240+
return DataFrame(new_columns, Index(_names(df)[selected_columns]))
241241
end
242242

243243
##############################################################################
@@ -646,7 +646,7 @@ end
646646
##############################################################################
647647

648648
function hcat!(df1::DataFrame, df2::AbstractDataFrame)
649-
u = unique_adds(df1, _names(df2))
649+
u = add_names(index(df1), index(df2))
650650
for i in 1:length(u)
651651
df1[u[i]] = df2[i]
652652
end

src/other/index.jl

+40-4
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Base.deepcopy(x::Index) = Index(deepcopy(x.lookup), deepcopy(x.names))
2323
Base.isequal(x::Index, y::Index) = isequal(x.lookup, y.lookup) && isequal(x.names, y.names)
2424
Base.(:(==))(x::Index, y::Index) = isequal(x, y)
2525

26+
# TODO: reconsider. This is 'unsafe': as in a few other places, it allows duplicate names to corrupt the index
2627
function names!(x::Index, nm::Vector{Symbol})
2728
if length(nm) != length(x)
2829
error("Lengths don't match.")
@@ -63,19 +64,25 @@ Base.haskey(x::Index, key::Real) = 1 <= key <= length(x.names)
6364
Base.keys(x::Index) = names(x)
6465

6566
# TODO: If this should stay 'unsafe', perhaps make unexported
66-
# If changing, make sure union stays fast
6767
function Base.push!(x::Index, nm::Symbol)
6868
x.lookup[nm] = length(x) + 1
6969
push!(x.names, nm)
7070
return x
7171
end
7272

73-
function Base.union(x::Index, nm::Index)
74-
x.lookup[nm] = length(x) + 1
75-
push!(x.names, nm)
73+
function Base.merge!(x::Index, y::Index)
74+
adds = add_names(x, y)
75+
i = length(x)
76+
for add in adds
77+
i += 1
78+
x.lookup[add] = i
79+
end
80+
append!(x.names, adds)
7681
return x
7782
end
7883

84+
Base.merge(x::Index, y::Index) = merge!(copy(x), y)
85+
7986
function Base.delete!(x::Index, idx::Integer)
8087
# reset the lookups beyond the deleted item
8188
for i in (idx + 1):length(x.names)
@@ -126,3 +133,32 @@ SimpleIndex() = SimpleIndex(0)
126133
Base.length(x::SimpleIndex) = x.length
127134
Base.names(x::SimpleIndex) = nothing
128135
_names(x::SimpleIndex) = nothing
136+
137+
# Helpers
138+
139+
function add_names(ind::Index, add_ind::Index)
140+
u = names(add_ind)
141+
142+
seen = Set(_names(ind))
143+
dups = Int[]
144+
145+
for i in 1:length(u)
146+
name = u[i]
147+
in(name, seen) ? push!(dups, i) : push!(seen, name)
148+
end
149+
for i in dups
150+
nm = u[i]
151+
k = 1
152+
while true
153+
newnm = symbol("$(nm)_$k")
154+
if !in(newnm, seen)
155+
u[i] = newnm
156+
push!(seen, newnm)
157+
break
158+
end
159+
k += 1
160+
end
161+
end
162+
163+
return u
164+
end

src/other/utils.jl

-26
Original file line numberDiff line numberDiff line change
@@ -74,32 +74,6 @@ function make_unique(names::Vector{Symbol})
7474
return names
7575
end
7676

77-
function unique_adds(df::AbstractDataFrame, adds::Vector{Symbol})
78-
seen = Set(_names(df))
79-
dups = Int[]
80-
u = copy(adds)
81-
82-
for i in 1:length(u)
83-
name = u[i]
84-
in(name, seen) ? push!(dups, i) : push!(seen, name)
85-
end
86-
for i in dups
87-
nm = u[i]
88-
k = 1
89-
while true
90-
newnm = symbol("$(nm)_$k")
91-
if !in(newnm, seen)
92-
u[i] = newnm
93-
push!(seen, newnm)
94-
break
95-
end
96-
k += 1
97-
end
98-
end
99-
100-
return u
101-
end
102-
10377
#' @description
10478
#'
10579
#' Generate standardized names for columns of a DataFrame. The

0 commit comments

Comments
 (0)