Skip to content

Commit d80f230

Browse files
authored
Convert Pandas DateSeries to native Julia. (#85)
1 parent bd47245 commit d80f230

File tree

4 files changed

+43
-3
lines changed

4 files changed

+43
-3
lines changed

Project.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ version = "1.5.0"
55
[deps]
66
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
77
DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
8+
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
89
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
910
Lazy = "50d2b5c4-7a5e-59d5-8109-a42b560f39c0"
1011
Pkg = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
@@ -22,5 +23,5 @@ Lazy = "0.15"
2223
PyCall = "1.90"
2324
TableTraits = "^0.4, ^1"
2425
TableTraitsUtils = "^0.3, ^0.4, ^1"
25-
julia = "0.7, 1"
2626
Tables = "1"
27+
julia = "0.7, 1"

src/Pandas.jl

+20-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
__precompile__(true)
22
module Pandas
33

4+
using Dates
45
using PyCall
56
using Lazy
67
using Compat
@@ -76,9 +77,27 @@ end
7677

7778
quot(x) = Expr(:quote, x)
7879

80+
81+
"""
    convert_datetime_series_to_julia_vector(series)

Copy the elements of `series` into a newly allocated `Vector{Dates.DateTime}`.

Elements are read one at a time as `series[i]` for `i` in `1:length(series)` and are
converted to `Dates.DateTime` as they are stored.
"""
function convert_datetime_series_to_julia_vector(series)
    # Element-wise indexing is deliberate: the `getindex` overload on `series` is
    # what turns each entry into a Julia date type (via PyCall.jl's automatic
    # conversion) instead of leaving it as a Python object.
    return Dates.DateTime[series[position] for position in 1:length(series)]
end
91+
92+
7993
function Base.Array(x::PandasWrapped)
94+
if typeof(x) <: Series && x.pyo.dtype == np.dtype("<M8[ns]")
95+
return convert_datetime_series_to_julia_vector(x)
96+
end
8097
c = np.asarray(x.pyo)
81-
if typeof(c).parameters[1] == PyObject
98+
# PyCall will automatically try to convert the result of np.asarray to a native Julia array containing native Julia objects.
99+
# If it can't, it will return a PyObject or a Julia vector of PyObjects.
100+
if typeof(c) == PyObject || typeof(c).parameters[1] == PyObject
82101
out = Array{Any}(undef, size(x))
83102
for idx in eachindex(out)
84103
out[idx] = convert(PyAny, c[idx])

test/Project.toml

+3-1
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,16 @@
22
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
33
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
44
DataValues = "e7dc6d0d-1eca-5fa6-8ad6-5aecde8b7ea5"
5+
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
56
IteratorInterfaceExtensions = "82899510-4779-5014-852e-03e436cf321d"
7+
PyCall = "438e738f-606a-5dbb-bf0a-cddfbfd45ab0"
68
TableTraits = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c"
79
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
810
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
911

1012
[compat]
13+
DataFrames = "1"
1114
DataValues = "0.4.4"
1215
IteratorInterfaceExtensions = "^0.1.1, ^1"
1316
TableTraits = "^0.4, ^1"
1417
Tables = "1"
15-
DataFrames = "1"

test/runtests.jl

+18
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
using Pandas
22
using Test
33
import DataFrames
4+
using PyCall
5+
using Dates
46

57
df = DataFrame(Dict(:name=>["a", "b"], :age=>[27, 30]))
68
age = values(df.age)
@@ -50,3 +52,19 @@ julia_df = DataFrames.DataFrame(x=[1,2], y=[missing, missing])
5052
py_df = Pandas.DataFrame(julia_df)
5153
expected_df = Pandas.DataFrame(:x=>[1,2], :y=>[NaN, NaN])
5254
@test Pandas.equals(py_df, expected_df)
55+
56+
# Regression test for issue #68: a pandas datetime column must convert to Julia `DateTime` values.
57+
py"""
58+
import pandas as pd
59+
60+
def get_df():
61+
df = pd.DataFrame({
62+
"a":pd.to_datetime(["2021.01.15","2021.01.15","2020.04.06"])
63+
})
64+
return df
65+
"""
66+
67+
py_df = py"get_df"()|>Pandas.DataFrame
68+
julia_df = DataFrames.DataFrame(py_df)
69+
70+
@test julia_df.a == [DateTime(2021, 1, 15), DateTime(2021, 1, 15), DateTime(2020, 4, 6)]

0 commit comments

Comments
 (0)