-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
TYP: remove NaTType as possible result of Timestamp and Timedelta constructor #46171
Changes from all commits
9e57d6d
2abbc57
99158b1
0f4130d
24ecfe2
64f00d5
1ee6e00
5761bf2
84b119f
5232375
64feddd
1cc352a
39b3b98
031397c
773e808
9d22a77
5a7b312
5abe369
d980ebe
e79d253
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,7 +3,10 @@ from datetime import ( | |
timedelta, | ||
tzinfo as _tzinfo, | ||
) | ||
from typing import Any | ||
from typing import ( | ||
Any, | ||
Union, | ||
) | ||
|
||
import numpy as np | ||
|
||
|
@@ -15,7 +18,12 @@ nat_strings: set[str] | |
|
||
def is_null_datetimelike(val: object, inat_is_null: bool = ...) -> bool: ... | ||
|
||
class NaTType(datetime): | ||
_NaTComparisonTypes = Union[datetime, timedelta, Period, np.datetime64, np.timedelta64] | ||
|
||
class _NatComparison: | ||
def __call__(self, other: _NaTComparisonTypes) -> bool: ... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is needed because when you compare to |
||
|
||
class NaTType: | ||
value: np.int64 | ||
def asm8(self) -> np.datetime64: ... | ||
def to_datetime64(self) -> np.datetime64: ... | ||
|
@@ -54,26 +62,19 @@ class NaTType(datetime): | |
def weekofyear(self) -> float: ... | ||
def day_name(self) -> float: ... | ||
def month_name(self) -> float: ... | ||
# error: Return type "float" of "weekday" incompatible with return | ||
# type "int" in supertype "date" | ||
def weekday(self) -> float: ... # type: ignore[override] | ||
# error: Return type "float" of "isoweekday" incompatible with return | ||
# type "int" in supertype "date" | ||
def isoweekday(self) -> float: ... # type: ignore[override] | ||
def weekday(self) -> float: ... | ||
def isoweekday(self) -> float: ... | ||
def total_seconds(self) -> float: ... | ||
# error: Signature of "today" incompatible with supertype "datetime" | ||
def today(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] | ||
# error: Signature of "today" incompatible with supertype "datetime" | ||
def now(self, *args, **kwargs) -> NaTType: ... # type: ignore[override] | ||
def today(self, *args, **kwargs) -> NaTType: ... | ||
def now(self, *args, **kwargs) -> NaTType: ... | ||
def to_pydatetime(self) -> NaTType: ... | ||
def date(self) -> NaTType: ... | ||
def round(self) -> NaTType: ... | ||
def floor(self) -> NaTType: ... | ||
def ceil(self) -> NaTType: ... | ||
def tz_convert(self) -> NaTType: ... | ||
def tz_localize(self) -> NaTType: ... | ||
# error: Signature of "replace" incompatible with supertype "datetime" | ||
def replace( # type: ignore[override] | ||
def replace( | ||
self, | ||
year: int | None = ..., | ||
month: int | None = ..., | ||
|
@@ -86,38 +87,24 @@ class NaTType(datetime): | |
tzinfo: _tzinfo | None = ..., | ||
fold: int | None = ..., | ||
) -> NaTType: ... | ||
# error: Return type "float" of "year" incompatible with return | ||
# type "int" in supertype "date" | ||
@property | ||
def year(self) -> float: ... # type: ignore[override] | ||
def year(self) -> float: ... | ||
@property | ||
def quarter(self) -> float: ... | ||
# error: Return type "float" of "month" incompatible with return | ||
# type "int" in supertype "date" | ||
@property | ||
def month(self) -> float: ... # type: ignore[override] | ||
# error: Return type "float" of "day" incompatible with return | ||
# type "int" in supertype "date" | ||
def month(self) -> float: ... | ||
@property | ||
def day(self) -> float: ... # type: ignore[override] | ||
# error: Return type "float" of "hour" incompatible with return | ||
# type "int" in supertype "date" | ||
def day(self) -> float: ... | ||
@property | ||
def hour(self) -> float: ... # type: ignore[override] | ||
# error: Return type "float" of "minute" incompatible with return | ||
# type "int" in supertype "date" | ||
def hour(self) -> float: ... | ||
@property | ||
def minute(self) -> float: ... # type: ignore[override] | ||
# error: Return type "float" of "second" incompatible with return | ||
# type "int" in supertype "date" | ||
def minute(self) -> float: ... | ||
@property | ||
def second(self) -> float: ... # type: ignore[override] | ||
def second(self) -> float: ... | ||
@property | ||
def millisecond(self) -> float: ... | ||
# error: Return type "float" of "microsecond" incompatible with return | ||
# type "int" in supertype "date" | ||
@property | ||
def microsecond(self) -> float: ... # type: ignore[override] | ||
def microsecond(self) -> float: ... | ||
@property | ||
def nanosecond(self) -> float: ... | ||
# inject Timedelta properties | ||
|
@@ -132,24 +119,7 @@ class NaTType(datetime): | |
def qyear(self) -> float: ... | ||
def __eq__(self, other: Any) -> bool: ... | ||
def __ne__(self, other: Any) -> bool: ... | ||
# https://github.com/python/mypy/issues/9015 | ||
# error: Argument 1 of "__lt__" is incompatible with supertype "date"; | ||
# supertype defines the argument type as "date" | ||
def __lt__( # type: ignore[override] | ||
self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 | ||
) -> bool: ... | ||
# error: Argument 1 of "__le__" is incompatible with supertype "date"; | ||
# supertype defines the argument type as "date" | ||
def __le__( # type: ignore[override] | ||
self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 | ||
) -> bool: ... | ||
# error: Argument 1 of "__gt__" is incompatible with supertype "date"; | ||
# supertype defines the argument type as "date" | ||
def __gt__( # type: ignore[override] | ||
self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 | ||
) -> bool: ... | ||
# error: Argument 1 of "__ge__" is incompatible with supertype "date"; | ||
# supertype defines the argument type as "date" | ||
def __ge__( # type: ignore[override] | ||
self, other: datetime | timedelta | Period | np.datetime64 | np.timedelta64 | ||
) -> bool: ... | ||
__lt__: _NatComparison | ||
__le__: _NatComparison | ||
__gt__: _NatComparison | ||
__ge__: _NatComparison |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -32,9 +32,7 @@ class Timestamp(datetime): | |
|
||
resolution: ClassVar[Timedelta] | ||
value: int # np.int64 | ||
|
||
# error: "__new__" must return a class instance (got "Union[Timestamp, NaTType]") | ||
def __new__( # type: ignore[misc] | ||
def __new__( | ||
cls: type[_DatetimeT], | ||
ts_input: int | ||
| np.integer | ||
|
@@ -57,7 +55,10 @@ class Timestamp(datetime): | |
tzinfo: _tzinfo | None = ..., | ||
*, | ||
fold: int | None = ..., | ||
) -> _DatetimeT | NaTType: ... | ||
) -> _DatetimeT: ... | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you add a comment (maybe ref this issue) about the reasoning here. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. done in next commit |
||
# GH 46171 | ||
# While Timestamp can return pd.NaT, having the constructor return | ||
# a Union with NaTType makes things awkward for users of pandas | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would be inclined to return Any in the public api if there are genuine concerns rather that have the incorrect return type. |
||
def _set_freq(self, freq: BaseOffset | None) -> None: ... | ||
@property | ||
def year(self) -> int: ... | ||
|
@@ -145,9 +146,11 @@ class Timestamp(datetime): | |
) -> _DatetimeT: ... | ||
def __radd__(self: _DatetimeT, other: timedelta) -> _DatetimeT: ... | ||
@overload # type: ignore | ||
def __sub__(self, other: datetime) -> timedelta: ... | ||
def __sub__(self, other: datetime) -> Timedelta: ... | ||
@overload | ||
def __sub__(self, other: timedelta | np.timedelta64 | Tick) -> datetime: ... | ||
def __sub__( | ||
self: _DatetimeT, other: timedelta | np.timedelta64 | Tick | ||
) -> _DatetimeT: ... | ||
def __hash__(self) -> int: ... | ||
def weekday(self) -> int: ... | ||
def isoweekday(self) -> int: ... | ||
|
@@ -206,3 +209,5 @@ class Timestamp(datetime): | |
def to_numpy( | ||
self, dtype: np.dtype | None = ..., copy: bool = ... | ||
) -> np.datetime64: ... | ||
@property | ||
def _date_repr(self) -> str: ... |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1767,16 +1767,13 @@ def _format_datetime64_dateonly( | |
nat_rep: str = "NaT", | ||
date_format: str | None = None, | ||
) -> str: | ||
if x is NaT: | ||
if isinstance(x, NaTType): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. -1 on code changes to satisfy the type checker. We know that static type checker have issues with singletons and imo that was adequately noted in the removed comment adjacent to the ignore statement. It may be nbd is some cases, but am always -1 on principle. |
||
return nat_rep | ||
|
||
if date_format: | ||
return x.strftime(date_format) | ||
else: | ||
# error: Item "NaTType" of "Union[NaTType, Any]" has no attribute "_date_repr" | ||
# The underlying problem here is that mypy doesn't understand that NaT | ||
# is a singleton, so that the check above excludes it here. | ||
return x._date_repr # type: ignore[union-attr] | ||
return x._date_repr | ||
|
||
|
||
def get_format_datetime64( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
NaTType
inherits fromdatetime
. Removing the inheritance because it removes a few mypy ignores doesn't sound like a good idea to me. If there is a reason whyNaTType
shouldn't inherit fromdatetime
, the implantation should change.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't think
NaTType
should inherit fromdatetime
, but maybe that's a separate PR? My reasoning why it should get removed is as follows. With the current code, the following happens:So here we are saying that the
Timedelta
constructor can produce an object of typeNaTType
which is also adatetime
, butdatetime
andtimedelta
are different in the python standard library.From a static typing perspective, it is messing up a number of different things, and I don't think we get any benefit from that inheritance with respect to typing.
Furthermore, as best as I can tell, the
NaTType
implementation just overrides the entiredatetime
API, so we are not getting any implementation benefits from explicitly stating the inheritance. But I could be missing something here.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While in the
nattype.pyx
part of the code, we indicate this inheritance, from a typing perspective, it's not really the case because there are methods indatetime
that are not implemented inNaTType
. We even have a test to check that:pandas/tests/scalar/test_nat.py:test_missing_public_nat_methods()
With static typing, the assumption is that a class that is a subclass of the parent has all the methods of the parent. Our implementation of
NaTType
doesn't do that, so we need to tell the type system thatNaTType
is not a subclass ofdatetime
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Wouldn't changing the annotation without changing the implementation mess up static inference on e.g.:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm really looking forward to #24983 (which might take some time).
If Timestamp/Timedelta/NaTType were the last pieces to get pandas to be a py.typed library, I would fully agree to purposefully make the two large changes proposed in this PR (not inherit from
datetime
and simplifying__new__
) - the benefits for users would (from my perspective) outweigh the discrepancy between implementation and typing. With the current state of Pandas it is more difficult to judge: I'm not against this PR but also not in favor of it.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It really depends on how you are handling
pd.NaT
in your user code, whether you are talking aboutTimestamp
versusTimedelta
, etc.As pandas users, my staff and I live in the
Timestamp
/Timedelta
world. We usepd.to_datetime()
to convert strings to dates and times and getTimestamp
objects as a result. The relationships of those types todatetime.datetime
anddatetime.timedelta
doesn't matter. So I wouldn't write code like that example. Besides, we have this kind of inconsistency to worry about, independent of typing:Seems kind of odd that you ask for a
Timestamp
, which is a subclass ofdatetime
, and the result isTrue
to ask if it is adatetime
, but isFalse
when asking if it is aTimestamp
So the point is that the implementation has issues - that's maybe what #24983 would take care of, but until then we should type it in a way that makes sense from a user perspective.
For what it's worth, we don't document that
Timestamp
is a subclass ofdatetime.datetime
nor do we even have a documentation page aboutpd.NaT
at all that indicates anything about its type.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a note about it here in the API reference but probably easy to miss: https://pandas.pydata.org/pandas-docs/stable/reference/arrays.html#datetime-data