4 changes: 4 additions & 0 deletions pandas/core/arrays/_mixins.py
@@ -208,6 +208,10 @@ def _hash_pandas_object(
values, encoding=encoding, hash_key=hash_key, categorize=categorize
)

+ def _cast_pointwise_result(self, values: ArrayLike) -> ArrayLike:
+ values = np.asarray(values, dtype=object)
+ return lib.maybe_convert_objects(values, convert_non_numeric=True)
+
# Signature of "argmin" incompatible with supertype "ExtensionArray"
def argmin(self, axis: AxisInt = 0, skipna: bool = True): # type: ignore[override]
# override base class by adding axis keyword
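For context: the numpy-backed mixin keeps the object-inference fallback rather than the new `_from_scalars` round trip used in the base class below. A minimal sketch of what that fallback does, using the private helper named in the added lines (development-branch internals, illustrative only):

```python
# Sketch only: pandas-internal helper, subject to change on this branch.
import numpy as np
from pandas._libs import lib

# e.g. per-element results of a pointwise op, gathered into an object array
raw = np.array([1, 2, 3], dtype=object)
inferred = lib.maybe_convert_objects(raw, convert_non_numeric=True)
print(inferred.dtype)  # int64: plain numpy inference, no EA dtype is re-attached here
```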
4 changes: 2 additions & 2 deletions pandas/core/arrays/arrow/array.py
@@ -442,7 +442,7 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
# e.g. test_by_column_values_with_same_starting_value with nested
# values, one entry of which is an ArrowStringArray
# or test_agg_lambda_complex128_dtype_conversion for complex values
- return super()._cast_pointwise_result(values)
+ return values

if pa.types.is_null(arr.type):
if lib.infer_dtype(values) == "decimal":
@@ -498,7 +498,7 @@ def _cast_pointwise_result(self, values) -> ArrayLike:
if self.dtype.na_value is np.nan:
# ArrowEA has different semantics, so we return numpy-based
# result instead
- return super()._cast_pointwise_result(values)
+ return values
return ArrowExtensionArray(arr)
return self._from_pyarrow_array(arr)

56 changes: 52 additions & 4 deletions pandas/core/arrays/base.py
@@ -19,6 +19,7 @@
cast,
overload,
)
+ import warnings

import numpy as np

@@ -33,6 +34,7 @@
cache_readonly,
set_module,
)
+ from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
validate_bool_kwarg,
validate_insert_loc,
@@ -86,6 +88,7 @@
AstypeArg,
AxisInt,
Dtype,
+ DtypeObj,
FillnaOptions,
InterpolateOptions,
NumpySorter,
@@ -353,6 +356,38 @@ def _from_sequence_of_strings(
"""
raise AbstractMethodError(cls)

+ @classmethod
+ def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
+ """
+ Strict analogue to _from_sequence, allowing only sequences of scalars
+ that should be specifically inferred to the given dtype.
+
+ Parameters
+ ----------
+ scalars : sequence
+ dtype : ExtensionDtype
+
+ Raises
+ ------
+ TypeError or ValueError
+
+ Notes
+ -----
+ This is called in a try/except block when casting the result of a
+ pointwise operation.
+ """
+ try:
+ return cls._from_sequence(scalars, dtype=dtype, copy=False)
+ except (ValueError, TypeError):
+ raise
+ except Exception:
+ warnings.warn(
+ "_from_scalars should only raise ValueError or TypeError. "
+ "Consider overriding _from_scalars where appropriate.",
+ stacklevel=find_stack_level(),
+ )
+ raise
+
@classmethod
def _from_factorized(cls, values, original):
"""
@@ -383,13 +418,26 @@ def _from_factorized(cls, values, original):
"""
raise AbstractMethodError(cls)

- def _cast_pointwise_result(self, values) -> ArrayLike:
+ def _cast_pointwise_result(self, values: ArrayLike) -> ArrayLike:
"""
Construct an ExtensionArray after a pointwise operation.

Cast the result of a pointwise operation (e.g. Series.map) to an
- array, preserve dtype_backend if possible.
+ array. This is not required to return an ExtensionArray of the same
+ type as self or of the same dtype. It can also return another
+ ExtensionArray of the same "family" if you implement multiple
+ ExtensionArrays/Dtypes that are interoperable (e.g. if you have float
+ array with units, this method can return an int array with units).
+
+ If converting to your own ExtensionArray is not possible, this method
+ can raise an error (TypeError or ValueError) or return the input
+ `values` as-is. Then pandas will do the further type inference.

"""
- values = np.asarray(values, dtype=object)
- return lib.maybe_convert_objects(values, convert_non_numeric=True)
+ try:
+ return type(self)._from_scalars(values, dtype=self.dtype)

Review comment (Member): IIUC this change is bc older geopandas implements _from_scalars? can we catch+deprecate this so that we can eventually re-simplify this?
Review comment (Member Author): Yes, see my note about that in the top post (first bullet point)
Review comment (Member): Right, I'm asking you to catch+deprecate

+ except (ValueError, TypeError):
+ return values

# ------------------------------------------------------------------------
# Must be a Sequence
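The docstrings added above spell out the contract this change relies on: `_from_scalars` should reject foreign scalars with TypeError/ValueError so the default `_cast_pointwise_result` can fall back to returning the raw values. A hypothetical third-party override illustrating that contract (IPAddressArray is a made-up name, not part of pandas or this PR):

```python
# Hypothetical sketch of the override contract; not pandas code.
import ipaddress

from pandas.api.extensions import ExtensionArray


class IPAddressArray(ExtensionArray):  # remaining abstract methods omitted
    @classmethod
    def _from_scalars(cls, scalars, *, dtype):
        # Raise only TypeError/ValueError when the scalars are not ours; the
        # base _cast_pointwise_result then returns the raw values and leaves
        # further inference to pandas.
        if not all(isinstance(x, ipaddress.IPv4Address) for x in scalars):
            raise TypeError("scalars are not IPv4 addresses")
        return cls._from_sequence(scalars, dtype=dtype, copy=False)
```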
2 changes: 1 addition & 1 deletion pandas/core/arrays/sparse/array.py
@@ -622,7 +622,7 @@ def _from_factorized(cls, values, original) -> Self:
return cls(values, dtype=original.dtype)

def _cast_pointwise_result(self, values):
- result = super()._cast_pointwise_result(values)
+ result = lib.maybe_convert_objects(values, convert_non_numeric=True)
if result.dtype.kind == self.dtype.kind:
try:
# e.g. test_groupby_agg_extension
36 changes: 36 additions & 0 deletions pandas/core/dtypes/cast.py
@@ -414,6 +414,42 @@ def maybe_upcast_numeric_to_64bit(arr: NumpyIndexT) -> NumpyIndexT:
return arr


+ def cast_pointwise_result(
+ result: ArrayLike,
+ original_array: ArrayLike,
+ ) -> ArrayLike:
+ """
+ Try casting result of a pointwise operation back to the original dtype if
+ appropriate.

Review comment (Member): is "original dtype" accurate here or would the "dtype family" phrasing be more accurate?

+ Parameters
+ ----------
+ result : array-like
+ Result to cast.
+ original_array : array-like
+ Input array from which result was calculated.

Review comment (Member): isn't this an EA?

+ Returns
+ -------
+ array-like
+ """
+ if isinstance(original_array.dtype, ExtensionDtype):
+ try:
+ result = original_array._cast_pointwise_result(result)
+ except (TypeError, ValueError):
+ pass
+
+ if isinstance(result.dtype, ExtensionDtype):
+ return result
+
+ if not isinstance(result, np.ndarray):
+ result = np.asarray(result, dtype=object)
+
+ if result.dtype != object:
+ return result
+ return lib.maybe_convert_objects(result, convert_non_numeric=True)


@overload
def ensure_dtype_can_hold_na(dtype: np.dtype) -> np.dtype: ...

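A rough usage sketch of the new module-level helper (an internal API added by this PR, so the import path and behavior may still change): the original array's EA gets the first attempt at casting, and anything it refuses falls through to plain numpy inference.

```python
# Sketch against this branch's internals; the printed dtype is the expected
# outcome, not a test taken from the PR.
import numpy as np
import pandas as pd
from pandas.core.dtypes.cast import cast_pointwise_result

orig = pd.array([1, 2, 3], dtype="Int64")       # EA-backed original
result = np.array([2, 4, 6], dtype=object)      # pointwise results as objects

out = cast_pointwise_result(result, orig)
print(out.dtype)  # Int64 expected: the masked array casts the result back itself
```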
3 changes: 2 additions & 1 deletion pandas/core/frame.py
@@ -84,6 +84,7 @@
from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
+ cast_pointwise_result,
construct_1d_arraylike_from_scalar,
construct_2d_arraylike_from_scalar,
find_common_type,
@@ -11200,7 +11201,7 @@ def _append_internal(
if isinstance(self.index.dtype, ExtensionDtype):
# GH#41626 retain e.g. CategoricalDtype if reached via
# df.loc[key] = item
- row_df.index = self.index.array._cast_pointwise_result(row_df.index._values)
+ row_df.index = cast_pointwise_result(row_df.index._values, self.index.array)

# infer_objects is needed for
# test_append_empty_frame_to_series_with_dateutil_tz
3 changes: 2 additions & 1 deletion pandas/core/groupby/ops.py
@@ -35,6 +35,7 @@
from pandas.util._decorators import cache_readonly

from pandas.core.dtypes.cast import (
+ cast_pointwise_result,
maybe_downcast_to_dtype,
)
from pandas.core.dtypes.common import (
@@ -963,7 +964,7 @@ def agg_series(
np.ndarray or ExtensionArray
"""
result = self._aggregate_series_pure_python(obj, func)
- return obj.array._cast_pointwise_result(result)
+ return cast_pointwise_result(result, obj.array)

@final
def _aggregate_series_pure_python(
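At the groupby level, routing agg_series through the shared helper is meant to keep the existing dtype handling for pure-Python aggregations over EA columns. Roughly (expected behavior, not asserted by this diff):

```python
# Behavior sketch: a lambda aggregation over a masked-dtype column.
import pandas as pd

df = pd.DataFrame(
    {"key": ["a", "a", "b"], "val": pd.array([1, 2, 3], dtype="Int64")}
)
res = df.groupby("key")["val"].agg(lambda s: s.sum())
print(res.dtype)  # Int64 expected when the aggregated scalars still fit the dtype
```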
3 changes: 2 additions & 1 deletion pandas/core/indexes/base.py
@@ -89,6 +89,7 @@
from pandas.core.dtypes.cast import (
LossySetitemError,
can_hold_element,
+ cast_pointwise_result,
common_dtype_categorical_compat,
find_result_type,
infer_dtype_from,
@@ -6531,7 +6532,7 @@ def map(self, mapper, na_action: Literal["ignore"] | None = None):
# e.g. if we are floating and new_values is all ints, then we
# don't want to cast back to floating. But if we are UInt64
# and new_values is all ints, we want to try.
- new_values = arr._cast_pointwise_result(new_values)
+ new_values = cast_pointwise_result(new_values, arr)
dtype = new_values.dtype
return Index(new_values, dtype=dtype, copy=False, name=self.name)

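The Index.map call site keeps the intent described in the comment above: try casting back to the EA dtype when the mapped values still fit it. A quick sketch at the public level (expected outcome, not a test added here):

```python
import pandas as pd

idx = pd.Index([1, 2, 3], dtype="UInt64")
mapped = idx.map(lambda x: x + 1)
print(mapped.dtype)  # UInt64 expected, since the mapped values are still unsigned ints
```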
3 changes: 2 additions & 1 deletion pandas/core/series.py
@@ -69,6 +69,7 @@
from pandas.core.dtypes.astype import astype_is_view
from pandas.core.dtypes.cast import (
LossySetitemError,
+ cast_pointwise_result,
construct_1d_arraylike_from_scalar,
find_common_type,
infer_dtype_from,
@@ -3252,7 +3253,7 @@ def combine(
new_values[:] = [func(lv, other) for lv in self._values]
new_name = self.name

- res_values = self.array._cast_pointwise_result(new_values)
+ res_values = cast_pointwise_result(new_values, self.array)
return self._constructor(
res_values,
dtype=res_values.dtype,
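Series.combine is the remaining call site switched to the helper; the pointwise results from func go through the same casting step before the result Series is built. A short sketch (expected outcome, not part of the diff's tests):

```python
import pandas as pd

s1 = pd.Series([1, 2, 3], dtype="Int64")
s2 = pd.Series([10, 20, 30], dtype="Int64")
combined = s1.combine(s2, lambda a, b: a + b)
print(combined.dtype)  # Int64 expected if the summed values can be cast back
```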
5 changes: 2 additions & 3 deletions pandas/tests/extension/decimal/array.py
@@ -112,13 +112,12 @@ def _from_factorized(cls, values, original):
return cls(values)

def _cast_pointwise_result(self, values):
- result = super()._cast_pointwise_result(values)
try:
# If this were ever made a non-test EA, special-casing could
# be avoided by handling Decimal in maybe_convert_objects
- res = type(self)._from_sequence(result, dtype=self.dtype)
+ res = type(self)._from_sequence(values, dtype=self.dtype)
except (ValueError, TypeError):
- return result
+ return values
return res

_HANDLED_TYPES = (decimal.Decimal, numbers.Number, np.ndarray)
5 changes: 2 additions & 3 deletions pandas/tests/extension/json/array.py
@@ -94,11 +94,10 @@ def _from_factorized(cls, values, original):
return cls([UserDict(x) for x in values if x != ()])

def _cast_pointwise_result(self, values):
- result = super()._cast_pointwise_result(values)
try:
- return type(self)._from_sequence(result, dtype=self.dtype)
+ return type(self)._from_sequence(values, dtype=self.dtype)
except (ValueError, TypeError):
- return result
+ return values

def __getitem__(self, item):
if isinstance(item, tuple):