Skip to content

Commit 5455d2b

Browse files
committed
Standardize input validation error type on pandas
1 parent 42a2ddc commit 5455d2b

21 files changed

+107
-107
lines changed

python/pyspark/pandas/base.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1498,7 +1498,7 @@ def shift(self, periods=1, fill_value=None) -> Union["Series", "Index"]:
14981498

14991499
def _shift(self, periods, fill_value, *, part_cols=()):
15001500
if not isinstance(periods, int):
1501-
raise ValueError("periods should be an int; however, got [%s]" % type(periods).__name__)
1501+
raise TypeError("periods should be an int; however, got [%s]" % type(periods).__name__)
15021502

15031503
col = self.spark.column
15041504
window = (
@@ -1828,7 +1828,7 @@ def take(self, indices) -> Union["Series", "Index"]:
18281828
)
18291829
"""
18301830
if not is_list_like(indices) or isinstance(indices, (dict, set)):
1831-
raise ValueError("`indices` must be a list-like except dict or set")
1831+
raise TypeError("`indices` must be a list-like except dict or set")
18321832
if isinstance(self, ps.Series):
18331833
return cast(ps.Series, self.iloc[indices])
18341834
else:

python/pyspark/pandas/config.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -70,7 +70,7 @@ class Option:
7070
>>> option.validate('abc') # doctest: +NORMALIZE_WHITESPACE
7171
Traceback (most recent call last):
7272
...
73-
ValueError: The value for option 'option.name' was <class 'str'>;
73+
TypeError: The value for option 'option.name' was <class 'str'>;
7474
however, expected types are [(<class 'float'>, <class 'int'>)].
7575
7676
>>> option.validate(-1.1)
@@ -101,7 +101,7 @@ def validate(self, v: Any) -> None:
101101
Validate the given value and throw an exception with related information such as key.
102102
"""
103103
if not isinstance(v, self.types):
104-
raise ValueError(
104+
raise TypeError(
105105
"The value for option '%s' was %s; however, expected types are "
106106
"[%s]." % (self.key, type(v), str(self.types))
107107
)

python/pyspark/pandas/frame.py

Lines changed: 19 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -771,7 +771,7 @@ def _map_series_op(self, op, other):
771771
if not isinstance(other, DataFrame) and (
772772
isinstance(other, IndexOpsMixin) or is_sequence(other)
773773
):
774-
raise ValueError(
774+
raise TypeError(
775775
"%s with a sequence is currently not supported; "
776776
"however, got %s." % (op, type(other).__name__)
777777
)
@@ -2936,7 +2936,7 @@ class locomotion
29362936
from pyspark.pandas.series import first_series
29372937

29382938
if not is_name_like_value(key):
2939-
raise ValueError("'key' should be a scalar value or tuple that contains scalar values")
2939+
raise TypeError("'key' should be a scalar value or tuple that contains scalar values")
29402940

29412941
if level is not None and is_name_like_tuple(key):
29422942
raise KeyError(key)
@@ -3301,7 +3301,7 @@ def where(self, cond, other=np.nan) -> "DataFrame":
33013301
]
33023302
kdf[tmp_cond_col_names] = cond
33033303
else:
3304-
raise ValueError("type of cond must be a DataFrame or Series")
3304+
raise TypeError("type of cond must be a DataFrame or Series")
33053305

33063306
tmp_other_col_names = [
33073307
tmp_other_col_name(name_like_string(label)) for label in self._internal.column_labels
@@ -3431,7 +3431,7 @@ def mask(self, cond, other=np.nan) -> "DataFrame":
34313431
from pyspark.pandas.series import Series
34323432

34333433
if not isinstance(cond, (DataFrame, Series)):
3434-
raise ValueError("type of cond must be a DataFrame or Series")
3434+
raise TypeError("type of cond must be a DataFrame or Series")
34353435

34363436
cond_inversed = cond._apply_series_op(lambda kser: ~kser)
34373437
return self.where(cond_inversed, other)
@@ -3997,7 +3997,7 @@ def insert(
39973997
assert allow_duplicates is False
39983998

39993999
if not is_name_like_value(column):
4000-
raise ValueError(
4000+
raise TypeError(
40014001
'"column" should be a scalar value or tuple that contains scalar values'
40024002
)
40034003

@@ -4289,7 +4289,7 @@ def round(self, decimals=0) -> "DataFrame":
42894289
elif isinstance(decimals, int):
42904290
decimals = {k: decimals for k in self._internal.column_labels}
42914291
else:
4292-
raise ValueError("decimals must be an integer, a dict-like or a Series")
4292+
raise TypeError("decimals must be an integer, a dict-like or a Series")
42934293

42944294
def op(kser):
42954295
label = kser._column_label
@@ -5660,7 +5660,7 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
56605660
will output the original DataFrame, simply ignoring the incompatible types.
56615661
"""
56625662
if is_list_like(lower) or is_list_like(upper):
5663-
raise ValueError(
5663+
raise TypeError(
56645664
"List-like value are not supported for 'lower' and 'upper' at the " + "moment"
56655665
)
56665666

@@ -5941,20 +5941,20 @@ def pivot_table(
59415941
small 5.5 2.333333 17 13
59425942
"""
59435943
if not is_name_like_value(columns):
5944-
raise ValueError("columns should be one column name.")
5944+
raise TypeError("columns should be one column name.")
59455945

59465946
if not is_name_like_value(values) and not (
59475947
isinstance(values, list) and all(is_name_like_value(v) for v in values)
59485948
):
5949-
raise ValueError("values should be one column or list of columns.")
5949+
raise TypeError("values should be one column or list of columns.")
59505950

59515951
if not isinstance(aggfunc, str) and (
59525952
not isinstance(aggfunc, dict)
59535953
or not all(
59545954
is_name_like_value(key) and isinstance(value, str) for key, value in aggfunc.items()
59555955
)
59565956
):
5957-
raise ValueError(
5957+
raise TypeError(
59585958
"aggfunc must be a dict mapping from column name "
59595959
"to aggregate functions (string)."
59605960
)
@@ -6031,7 +6031,7 @@ def pivot_table(
60316031
.agg(*agg_cols)
60326032
)
60336033
else:
6034-
raise ValueError("index should be a None or a list of columns.")
6034+
raise TypeError("index should be a None or a list of columns.")
60356035

60366036
if fill_value is not None and isinstance(fill_value, (int, float)):
60376037
sdf = sdf.fillna(fill_value)
@@ -7940,7 +7940,7 @@ def append(
79407940
3 3 4
79417941
"""
79427942
if isinstance(other, ps.Series):
7943-
raise ValueError("DataFrames.append() does not support appending Series to DataFrames")
7943+
raise TypeError("DataFrames.append() does not support appending Series to DataFrames")
79447944
if sort:
79457945
raise NotImplementedError("The 'sort' parameter is currently not supported")
79467946

@@ -10726,7 +10726,7 @@ def quantile(
1072610726
raise NotImplementedError('axis should be either 0 or "index" currently.')
1072710727

1072810728
if not isinstance(accuracy, int):
10729-
raise ValueError(
10729+
raise TypeError(
1073010730
"accuracy must be an integer; however, got [%s]" % type(accuracy).__name__
1073110731
)
1073210732

@@ -10735,7 +10735,7 @@ def quantile(
1073510735

1073610736
for v in q if isinstance(q, list) else [q]:
1073710737
if not isinstance(v, float):
10738-
raise ValueError(
10738+
raise TypeError(
1073910739
"q must be a float or an array of floats; however, [%s] found." % type(v)
1074010740
)
1074110741
if v < 0.0 or v > 1.0:
@@ -10904,9 +10904,9 @@ def query(self, expr, inplace=False) -> Optional["DataFrame"]:
1090410904
0 1 10 10
1090510905
"""
1090610906
if isinstance(self.columns, pd.MultiIndex):
10907-
raise ValueError("Doesn't support for MultiIndex columns")
10907+
raise TypeError("Doesn't support for MultiIndex columns")
1090810908
if not isinstance(expr, str):
10909-
raise ValueError(
10909+
raise TypeError(
1091010910
"expr must be a string to be evaluated, {} given".format(type(expr).__name__)
1091110911
)
1091210912
inplace = validate_bool_kwarg(inplace, "inplace")
@@ -11012,7 +11012,7 @@ class max_speed
1101211012
"""
1101311013
axis = validate_axis(axis)
1101411014
if not is_list_like(indices) or isinstance(indices, (dict, set)):
11015-
raise ValueError("`indices` must be a list-like except dict or set")
11015+
raise TypeError("`indices` must be a list-like except dict or set")
1101611016
if axis == 0:
1101711017
return cast(DataFrame, self.iloc[indices, :])
1101811018
else:
@@ -11098,7 +11098,7 @@ def eval(self, expr, inplace=False) -> Optional[Union["DataFrame", "Series"]]:
1109811098
from pyspark.pandas.series import first_series
1109911099

1110011100
if isinstance(self.columns, pd.MultiIndex):
11101-
raise ValueError("`eval` is not supported for multi-index columns")
11101+
raise TypeError("`eval` is not supported for multi-index columns")
1110211102
inplace = validate_bool_kwarg(inplace, "inplace")
1110311103
should_return_series = False
1110411104
series_name = None
@@ -11179,7 +11179,7 @@ def explode(self, column) -> "DataFrame":
1117911179
from pyspark.pandas.series import Series
1118011180

1118111181
if not is_name_like_value(column):
11182-
raise ValueError("column must be a scalar")
11182+
raise TypeError("column must be a scalar")
1118311183

1118411184
kdf = DataFrame(self._internal.resolved_copy) # type: "DataFrame"
1118511185
kser = kdf[column]

python/pyspark/pandas/generic.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1895,7 +1895,7 @@ def median(
18951895
numeric_only = True
18961896

18971897
if not isinstance(accuracy, int):
1898-
raise ValueError(
1898+
raise TypeError(
18991899
"accuracy must be an integer; however, got [%s]" % type(accuracy).__name__
19001900
)
19011901

python/pyspark/pandas/groupby.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2416,7 +2416,7 @@ def median(self, numeric_only=True, accuracy=10000) -> Union[DataFrame, Series]:
24162416
Name: b, dtype: float64
24172417
"""
24182418
if not isinstance(accuracy, int):
2419-
raise ValueError(
2419+
raise TypeError(
24202420
"accuracy must be an integer; however, got [%s]" % type(accuracy).__name__
24212421
)
24222422

python/pyspark/pandas/indexes/base.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2076,7 +2076,7 @@ def repeat(self, repeats: int) -> "Index":
20762076
MultiIndex([], )
20772077
"""
20782078
if not isinstance(repeats, int):
2079-
raise ValueError(
2079+
raise TypeError(
20802080
"`repeats` argument must be integer, but got {}".format(type(repeats).__name__)
20812081
)
20822082
elif repeats < 0:

python/pyspark/pandas/indexes/multi.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -342,7 +342,7 @@ def from_frame(df, names=None) -> "MultiIndex":
342342
if names is None:
343343
names = df._internal.column_labels
344344
elif not is_list_like(names):
345-
raise ValueError("Names should be list-like for a MultiIndex")
345+
raise TypeError("Names should be list-like for a MultiIndex")
346346
else:
347347
names = [name if is_name_like_tuple(name) else (name,) for name in names]
348348

python/pyspark/pandas/namespace.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -126,7 +126,7 @@ def from_pandas(pobj: Union[pd.DataFrame, pd.Series, pd.Index]) -> Union[Series,
126126
elif isinstance(pobj, pd.Index):
127127
return DataFrame(pd.DataFrame(index=pobj)).index
128128
else:
129-
raise ValueError("Unknown data type: {}".format(type(pobj).__name__))
129+
raise TypeError("Unknown data type: {}".format(type(pobj).__name__))
130130

131131

132132
_range = range # built-in range
@@ -2770,7 +2770,7 @@ def broadcast(obj) -> DataFrame:
27702770
...
27712771
"""
27722772
if not isinstance(obj, DataFrame):
2773-
raise ValueError("Invalid type : expected DataFrame got {}".format(type(obj).__name__))
2773+
raise TypeError("Invalid type : expected DataFrame got {}".format(type(obj).__name__))
27742774
return DataFrame(
27752775
obj._internal.with_new_sdf(F.broadcast(obj._internal.resolved_copy.spark_frame))
27762776
)

python/pyspark/pandas/plot/core.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -40,7 +40,7 @@ def get_top_n(self, data):
4040
if isinstance(data, (Series, DataFrame)):
4141
data = data.head(max_rows + 1).to_pandas()
4242
else:
43-
raise ValueError("Only DataFrame and Series are supported for plotting.")
43+
raise TypeError("Only DataFrame and Series are supported for plotting.")
4444

4545
self.partial = False
4646
if len(data) > max_rows:
@@ -80,7 +80,7 @@ def get_sampled(self, data):
8080
sampled = data._internal.resolved_copy.spark_frame.sample(fraction=self.fraction)
8181
return DataFrame(data._internal.with_new_sdf(sampled)).to_pandas()
8282
else:
83-
raise ValueError("Only DataFrame and Series are supported for plotting.")
83+
raise TypeError("Only DataFrame and Series are supported for plotting.")
8484

8585
def set_result_text(self, ax):
8686
assert hasattr(self, "fraction")

python/pyspark/pandas/series.py

Lines changed: 11 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -2016,7 +2016,7 @@ def clip(self, lower: Union[float, int] = None, upper: Union[float, int] = None)
20162016
original Series, simply ignoring the incompatible types.
20172017
"""
20182018
if is_list_like(lower) or is_list_like(upper):
2019-
raise ValueError(
2019+
raise TypeError(
20202020
"List-like value are not supported for 'lower' and 'upper' at the " + "moment"
20212021
)
20222022

@@ -3182,7 +3182,7 @@ def aggregate(self, func: Union[str, List[str]]) -> Union[Scalar, "Series"]:
31823182
elif isinstance(func, str):
31833183
return getattr(self, func)()
31843184
else:
3185-
raise ValueError("func must be a string or list of strings")
3185+
raise TypeError("func must be a string or list of strings")
31863186

31873187
agg = aggregate
31883188

@@ -3345,7 +3345,7 @@ def round(self, decimals=0) -> "Series":
33453345
Name: x, dtype: float64
33463346
"""
33473347
if not isinstance(decimals, int):
3348-
raise ValueError("decimals must be an integer")
3348+
raise TypeError("decimals must be an integer")
33493349
scol = F.round(self.spark.column, decimals)
33503350
return self._with_new_scol(scol)
33513351

@@ -3402,12 +3402,12 @@ def quantile(
34023402
).rename(self.name)
34033403
else:
34043404
if not isinstance(accuracy, int):
3405-
raise ValueError(
3405+
raise TypeError(
34063406
"accuracy must be an integer; however, got [%s]" % type(accuracy).__name__
34073407
)
34083408

34093409
if not isinstance(q, float):
3410-
raise ValueError(
3410+
raise TypeError(
34113411
"q must be a float or an array of floats; however, [%s] found." % type(q)
34123412
)
34133413
if q < 0.0 or q > 1.0:
@@ -3639,7 +3639,7 @@ def diff(self, periods=1) -> "Series":
36393639

36403640
def _diff(self, periods, *, part_cols=()):
36413641
if not isinstance(periods, int):
3642-
raise ValueError("periods should be an int; however, got [%s]" % type(periods).__name__)
3642+
raise TypeError("periods should be an int; however, got [%s]" % type(periods).__name__)
36433643
window = (
36443644
Window.partitionBy(*part_cols)
36453645
.orderBy(NATURAL_ORDER_COLUMN_NAME)
@@ -3984,7 +3984,7 @@ def pop(self, item) -> Union["Series", Scalar]:
39843984
dtype: float64
39853985
"""
39863986
if not is_name_like_value(item):
3987-
raise ValueError("'key' should be string or tuple that contains strings")
3987+
raise TypeError("'key' should be string or tuple that contains strings")
39883988
if not is_name_like_tuple(item):
39893989
item = (item,)
39903990
if self._internal.index_level < len(item):
@@ -4328,7 +4328,7 @@ def replace(self, to_replace=None, value=None, regex=False) -> "Series":
43284328
if to_replace is None:
43294329
return self.fillna(method="ffill")
43304330
if not isinstance(to_replace, (str, list, tuple, dict, int, float)):
4331-
raise ValueError("'to_replace' should be one of str, list, tuple, dict, int, float")
4331+
raise TypeError("'to_replace' should be one of str, list, tuple, dict, int, float")
43324332
if regex:
43334333
raise NotImplementedError("replace currently not support for regex")
43344334
to_replace = list(to_replace) if isinstance(to_replace, tuple) else to_replace
@@ -4438,7 +4438,7 @@ def update(self, other) -> None:
44384438
>>> reset_option("compute.ops_on_diff_frames")
44394439
"""
44404440
if not isinstance(other, Series):
4441-
raise ValueError("'other' must be a Series")
4441+
raise TypeError("'other' must be a Series")
44424442

44434443
combined = combine_frames(self._kdf, other._kdf, how="leftouter")
44444444

@@ -4813,7 +4813,7 @@ def combine_first(self, other) -> "Series":
48134813
dtype: float64
48144814
"""
48154815
if not isinstance(other, ps.Series):
4816-
raise ValueError("`combine_first` only allows `Series` for parameter `other`")
4816+
raise TypeError("`combine_first` only allows `Series` for parameter `other`")
48174817
if same_anchor(self, other):
48184818
this = self.spark.column
48194819
that = other.spark.column
@@ -4977,7 +4977,7 @@ def repeat(self, repeats: Union[int, "Series"]) -> "Series":
49774977
Series([], dtype: int64)
49784978
"""
49794979
if not isinstance(repeats, (int, Series)):
4980-
raise ValueError(
4980+
raise TypeError(
49814981
"`repeats` argument must be integer or Series, but got {}".format(type(repeats))
49824982
)
49834983

0 commit comments

Comments
 (0)