Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Support DataFrame.explain(extended: str) case to be consistent with Scala side
  • Loading branch information
HyukjinKwon committed Jun 3, 2020
commit 984f33b16b1ecd38cfc746ededa1b4e3601c0ee0
35 changes: 24 additions & 11 deletions python/pyspark/sql/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ def explain(self, extended=None, mode=None):
"""Prints the (logical and physical) plans to the console for debugging purpose.

:param extended: boolean, default ``False``. If ``False``, prints only the physical plan.
When this is a string and ``mode`` is not specified, the string is used as
the mode.
:param mode: specifies the expected output format of plans.

* ``simple``: Print only a physical plan.
Expand Down Expand Up @@ -306,31 +308,40 @@ def explain(self, extended=None, mode=None):
Output [2]: [age#0, name#1]
...

>>> df.explain("cost")
== Optimized Logical Plan ==
...Statistics...
...

.. versionchanged:: 3.0.0
Added optional argument `mode` to specify the expected output format of plans.
"""

if extended is not None and mode is not None:
raise Exception("extended and mode can not be specified simultaneously")
raise Exception("extended and mode should not be set together.")

# For the no argument case: df.explain()
is_no_argument = extended is None and mode is None

# For the cases below:
# explain(True)
# explain(extended=False)
is_extended_case = extended is not None and isinstance(extended, bool)
is_extended_case = isinstance(extended, bool) and mode is None

# For the mode specified: df.explain(mode="formatted")
is_mode_case = mode is not None and isinstance(mode, basestring)
# For the case when extended is mode:
# df.explain("formatted")
is_extended_as_mode = isinstance(extended, basestring) and mode is None

if not is_no_argument and not (is_extended_case or is_mode_case):
if extended is not None:
err_msg = "extended (optional) should be provided as bool" \
", got {0}".format(type(extended))
else: # For mode case
err_msg = "mode (optional) should be provided as str, got {0}".format(type(mode))
raise TypeError(err_msg)
# For the mode specified:
# df.explain(mode="formatted")
is_mode_case = extended is None and isinstance(mode, basestring)

if not (is_no_argument or is_extended_case or is_extended_as_mode or is_mode_case):
argtypes = [
str(type(arg)) for arg in [extended, mode] if arg is not None]
raise TypeError(
"extended (optional) and mode (optional) should be a string "
"and bool; however, got [%s]." % ", ".join(argtypes))

# Sets an explain mode depending on a given argument
if is_no_argument:
Expand All @@ -339,6 +350,8 @@ def explain(self, extended=None, mode=None):
explain_mode = "extended" if extended else "simple"
elif is_mode_case:
explain_mode = mode
elif is_extended_as_mode:
explain_mode = extended

print(self._sc._jvm.PythonSQLUtils.explainString(self._jdf.queryExecution(), explain_mode))

Expand Down