2 changes: 2 additions & 0 deletions CHANGELOG.md
@@ -2,6 +2,8 @@

## 0.10.13dev

+* [Feature] Disable full stack trace when using spark connect ([#1011](https://github.com/ploomber/jupysql/issues/1011)) (by [@b1ackout](https://github.com/b1ackout))

## 0.10.12 (2024-07-12)

* [Feature] Remove sqlalchemy upper bound ([#1020](https://github.com/ploomber/jupysql/pull/1020))
4 changes: 2 additions & 2 deletions src/sql/run/sparkdataframe.py
@@ -9,9 +9,9 @@


def handle_spark_dataframe(dataframe, should_cache=False):
"""Execute a ResultSet sqlaproxy using pysark module."""
"""Execute a ResultSet sqlaproxy using pyspark module."""
Author: Fix typo

    if not DataFrame and not CDataFrame:
-        raise exceptions.MissingPackageError("pysark not installed")
+        raise exceptions.MissingPackageError("pyspark not installed")
Author: Fix typo


    return SparkResultProxy(dataframe, dataframe.columns, should_cache)

17 changes: 13 additions & 4 deletions src/sql/util.py
@@ -7,6 +7,12 @@
from sqlglot.errors import ParseError
from sqlalchemy.exc import SQLAlchemyError
from ploomber_core.dependencies import requires

+try:
+    from pyspark.sql.utils import AnalysisException
+except ModuleNotFoundError:
+    AnalysisException = None

Comment on lines +11 to +15
Author: This handles the case where the pyspark module is not installed.
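
A minimal, self-contained sketch of how the guard behaves with and without pyspark installed (the `looks_like_spark_error` helper is hypothetical, for illustration only):

```python
# Optional-import guard: bind the name to None when pyspark is absent,
# so downstream isinstance checks degrade gracefully instead of the
# module raising ImportError at load time.
try:
    from pyspark.sql.utils import AnalysisException
except ModuleNotFoundError:
    AnalysisException = None


def looks_like_spark_error(error):
    # Falls back to False when pyspark (and thus AnalysisException) is missing.
    return isinstance(error, AnalysisException) if AnalysisException else False


print(looks_like_spark_error(ValueError("boom")))  # False either way
```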

import ast
from os.path import isfile
import re
@@ -556,11 +562,14 @@ def is_non_sqlalchemy_error(error):
"pyodbc.ProgrammingError",
# Clickhouse errors
"DB::Exception:",
# Pyspark
"UNRESOLVED_ROUTINE",
"PARSE_SYNTAX_ERROR",
Comment on lines -559 to -561
Author: Removed these since they are covered by AnalysisException.
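
A hedged way to check this claim, assuming pyspark is installed and using a throwaway local (non-connect) session: an unresolved function surfaces as an `AnalysisException`, so the type check subsumes the old substring constants:

```python
# An unresolved function raises AnalysisException, whose message carries
# the UNRESOLVED_ROUTINE error class on recent Spark versions, so one
# isinstance check replaces the per-message substrings.
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

spark = SparkSession.builder.master("local[1]").getOrCreate()
try:
    spark.sql("SELECT no_such_function(1)")
except AnalysisException as err:
    print("caught:", type(err).__name__)  # -> caught: AnalysisException
spark.stop()
```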

    ]
-    return any(msg in str(error) for msg in specific_db_errors)
+    is_pyspark_analysis_exception = (
+        isinstance(error, AnalysisException) if AnalysisException else False
+    )
+    return (
+        any(msg in str(error) for msg in specific_db_errors)
+        or is_pyspark_analysis_exception
+    )
Comment on lines +566 to +572
Author: If AnalysisException was imported, this checks whether the error is an instance of pyspark's AnalysisException and handles it accordingly.
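
A hedged usage sketch of the new return expression; the import path follows this repo's `src/` layout, and the fallback branch needs no pyspark at all:

```python
# Substring matching still works without pyspark: AnalysisException is
# None, so only specific_db_errors is consulted.
from sql.util import is_non_sqlalchemy_error

assert is_non_sqlalchemy_error(Exception("DB::Exception: bad query"))
assert not is_non_sqlalchemy_error(Exception("unrelated failure"))

# With pyspark installed, any AnalysisException instance is recognized
# by type alone, regardless of its message text.
```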



def if_substring_exists(string, substrings):