diff --git a/python/pyspark/sql/readwriter.py b/python/pyspark/sql/readwriter.py
index 9208a527d29c3..01902f1470a09 100644
--- a/python/pyspark/sql/readwriter.py
+++ b/python/pyspark/sql/readwriter.py
@@ -351,6 +351,9 @@ def csv(self, path, schema=None, sep=None, encoding=None, quote=None, escape=Non
         :param maxCharsPerColumn: defines the maximum number of characters allowed for any given
                                   value being read. If None is set, it uses the default value,
                                   ``1000000``.
+        :param maxLogRecordsPerPartition: defines the maximum number of malformed records to log
+                                          per partition when such records are ignored. If None is
+                                          set, it uses the default value, ``1``.
         :param mode: allows a mode for dealing with corrupt records during parsing. If None is
                      set, it uses the default value, ``PERMISSIVE``.
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 88fa5cd21d58f..ec5e43805c78b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -394,6 +394,8 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
    * a record can have.
    * `maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed
    * for any given value being read.
+   * `maxLogRecordsPerPartition` (default `1`): defines the maximum number of malformed records
+   * to log per partition when such records are ignored.
    * `mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
    * during parsing.
    *
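For context, here is a minimal usage sketch (not part of the patch itself) showing how the new option would be exercised from the DataFrameReader API, assuming it is passed through `.option(...)` like the other CSV options documented above; the input path and the choice of `DROPMALFORMED` mode are illustrative.

```scala
// Sketch: cap how many ignored malformed CSV records are logged per partition,
// assuming `maxLogRecordsPerPartition` is wired through DataFrameReader.option(...)
// like the existing CSV options in this patch.
import org.apache.spark.sql.SparkSession

object MaxLogRecordsExample {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("MaxLogRecordsExample")
      .master("local[*]")
      .getOrCreate()

    val df = spark.read
      // Drop malformed records instead of failing the read.
      .option("mode", "DROPMALFORMED")
      // Log at most 5 ignored malformed records per partition
      // (the patch's default is 1).
      .option("maxLogRecordsPerPartition", "5")
      .csv("/tmp/input.csv") // hypothetical input path

    df.show()
    spark.stop()
  }
}
```

In PySpark, the same setting would be the `maxLogRecordsPerPartition` keyword argument of `DataFrameReader.csv`, mirroring the docstring change above.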