-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 'spark.sql.orc.compression.codec' configuration doesn't take effect on hive table writing #19218
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
677541b
4e70fff
3f022f9
6d77bf9
42aca3d
5cbe999
732266c
c7ff62c
384ee04
8c92074
dd5060a
d427df5
35cfa01
5387497
676d6a7
ae1da8f
fd73145
7615939
90cbcb3
dd6d635
4fe8170
aa31261
dfb36d9
c4801f6
105e129
dc12038
d779ee6
0cb7b7a
78e0403
7804f60
52cdd75
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
[SPARK-21786][SQL] The 'spark.sql.parquet.compression.codec' and 'spark.sql.orc.compression.codec' configuration doesn't take effect on hive table writing. Fix some issues.
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -69,18 +69,22 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { | |
| } | ||
|
|
||
| fileSinkConf.tableInfo.getOutputFileFormatClassName match { | ||
| case formatName if formatName.endsWith("ParquetOutputFormat") => | ||
| case formatName if formatName.toLowerCase.endsWith("parquetoutputformat") => | ||
| val compressionConf = "parquet.compression" | ||
| val compressionCodec = getCompressionByPriority(fileSinkConf, compressionConf, | ||
| sparkSession.sessionState.conf.parquetCompressionCodec) match { | ||
| val compressionCodec = getCompressionByPriority( | ||
| fileSinkConf, | ||
| compressionConf, | ||
| default = sparkSession.sessionState.conf.parquetCompressionCodec) match { | ||
| case "NONE" => "UNCOMPRESSED" | ||
| case _@x => x | ||
|
||
| } | ||
| hadoopConf.set(compressionConf, compressionCodec) | ||
| case formatName if formatName.endsWith("OrcOutputFormat") => | ||
|
||
| val compressionConf = "orc.compress" | ||
|
||
| val compressionCodec = getCompressionByPriority(fileSinkConf, compressionConf, | ||
| sparkSession.sessionState.conf.orcCompressionCodec) match { | ||
| val compressionCodec = getCompressionByPriority( | ||
| fileSinkConf, | ||
| compressionConf, | ||
| default = sparkSession.sessionState.conf.orcCompressionCodec) match { | ||
|
||
| case "UNCOMPRESSED" => "NONE" | ||
|
||
| case _@x => x | ||
|
||
| } | ||
|
||
|
|
@@ -106,8 +110,13 @@ private[hive] trait SaveAsHiveFile extends DataWritingCommand { | |
| options = Map.empty) | ||
| } | ||
|
|
||
| // Because compression configurations can come in a variety of ways, | ||
| // we choose the compression configuration in this order: | ||
| // For parquet: `compression` > `parquet.compression` > `spark.sql.parquet.compression.codec` | ||
| // For orc: `compression` > `orc.compress` > `spark.sql.orc.compression.codec` | ||
|
||
| private def getCompressionByPriority(fileSinkConf: FileSinkDesc, | ||
| compressionConf: String, default: String): String = { | ||
|
||
| // The variable `default` was set to spark sql conf. | ||
| val props = fileSinkConf.tableInfo.getProperties | ||
| val priorities = List("compression", compressionConf) | ||
| priorities.find(props.getProperty(_, null) != null) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggestion: use the constant `ParquetOutputFormat.COMPRESSION` instead of the string literal "parquet.compression".