-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-8398][CORE] Hadoop input/output format advanced control #6848
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
koertkuipers
wants to merge
24
commits into
apache:master
from
tresata-opensource:feat-hadoop-input-format-advanced-control
Closed
Changes from 1 commit
Commits
Show all changes
24 commits
Select commit
Hold shift + click to select a range
db24636
make hadoop configuration available to user for all hadoop input form…
koertkuipers 333d943
add JobConf to all RDD saveAs... methods
koertkuipers 1f82a33
actually use conf in saveAsSequenceFile
koertkuipers 135b96e
merge from master
koertkuipers 425a578
expose hadoop Configuration or JobConf for all methods that use hadoo…
koertkuipers 9230543
address issues raised by andrewor14
koertkuipers 2bfa320
merge from master
koertkuipers 2122160
really simple tests that the Configuration provided gets used in (New…
koertkuipers df2c2ae
fix scalastyle errors
koertkuipers e2f7023
dont break binary compatibility (make MiMa happy)
koertkuipers 1cbd95a
silence mima for JavaRDDLike.saveAsTextFile and JavaRDDLike.saveAsObj…
koertkuipers c5cf6b0
merge from master
koertkuipers 96ccee0
merge from master
koertkuipers 3097312
merge from master
koertkuipers cfe3f0c
merge from master
koertkuipers 7ca662c
move mima excludes to section for right version
koertkuipers 5483148
merge from master
koertkuipers 470b3d9
Merge branch 'master' into feat-hadoop-input-format-advanced-control
koertkuipers 208c019
merge from master
koertkuipers 5e0b89c
merge from master
koertkuipers 0ea4e5c
fix scalastyle errors
koertkuipers 60c34e1
move mima exclusions for SPARK-8398 to version 2.0
koertkuipers c06548d
merge from master
koertkuipers 34f97d4
merge from master
koertkuipers File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
add JobConf to all RDD saveAs... methods
- Loading branch information
commit 333d943b77533fa162a51460820fdac7423a54fb
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,7 +27,7 @@ import scala.reflect.{classTag, ClassTag} | |
| import com.clearspring.analytics.stream.cardinality.HyperLogLogPlus | ||
| import org.apache.hadoop.io.{BytesWritable, NullWritable, Text} | ||
| import org.apache.hadoop.io.compress.CompressionCodec | ||
| import org.apache.hadoop.mapred.TextOutputFormat | ||
| import org.apache.hadoop.mapred.{ JobConf, TextOutputFormat } | ||
|
|
||
| import org.apache.spark._ | ||
| import org.apache.spark.Partitioner._ | ||
|
|
@@ -1376,7 +1376,7 @@ abstract class RDD[T: ClassTag]( | |
| /** | ||
| * Save this RDD as a text file, using string representations of elements. | ||
| */ | ||
| def saveAsTextFile(path: String): Unit = withScope { | ||
| def saveAsTextFile(path: String, conf: JobConf = new JobConf(sc.hadoopConfiguration)): Unit = withScope { | ||
| // https://issues.apache.org/jira/browse/SPARK-2075 | ||
| // | ||
| // NullWritable is a `Comparable` in Hadoop 1.+, so the compiler cannot find an implicit | ||
|
|
@@ -1397,7 +1397,7 @@ abstract class RDD[T: ClassTag]( | |
| } | ||
| } | ||
| RDD.rddToPairRDDFunctions(r)(nullWritableClassTag, textClassTag, null) | ||
| .saveAsHadoopFile[TextOutputFormat[NullWritable, Text]](path) | ||
| .saveAsHadoopFile(path, classOf[NullWritable], classOf[Text], classOf[TextOutputFormat[NullWritable, Text]], conf) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. this line is almost certainly too long. Can you break it at |
||
| } | ||
|
|
||
| /** | ||
|
|
@@ -1421,10 +1421,10 @@ abstract class RDD[T: ClassTag]( | |
| /** | ||
| * Save this RDD as a SequenceFile of serialized objects. | ||
| */ | ||
| def saveAsObjectFile(path: String): Unit = withScope { | ||
| def saveAsObjectFile(path: String, conf: JobConf = new JobConf(sc.hadoopConfiguration)): Unit = withScope { | ||
| this.mapPartitions(iter => iter.grouped(10).map(_.toArray)) | ||
| .map(x => (NullWritable.get(), new BytesWritable(Utils.serialize(x)))) | ||
| .saveAsSequenceFile(path) | ||
| .saveAsSequenceFile(path, None, conf) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nit: I would do |
||
| } | ||
|
|
||
| /** | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
remove space around the {}