Commits (435)
47bf406
[HOTFIX] Disabling flaky test (fix in progress as part of SPARK-7224)
pwendell Apr 30, 2015
7dacc08
[SPARK-7224] added mock repository generator for --packages tests
brkyvz Apr 30, 2015
6c65da6
[SPARK-5342] [YARN] Allow long running Spark apps to run on secure YA…
harishreedharan Apr 30, 2015
adbdb19
[SPARK-7207] [ML] [BUILD] Added ml.recommendation, ml.regression to S…
jkbradley Apr 30, 2015
e0628f2
Revert "[SPARK-5342] [YARN] Allow long running Spark apps to run on s…
pwendell Apr 30, 2015
6702324
[SPARK-7196][SQL] Support precision and scale of decimal type for JDBC
viirya Apr 30, 2015
07a8620
[SPARK-7288] Suppress compiler warnings due to use of sun.misc.Unsafe…
JoshRosen Apr 30, 2015
77cc25f
[SPARK-7267][SQL]Push down Project when it's child is Limit
pzzs Apr 30, 2015
fa01bec
[Build] Enable MiMa checks for SQL
JoshRosen Apr 30, 2015
1c3e402
[SPARK-7279] Removed diffSum which is theoretical zero in LinearRegre…
Apr 30, 2015
149b3ee
[SPARK-7242][SQL][MLLIB] Frequent items for DataFrames
brkyvz Apr 30, 2015
ee04413
[SPARK-7280][SQL] Add "drop" column/s on a data frame
rakeshchalasani May 1, 2015
0797338
[SPARK-7093] [SQL] Using newPredicate in NestedLoopJoin to enable cod…
scwf May 1, 2015
a0d8a61
[SPARK-7109] [SQL] Push down left side filter for left semi join
scwf May 1, 2015
e991255
[SPARK-6913][SQL] Fixed "java.sql.SQLException: No suitable driver fo…
SlavikBaranov May 1, 2015
3ba5aaa
[SPARK-5213] [SQL] Pluggable SQL Parser Support
chenghao-intel May 1, 2015
473552f
[SPARK-7123] [SQL] support table.star in sqlcontext
scwf May 1, 2015
beeafcf
Revert "[SPARK-5213] [SQL] Pluggable SQL Parser Support"
pwendell May 1, 2015
69a739c
[SPARK-7282] [STREAMING] Fix the race conditions in StreamingListener…
zsxwing May 1, 2015
b5347a4
[SPARK-7248] implemented random number generators for DataFrames
brkyvz May 1, 2015
36a7a68
[SPARK-6479] [BLOCK MANAGER] Create off-heap block storage API
zhzhan May 1, 2015
a9fc505
HOTFIX: Disable buggy dependency checker
pwendell May 1, 2015
0a2b15c
[SPARK-4550] In sort-based shuffle, store map outputs in serialized form
sryza May 1, 2015
7cf1eb7
[SPARK-7287] enabled fixed test
brkyvz May 1, 2015
14b3288
[SPARK-7291] [CORE] Fix a flaky test in AkkaRpcEnvSuite
zsxwing May 1, 2015
c24aeb6
[SPARK-6257] [PYSPARK] [MLLIB] MLlib API missing items in Recommendation
MechCoder May 1, 2015
7fe0f3f
[SPARK-3468] [WEBUI] Timeline-View feature
sarutak May 1, 2015
3052f49
[SPARK-4705] Handle multiple app attempts event logs, history server.
May 1, 2015
3b514af
[SPARK-3066] [MLLIB] Support recommendAll in matrix factorization model
May 1, 2015
7630213
[SPARK-5891] [ML] Add Binarizer ML Transformer
viirya May 1, 2015
c8c481d
Limit help option regex
May 1, 2015
27de6fe
changing persistence engine trait to an abstract class
nirandaperera May 1, 2015
7d42722
[SPARK-5854] personalized page rank
dwmclary May 1, 2015
1262e31
[SPARK-6846] [WEBUI] [HOTFIX] return to GET for kill link in UI since…
srowen May 1, 2015
1686032
[SPARK-7183] [NETWORK] Fix memory leak of TransportRequestHandler.str…
viirya May 1, 2015
3753776
[SPARK-7274] [SQL] Create Column expression for array/struct creation.
rxin May 1, 2015
58d6584
Revert "[SPARK-7287] enabled fixed test"
pwendell May 1, 2015
c6d9a42
Revert "[SPARK-7224] added mock repository generator for --packages t…
pwendell May 1, 2015
f53a488
[SPARK-7213] [YARN] Check for read permissions before copying a Hadoo…
nishkamravi2 May 1, 2015
7b5dd3e
[SPARK-7281] [YARN] Add option to set AM's lib path in client mode.
May 1, 2015
4dc8d74
[SPARK-7240][SQL] Single pass covariance calculation for dataframes
brkyvz May 1, 2015
b1f4ca8
[SPARK-5342] [YARN] Allow long running Spark apps to run on secure YA…
harishreedharan May 1, 2015
5c1faba
Ignore flakey test in SparkSubmitUtilsSuite
pwendell May 1, 2015
41c6a44
[SPARK-7312][SQL] SPARK-6913 broke jdk6 build
yhuai May 1, 2015
e6fb377
[SPARK-7304] [BUILD] Include $@ in call to mvn consistently in make-d…
May 2, 2015
98e7045
[SPARK-6999] [SQL] Remove the infinite recursive method (useless)
chenghao-intel May 2, 2015
ebc25a4
[SPARK-7309] [CORE] [STREAMING] Shutdown the thread pools in Received…
zsxwing May 2, 2015
b88c275
[SPARK-7112][Streaming][WIP] Add a InputInfoTracker to track all the …
jerryshao May 2, 2015
4786484
[SPARK-2808][Streaming][Kafka] update kafka to 0.8.2
koeninger May 2, 2015
ae98eec
[SPARK-3444] Provide an easy way to change log level
holdenk May 2, 2015
099327d
[SPARK-6954] [YARN] ExecutorAllocationManager can end up requesting a…
sryza May 2, 2015
2022193
[SPARK-7216] [MESOS] Add driver details page to Mesos cluster UI.
tnachen May 2, 2015
b4b43df
[SPARK-6443] [SPARK SUBMIT] Could not submit app in standalone cluste…
WangTaoTheTonic May 2, 2015
8f50a07
[SPARK-2691] [MESOS] Support for Mesos DockerInfo
hellertime May 2, 2015
38d4e9e
[SPARK-6229] Add SASL encryption to network library.
May 2, 2015
b79aeb9
[SPARK-7317] [Shuffle] Expose shuffle handle
May 2, 2015
2e0f357
[SPARK-7242] added python api for freqItems in DataFrames
brkyvz May 2, 2015
7394e7a
[SPARK-7120] [SPARK-7121] Closure cleaner nesting + documentation + t…
May 2, 2015
ecc6eb5
[SPARK-7315] [STREAMING] [TEST] Fix flaky WALBackedBlockRDDSuite
tdas May 2, 2015
856a571
[SPARK-3444] Fix typo in Dataframes.py introduced in []
deanchen May 2, 2015
da30352
[SPARK-7323] [SPARK CORE] Use insertAll instead of insert while mergi…
May 2, 2015
bfcd528
[SPARK-6030] [CORE] Using simulated field layout method to compute cl…
advancedxy May 2, 2015
82c8c37
[MINOR] [HIVE] Fix QueryPartitionSuite.
May 2, 2015
5d6b90d
[SPARK-5213] [SQL] Pluggable SQL Parser Support
chenghao-intel May 2, 2015
ea841ef
[SPARK-7255] [STREAMING] [DOCUMENTATION] Added documentation for spar…
BenFradet May 2, 2015
49549d5
[SPARK-7031] [THRIFTSERVER] let thrift server take SPARK_DAEMON_MEMOR…
WangTaoTheTonic May 2, 2015
f4af925
[SPARK-7022] [PYSPARK] [ML] Add ML.Tuning.ParamGridBuilder to PySpark
May 3, 2015
daa70bf
[SPARK-6907] [SQL] Isolated client for HiveMetastore
marmbrus May 3, 2015
9e25b09
[SPARK-7302] [DOCS] SPARK building documentation still mentions build…
srowen May 3, 2015
1ffa8cb
[SPARK-7329] [MLLIB] simplify ParamGridBuilder impl
mengxr May 4, 2015
9646018
[SPARK-7241] Pearson correlation for DataFrames
brkyvz May 4, 2015
3539cb7
[SPARK-5563] [MLLIB] LDA with online variational inference
hhbyyh May 4, 2015
343d3bf
[SPARK-5100] [SQL] add webui for thriftserver
tianyi May 4, 2015
5a1a107
[MINOR] Fix python test typo?
May 4, 2015
e0833c5
[SPARK-5956] [MLLIB] Pipeline components should be copyable.
mengxr May 4, 2015
f32e69e
[SPARK-7319][SQL] Improve the output from DataFrame.show()
May 4, 2015
fc8b581
[SPARK-6943] [SPARK-6944] DAG visualization on SparkUI
May 4, 2015
8055411
[SPARK-7243][SQL] Contingency Tables for DataFrames
brkyvz May 5, 2015
678c4da
[SPARK-7266] Add ExpectsInputTypes to expressions when possible.
rxin May 5, 2015
8aa5aea
[SPARK-7236] [CORE] Fix to prevent AkkaUtils askWithReply from sleepi…
BryanCutler May 5, 2015
e9b16e6
[SPARK-7314] [SPARK-3524] [PYSPARK] upgrade Pyrolite to 4.4
mengxr May 5, 2015
da738cf
[MINOR] Renamed variables in SparkKMeans.scala, LocalKMeans.scala and…
pippobaudos May 5, 2015
c5790a2
[MINOR] [BUILD] Declare ivy dependency in root pom.
May 5, 2015
1854ac3
[SPARK-7139] [STREAMING] Allow received block metadata to be saved to…
tdas May 5, 2015
8776fe0
[HOTFIX] [TEST] Ignoring flaky tests
tdas May 5, 2015
8436f7e
[SPARK-7113] [STREAMING] Support input information reporting for Dire…
jerryshao May 5, 2015
4d29867
[SPARK-7341] [STREAMING] [TESTS] Fix the flaky test: org.apache.spark…
zsxwing May 5, 2015
fc8feaa
[SPARK-6653] [YARN] New config to specify port for sparkYarnAM actor …
May 5, 2015
4222da6
[SPARK-5112] Expose SizeEstimator as a developer api
sryza May 5, 2015
51f4620
[SPARK-7357] Improving HBaseTest example
JihongMA May 5, 2015
d497358
[SPARK-3454] separate json endpoints for data in the UI
squito May 5, 2015
b83091a
[MINOR] Minor update for document
viirya May 5, 2015
5ffc73e
[SPARK-5074] [CORE] [TESTS] Fix the flakey test 'run shuffle with map…
zsxwing May 5, 2015
c6d1efb
[SPARK-7350] [STREAMING] [WEBUI] Attach the Streaming tab when callin…
zsxwing May 5, 2015
5ab652c
[SPARK-7202] [MLLIB] [PYSPARK] Add SparseMatrixPickler to SerDe
MechCoder May 5, 2015
5995ada
[SPARK-6612] [MLLIB] [PYSPARK] Python KMeans parity
FlytxtRnD May 5, 2015
9d250e6
Closes #5591
mengxr May 5, 2015
d4cb38a
[MLLIB] [TREE] Verify size of input rdd > 0 when building meta data
May 5, 2015
1fdabf8
[SPARK-7237] Many user provided closures are not actually cleaned
May 5, 2015
57e9f29
[SPARK-7318] [STREAMING] DStream cleans objects that are not closures
May 5, 2015
9f1f9b1
[SPARK-7007] [CORE] Add a metric source for ExecutorAllocationManager
jerryshao May 5, 2015
18340d7
[SPARK-7243][SQL] Reduce size for Contingency Tables in DataFrames
brkyvz May 5, 2015
ee374e8
[SPARK-7333] [MLLIB] Add BinaryClassificationEvaluator to PySpark
mengxr May 5, 2015
47728db
[SPARK-5888] [MLLIB] Add OneHotEncoder as a Transformer
sryza May 5, 2015
489700c
[SPARK-6939] [STREAMING] [WEBUI] Add timeline and histogram graphs fo…
zsxwing May 5, 2015
735bc3d
[SPARK-7294][SQL] ADD BETWEEN
May 5, 2015
fec7b29
[SPARK-7351] [STREAMING] [DOCS] Add spark.streaming.ui.retainedBatche…
zsxwing May 5, 2015
3059291
[SQL][Minor] make StringComparison extends ExpectsInputTypes
scwf May 5, 2015
c688e3c
[SPARK-7230] [SPARKR] Make RDD private in SparkR.
shivaram May 5, 2015
0092abb
Some minor cleanup after SPARK-4550.
sryza May 6, 2015
1fd31ba
[SPARK-6231][SQL/DF] Automatically resolve join condition ambiguity f…
rxin May 6, 2015
51b3d41
Revert "[SPARK-3454] separate json endpoints for data in the UI"
rxin May 6, 2015
a466944
[SPARK-6841] [SPARKR] add support for mean, median, stdev etc.
hqzizania May 6, 2015
ba2b566
[SPARK-7358][SQL] Move DataFrame mathfunctions into functions
brkyvz May 6, 2015
7b14578
[SPARK-6267] [MLLIB] Python API for IsotonicRegression
yanboliang May 6, 2015
9f019c7
[SPARK-7384][Core][Tests] Fix flaky tests for distributed mode in Bro…
zsxwing May 6, 2015
32cdc81
[SPARK-6940] [MLLIB] Add CrossValidator to Python ML pipeline API
mengxr May 6, 2015
322e7e7
[SQL] JavaDoc update for various DataFrame functions.
rxin May 6, 2015
150f671
[SPARK-5456] [SQL] fix decimal compare for jdbc rdd
adrian-wang May 6, 2015
c3eb441
[SPARK-6201] [SQL] promote string and do widen types for IN
adrian-wang May 6, 2015
f2c4708
[SPARK-1442] [SQL] Window Function Support for Spark SQL
yhuai May 6, 2015
002c123
[SPARK-7311] Introduce internal Serializer API for determining if ser…
JoshRosen May 6, 2015
845d1d4
Add `Private` annotation.
JoshRosen May 6, 2015
7740996
[HOT-FIX] Move HiveWindowFunctionQuerySuite.scala to hive compatibili…
yhuai May 6, 2015
1ad04da
[SPARK-5995] [ML] Make Prediction dev API public
jkbradley May 6, 2015
fbf1f34
[HOT FIX] [SPARK-7418] Ignore flaky SparkSubmitUtilsSuite test
May 7, 2015
4e93042
[SPARK-6799] [SPARKR] Remove SparkR RDD examples, add dataframe examples
shivaram May 7, 2015
316a5c0
[SPARK-7396] [STREAMING] [EXAMPLE] Update KafkaWordCountProducer to u…
jerryshao May 7, 2015
8fa6829
[SPARK-7371] [SPARK-7377] [SPARK-7408] DAG visualization addendum (#5…
May 7, 2015
71a452b
[HOT FIX] For DAG visualization #5954
May 7, 2015
14502d5
[SPARK-7405] [STREAMING] Fix the bug that ReceiverInputDStream doesn'…
zsxwing May 7, 2015
773aa25
[SPARK-7432] [MLLIB] disable cv doctest
mengxr May 7, 2015
9cfa9a5
[SPARK-6812] [SPARKR] filter() on DataFrame does not work as expected.
May 7, 2015
2d6612c
[SPARK-5938] [SPARK-5443] [SQL] Improve JsonRDD performance
May 7, 2015
cfdadcb
[SPARK-7430] [STREAMING] [TEST] General improvements to streaming tes…
tdas May 7, 2015
01187f5
[SPARK-7217] [STREAMING] Add configuration to control the default beh…
tdas May 7, 2015
fa8fddf
[SPARK-7295][SQL] bitwise operations for DataFrame DSL
Shiti May 7, 2015
fae4e2d
[SPARK-7035] Encourage __getitem__ over __getattr__ on column access …
ksonj May 7, 2015
8b6b46e
[SPARK-7421] [MLLIB] OnlineLDA cleanups
jkbradley May 7, 2015
4f87e95
[SPARK-7429] [ML] Params cleanups
jkbradley May 7, 2015
ed9be06
[SPARK-7330] [SQL] avoid NPE at jdbc rdd
adrian-wang May 7, 2015
9e2ffb1
[SPARK-7388] [SPARK-7383] wrapper for VectorAssembler in Python
brkyvz May 7, 2015
068c315
[SPARK-7118] [Python] Add the coalesce Spark SQL function available i…
May 7, 2015
1712a7c
[SPARK-6093] [MLLIB] Add RegressionMetrics in PySpark/MLlib
yanboliang May 7, 2015
5784c8d
[SPARK-1442] [SQL] [FOLLOW-UP] Address minor comments in Window Funct…
yhuai May 7, 2015
dec8f53
[SPARK-7116] [SQL] [PYSPARK] Remove cache() causing memory leak
ksonj May 7, 2015
074d75d
[SPARK-5213] [SQL] Remove the duplicated SparkSQLParser
chenghao-intel May 7, 2015
0c33bf8
[SPARK-7399] [SPARK CORE] Fixed compilation error in scala 2.11
May 7, 2015
4eecf55
[SPARK-7373] [MESOS] Add docker support for launching drivers in meso…
tnachen May 7, 2015
f121651
[SPARK-7391] DAG visualization: auto expand if linked from another viz
May 7, 2015
88717ee
[SPARK-7347] DAG visualization: add tooltips to RDDs
May 7, 2015
347a329
[SPARK-7328] [MLLIB] [PYSPARK] Pyspark.mllib.linalg.Vectors: Missing …
MechCoder May 7, 2015
658a478
[SPARK-5726] [MLLIB] Elementwise (Hadamard) Vector Product Transformer
ogeagla May 7, 2015
e43803b
[SPARK-6948] [MLLIB] compress vectors in VectorAssembler
mengxr May 7, 2015
97d1182
[SQL] [MINOR] make star and multialias extend NamedExpression
scwf May 7, 2015
ea3077f
[SPARK-7277] [SQL] Throw exception if the property mapred.reduce.task…
viirya May 7, 2015
937ba79
[SPARK-5281] [SQL] Registering table on RDD is giving MissingRequirem…
dragos May 7, 2015
35f0173
[SPARK-2155] [SQL] [WHEN D THEN E] [ELSE F] add CaseKeyWhen for "CASE…
cloud-fan May 7, 2015
88063c6
[SPARK-7450] Use UNSAFE.getLong() to speed up BitSetMethods#anySet()
tedyu May 7, 2015
22ab70e
[SPARK-7305] [STREAMING] [WEBUI] Make BatchPage show friendly informa…
zsxwing May 8, 2015
cd1d411
[SPARK-6908] [SQL] Use isolated Hive client
marmbrus May 8, 2015
92f8f80
[SPARK-7452] [MLLIB] fix bug in topBykey and update test
coderxiang May 8, 2015
3af423c
[SPARK-6986] [SQL] Use Serializer2 in more cases.
yhuai May 8, 2015
714db2e
[SPARK-7470] [SQL] Spark shell SQLContext crashes without hive
May 8, 2015
f496bf3
[SPARK-7232] [SQL] Add a Substitution batch for spark sql analyzer
scwf May 8, 2015
c2f0821
[SPARK-7392] [CORE] bugfix: Kryo buffer size cannot be larger than 2M
liyezhang556520 May 8, 2015
ebff732
[SPARK-6869] [PYSPARK] Add pyspark archives path to PYTHONPATH
lianhuiwang May 8, 2015
c796be7
[SPARK-3454] separate json endpoints for data in the UI
squito May 8, 2015
f5ff4a8
[SPARK-7383] [ML] Feature Parity in PySpark for ml.features
brkyvz May 8, 2015
65afd3c
[SPARK-7474] [MLLIB] update ParamGridBuilder doctest
mengxr May 8, 2015
008a60d
[SPARK-6824] Fill the docs for DataFrame API in SparkR
hqzizania May 8, 2015
35d6a99
[SPARK-7436] Fixed instantiation of custom recovery mode factory and …
jacek-lewandowski May 8, 2015
a1ec08f
[SPARK-7298] Harmonize style of new visualizations
mateiz May 8, 2015
2d05f32
[SPARK-7133] [SQL] Implement struct, array, and map field accessor
cloud-fan May 8, 2015
4b3bb0e
[SPARK-6627] Finished rename to ShuffleBlockResolver
kayousterhout May 8, 2015
25889d8
[SPARK-7490] [CORE] [Minor] MapOutputTracker.deserializeMapStatuses: …
May 8, 2015
dc71e47
[MINOR] Ignore python/lib/pyspark.zip
zsxwing May 8, 2015
c45c09b
[WEBUI] Remove debug feature for vis.js
sarutak May 8, 2015
4e7360e
[SPARK-7489] [SPARK SHELL] Spark shell crashes when compiled with sca…
vinodkc May 8, 2015
31da40d
[MINOR] Defeat early garbage collection of test suite variable
tellison May 8, 2015
3b0c5e7
[SPARK-7466] DAG visualization: fix orphan nodes
May 8, 2015
9042f8f
[MINOR] [CORE] Allow History Server to read kerberos opts from config…
May 8, 2015
5467c34
[SPARK-7378] [CORE] Handle deep links to unloaded apps.
May 8, 2015
90527f5
[SPARK-7390] [SQL] Only merge other CovarianceCounter when its count …
viirya May 8, 2015
6dad76e
[SPARK-4699] [SQL] Make caseSensitive configurable in spark sql analyzer
May 8, 2015
35c9599
[SPARK-5913] [MLLIB] Python API for ChiSqSelector
yanboliang May 8, 2015
1c78f68
updated ec2 instance types
May 8, 2015
ffdc40c
[SPARK-6955] Perform port retries at NettyBlockTransferService level
aarondav May 9, 2015
bd61f07
[SPARK-7469] [SQL] DAG visualization: show SQL query operators
May 9, 2015
54e6fa0
[SPARK-7237] Clean function in several RDD methods
tedyu May 9, 2015
84bf931
[SPARK-7488] [ML] Feature Parity in PySpark for ml.recommendation
brkyvz May 9, 2015
b6c797b
[SPARK-7451] [YARN] Preemption of executors is counted as failure cau…
May 9, 2015
0a901dd
[SPARK-7231] [SPARKR] Changes to make SparkR DataFrame dplyr friendly.
shivaram May 9, 2015
cde5483
[SPARK-7375] [SQL] Avoid row copying in exchange when sort.serializeM…
JoshRosen May 9, 2015
86ef4cf
[SPARK-7262] [ML] Binary LogisticRegression with L1/L2 (elastic net) …
May 9, 2015
2992623
[SPARK-7498] [ML] removed varargs annotation from Params.setDefaults
jkbradley May 9, 2015
dda6d9f
[SPARK-7438] [SPARK CORE] Fixed validation of relativeSD in countAppr…
May 9, 2015
12b95ab
[SPARK-7403] [WEBUI] Link URL in objects on Timeline View is wrong in…
sarutak May 9, 2015
7d0f172
[STREAMING] [DOCS] Fix wrong url about API docs of StreamingListener
dobashim May 9, 2015
3071aac
Upgrade version of jackson-databind in sql/core/pom.xml
tedyu May 9, 2015
bd74301
[BUILD] Reference fasterxml.jackson.version in sql/core/pom.xml
tedyu May 9, 2015
b13162b
[SPARK-7475] [MLLIB] adjust ldaExample for online LDA
hhbyyh May 9, 2015
bf7e81a
[SPARK-6091] [MLLIB] Add MulticlassMetrics in PySpark/MLlib
yanboliang May 10, 2015
d7a37bc
[SPARK-7345][SQL] Spark cannot detect renamed columns using JDBC conn…
osidorkin May 10, 2015
6bf9352
[MINOR] [SQL] Fixes variable name typo
liancheng May 10, 2015
3038443
[SPARK-7431] [ML] [PYTHON] Made CrossValidatorModel call parent init …
jkbradley May 10, 2015
8c07c75
[SPARK-5521] PCA wrapper for easy transform vectors
catap May 10, 2015
c5aca0c
[SPARK-7427] [PYSPARK] Make sharedParams match in Scala, Python
gweidner May 11, 2015
0835f1e
[SPARK-7512] [SPARKR] Fix RDD's show method to use getJRDD
shivaram May 11, 2015
2242ab3
[SPARK-7519] [SQL] fix minor bugs in thrift server UI
tianyi May 11, 2015
d70a076
[SPARK-7326] [STREAMING] Performing window() on a WindowedDStream doe…
wesleymiao May 11, 2015
042dda3
[SPARK-6092] [MLLIB] Add RankingMetrics in PySpark/MLlib
yanboliang May 11, 2015
4f8a155
[SPARK-7522] [EXAMPLES] Removed angle brackets from dataFormat option
BryanCutler May 11, 2015
1b46556
[SPARK-7361] [STREAMING] Throw unambiguous exception when attempting …
tdas May 11, 2015
0a4844f
[SPARK-7462] By default retain group by columns in aggregate
rxin May 11, 2015
82fee9d
[SPARK-6470] [YARN] Add support for YARN node labels.
sryza May 11, 2015
7ce2a33
[SPARK-7508] JettyUtils-generated servlets to log & report all errors
steveloughran May 11, 2015
6e9910c
[SPARK-7515] [DOC] Update documentation for PySpark on YARN with clus…
sarutak May 11, 2015
8e67433
[SPARK-7516] [Minor] [DOC] Replace depreciated inferSchema() with cre…
gchen May 11, 2015
25c01c5
[STREAMING] [MINOR] Close files correctly when iterator is finished i…
jerryshao May 11, 2015
a8ea096
Update Documentation: leftsemi instead of semijoin
debuggingfuture May 11, 2015
91dc3df
[MINOR] [DOCS] Fix the link to test building info on the wiki
srowen May 11, 2015
4f4dbb0
[SQL] Show better error messages for incorrect join types in DataFrames.
rxin May 12, 2015
57255dc
[SPARK-7084] improve saveAsTable documentation
phatak-dev May 12, 2015
3a9b699
[SPARK-7462][SQL] Update documentation for retaining grouping columns…
rxin May 12, 2015
87229c9
Updated DataFrame.saveAsTable Hive warning to include SPARK-7550 ticket.
rxin May 12, 2015
35fb42a
[SPARK-5893] [ML] Add bucketizer
yinxusen May 12, 2015
f9c7580
[SPARK-7530] [STREAMING] Added StreamingContext.getState() to expose …
tdas May 12, 2015
b6bf4f7
[SPARK-7324] [SQL] DataFrame.dropDuplicates
rxin May 12, 2015
e35d878
[SPARK-7411] [SQL] Support SerDe for HiveQl in CTAS
chenghao-intel May 12, 2015
4b5e1fe
[SPARK-7437] [SQL] Fold "literal in (item1, item2, ..., literal, ...)…
pzzs May 12, 2015
028ad4b
[SPARK-7509][SQL] DataFrame.drop in Python for dropping columns.
rxin May 12, 2015
b94a933
[SPARK-7435] [SPARKR] Make DataFrame.show() consistent with that of S…
rekhajoshm May 12, 2015
1669675
[SQL] Rename Dialect -> ParserDialect.
rxin May 12, 2015
640f63b
[SPARK-6994][SQL] Update docs for fetching Row fields by name
May 12, 2015
d916ad9
Refactor out SparkPlanner from SQLContext
Apr 17, 2015
72f35d8
Cleanup HiveContext, following SparkContext refactoring
Apr 17, 2015
78e74a0
Refactor out QueryExecution from SQLContext
Apr 17, 2015
b96d2dc
Factor out HiveQueryExecution from HiveContext
Apr 17, 2015
0c7fcd6
Revert erroneous test rename
Apr 17, 2015
51194d3
Move prepareForExecution inside QueryExecution
Apr 20, 2015
31be7f2
Add Apache license headers
Apr 21, 2015
1801efc
Fix Thriftserver Build
Apr 21, 2015
6c3af85
Refactor out SparkPlanner from SQLContext
Apr 17, 2015
aef1974
Cleanup HiveContext, following SparkContext refactoring
Apr 17, 2015
1e957ff
Refactor out QueryExecution from SQLContext
Apr 17, 2015
59544f9
Factor out HiveQueryExecution from HiveContext
Apr 17, 2015
cccf924
Revert erroneous test rename
Apr 17, 2015
e8ace9c
Move prepareForExecution inside QueryExecution
Apr 20, 2015
382e933
Merge branch 'sqlctx-refactor' of https://github.com/evacchi/spark in…
evacchi May 12, 2015
[SPARK-6229] Add SASL encryption to network library.
There are two main parts of this change:

- Extending the bootstrap mechanism in the network library to add a server-side
  bootstrap (which works a little bit differently than the client-side bootstrap), and
  to allow the bootstraps to modify the underlying channel.

- Use SASL to encrypt data going through the RPC channel.

The second item requires some non-optimal code to work around the fact that the
outbound path in Netty is not thread-safe, and ordering is very important once
encryption is in the picture.

A lot of the changes outside the network/common library are just to adjust to the
changed API for initializing the RPC server.

Author: Marcelo Vanzin <[email protected]>

Closes #5377 from vanzin/SPARK-6229 and squashes the following commits:

ff01966 [Marcelo Vanzin] Use fancy new size config style.
be53f32 [Marcelo Vanzin] Merge branch 'master' into SPARK-6229
47d4aff [Marcelo Vanzin] Merge branch 'master' into SPARK-6229
7a2a805 [Marcelo Vanzin] Clean up some unneeded changes.
2f92237 [Marcelo Vanzin] Add comment.
67bb0c6 [Marcelo Vanzin] Revert "Avoid exposing ByteArrayWritableChannel outside of test code."
065f684 [Marcelo Vanzin] Add test to verify chunking.
3d1695d [Marcelo Vanzin] Minor cleanups.
73cff0e [Marcelo Vanzin] Skip bytes in decode path too.
318ad23 [Marcelo Vanzin] Avoid exposing ByteArrayWritableChannel outside of test code.
346f829 [Marcelo Vanzin] Avoid trip through channel selector by not reporting 0 bytes written.
a4a5938 [Marcelo Vanzin] Review feedback.
4797519 [Marcelo Vanzin] Remove unused import.
9908ada [Marcelo Vanzin] Fix test, SASL backend disposal.
7fe1489 [Marcelo Vanzin] Add a test that makes sure encryption is actually enabled.
adb6f9d [Marcelo Vanzin] Review feedback.
cf2a605 [Marcelo Vanzin] Clean up some code.
8584323 [Marcelo Vanzin] Fix a comment.
e98bc55 [Marcelo Vanzin] Add option to only allow encrypted connections to the server.
dad42fc [Marcelo Vanzin] Make encryption thread-safe, less memory-intensive.
b00999a [Marcelo Vanzin] Consolidate ByteArrayWritableChannel, fix SASL code to match master changes.
b923cae [Marcelo Vanzin] Make SASL encryption handler thread-safe, handle FileRegion messages.
39539a7 [Marcelo Vanzin] Add config option to enable SASL encryption.
351a86f [Marcelo Vanzin] Add SASL encryption to network library.
fbe6ccb [Marcelo Vanzin] Add TransportServerBootstrap, make SASL code use it.
Marcelo Vanzin authored and rxin committed May 2, 2015
commit 38d4e9e446b425ca6a8fe8d8080f387b08683842
17 changes: 15 additions & 2 deletions core/src/main/scala/org/apache/spark/SecurityManager.scala
@@ -150,8 +150,13 @@ import org.apache.spark.util.Utils
  * authorization. If not filter is in place the user is generally null and no authorization
  * can take place.
  *
- * Connection encryption (SSL) configuration is organized hierarchically. The user can configure
- * the default SSL settings which will be used for all the supported communication protocols unless
+ * When authentication is being used, encryption can also be enabled by setting the option
+ * spark.authenticate.enableSaslEncryption to true. This is only supported by communication
+ * channels that use the network-common library, and can be used as an alternative to SSL in those
+ * cases.
+ *
+ * SSL can be used for encryption for certain communication channels. The user can configure the
+ * default SSL settings which will be used for all the supported communication protocols unless
  * they are overwritten by protocol specific settings. This way the user can easily provide the
  * common settings for all the protocols without disabling the ability to configure each one
  * individually.
@@ -412,6 +417,14 @@ private[spark] class SecurityManager(sparkConf: SparkConf)
    */
   def isAuthenticationEnabled(): Boolean = authOn
 
+  /**
+   * Checks whether SASL encryption should be enabled.
+   * @return Whether to enable SASL encryption when connecting to services that support it.
+   */
+  def isSaslEncryptionEnabled(): Boolean = {
+    sparkConf.getBoolean("spark.authenticate.enableSaslEncryption", false)
+  }
+
   /**
    * Gets the user used for authenticating HTTP connections.
    * For now use a single hardcoded user.
core/src/main/scala/org/apache/spark/deploy/ExternalShuffleService.scala
@@ -19,10 +19,12 @@ package org.apache.spark.deploy
 
 import java.util.concurrent.CountDownLatch
 
+import scala.collection.JavaConversions._
+
 import org.apache.spark.{Logging, SparkConf, SecurityManager}
 import org.apache.spark.network.TransportContext
 import org.apache.spark.network.netty.SparkTransportConf
-import org.apache.spark.network.sasl.SaslRpcHandler
+import org.apache.spark.network.sasl.SaslServerBootstrap
 import org.apache.spark.network.server.TransportServer
 import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler
 import org.apache.spark.util.Utils
@@ -44,10 +46,7 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana
 
   private val transportConf = SparkTransportConf.fromSparkConf(sparkConf, numUsableCores = 0)
   private val blockHandler = new ExternalShuffleBlockHandler(transportConf)
-  private val transportContext: TransportContext = {
-    val handler = if (useSasl) new SaslRpcHandler(blockHandler, securityManager) else blockHandler
-    new TransportContext(transportConf, handler)
-  }
+  private val transportContext: TransportContext = new TransportContext(transportConf, blockHandler)
 
   private var server: TransportServer = _
 
@@ -62,7 +61,13 @@ class ExternalShuffleService(sparkConf: SparkConf, securityManager: SecurityMana
   def start() {
     require(server == null, "Shuffle server already started")
     logInfo(s"Starting shuffle service on port $port with useSasl = $useSasl")
-    server = transportContext.createServer(port)
+    val bootstraps =
+      if (useSasl) {
+        Seq(new SaslServerBootstrap(transportConf, securityManager))
+      } else {
+        Nil
+      }
+    server = transportContext.createServer(port, bootstraps)
   }
 
   def stop() {
core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
@@ -24,7 +24,7 @@ import org.apache.spark.{SecurityManager, SparkConf}
 import org.apache.spark.network._
 import org.apache.spark.network.buffer.ManagedBuffer
 import org.apache.spark.network.client.{TransportClientBootstrap, RpcResponseCallback, TransportClientFactory}
-import org.apache.spark.network.sasl.{SaslRpcHandler, SaslClientBootstrap}
+import org.apache.spark.network.sasl.{SaslClientBootstrap, SaslServerBootstrap}
 import org.apache.spark.network.server._
 import org.apache.spark.network.shuffle.{RetryingBlockFetcher, BlockFetchingListener, OneForOneBlockFetcher}
 import org.apache.spark.network.shuffle.protocol.UploadBlock
@@ -49,18 +49,18 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage
   private[this] var appId: String = _
 
   override def init(blockDataManager: BlockDataManager): Unit = {
-    val (rpcHandler: RpcHandler, bootstrap: Option[TransportClientBootstrap]) = {
-      val nettyRpcHandler = new NettyBlockRpcServer(serializer, blockDataManager)
-      if (!authEnabled) {
-        (nettyRpcHandler, None)
-      } else {
-        (new SaslRpcHandler(nettyRpcHandler, securityManager),
-          Some(new SaslClientBootstrap(transportConf, conf.getAppId, securityManager)))
-      }
+    val rpcHandler = new NettyBlockRpcServer(serializer, blockDataManager)
+    var serverBootstrap: Option[TransportServerBootstrap] = None
+    var clientBootstrap: Option[TransportClientBootstrap] = None
+    if (authEnabled) {
+      serverBootstrap = Some(new SaslServerBootstrap(transportConf, securityManager))
+      clientBootstrap = Some(new SaslClientBootstrap(transportConf, conf.getAppId, securityManager,
+        securityManager.isSaslEncryptionEnabled()))
     }
     transportContext = new TransportContext(transportConf, rpcHandler)
-    clientFactory = transportContext.createClientFactory(bootstrap.toList)
-    server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0))
+    clientFactory = transportContext.createClientFactory(clientBootstrap.toList)
+    server = transportContext.createServer(conf.getInt("spark.blockManager.port", 0),
+      serverBootstrap.toList)
     appId = conf.getAppId
     logInfo("Server created on " + server.getPort)
   }
core/src/main/scala/org/apache/spark/network/nio/ConnectionManager.scala
@@ -656,7 +656,7 @@ private[nio] class ConnectionManager(
       connection.synchronized {
         if (connection.sparkSaslServer == null) {
           logDebug("Creating sasl Server")
-          connection.sparkSaslServer = new SparkSaslServer(conf.getAppId, securityManager)
+          connection.sparkSaslServer = new SparkSaslServer(conf.getAppId, securityManager, false)
         }
       }
       replyToken = connection.sparkSaslServer.response(securityMsg.getToken)
@@ -800,7 +800,7 @@
     if (!conn.isSaslComplete()) {
       conn.synchronized {
         if (conn.sparkSaslClient == null) {
-          conn.sparkSaslClient = new SparkSaslClient(conf.getAppId, securityManager)
+          conn.sparkSaslClient = new SparkSaslClient(conf.getAppId, securityManager, false)
           var firstResponse: Array[Byte] = null
           try {
             firstResponse = conn.sparkSaslClient.firstToken()
core/src/main/scala/org/apache/spark/storage/BlockManager.scala
@@ -111,7 +111,8 @@ private[spark] class BlockManager(
   // standard BlockTransferService to directly connect to other Executors.
   private[spark] val shuffleClient = if (externalShuffleServiceEnabled) {
     val transConf = SparkTransportConf.fromSparkConf(conf, numUsableCores)
-    new ExternalShuffleClient(transConf, securityManager, securityManager.isAuthenticationEnabled())
+    new ExternalShuffleClient(transConf, securityManager, securityManager.isAuthenticationEnabled(),
+      securityManager.isSaslEncryptionEnabled())
   } else {
     blockTransferService
   }
network/common/src/main/java/org/apache/spark/network/TransportContext.java
@@ -36,6 +36,7 @@
 import org.apache.spark.network.server.TransportChannelHandler;
 import org.apache.spark.network.server.TransportRequestHandler;
 import org.apache.spark.network.server.TransportServer;
+import org.apache.spark.network.server.TransportServerBootstrap;
 import org.apache.spark.network.util.NettyUtils;
 import org.apache.spark.network.util.TransportConf;
 
@@ -82,27 +83,40 @@ public TransportClientFactory createClientFactory() {
   }
 
   /** Create a server which will attempt to bind to a specific port. */
-  public TransportServer createServer(int port) {
-    return new TransportServer(this, port);
+  public TransportServer createServer(int port, List<TransportServerBootstrap> bootstraps) {
+    return new TransportServer(this, port, rpcHandler, bootstraps);
   }
 
   /** Creates a new server, binding to any available ephemeral port. */
+  public TransportServer createServer(List<TransportServerBootstrap> bootstraps) {
+    return createServer(0, bootstraps);
+  }
+
   public TransportServer createServer() {
-    return new TransportServer(this, 0);
+    return createServer(0, Lists.<TransportServerBootstrap>newArrayList());
   }
 
+  public TransportChannelHandler initializePipeline(SocketChannel channel) {
+    return initializePipeline(channel, rpcHandler);
+  }
+
   /**
    * Initializes a client or server Netty Channel Pipeline which encodes/decodes messages and
    * has a {@link org.apache.spark.network.server.TransportChannelHandler} to handle request or
    * response messages.
    *
+   * @param channel The channel to initialize.
+   * @param channelRpcHandler The RPC handler to use for the channel.
+   *
    * @return Returns the created TransportChannelHandler, which includes a TransportClient that can
    * be used to communicate on this channel. The TransportClient is directly associated with a
    * ChannelHandler to ensure all users of the same channel get the same TransportClient object.
    */
-  public TransportChannelHandler initializePipeline(SocketChannel channel) {
+  public TransportChannelHandler initializePipeline(
+      SocketChannel channel,
+      RpcHandler channelRpcHandler) {
     try {
-      TransportChannelHandler channelHandler = createChannelHandler(channel);
+      TransportChannelHandler channelHandler = createChannelHandler(channel, channelRpcHandler);
       channel.pipeline()
         .addLast("encoder", encoder)
         .addLast("frameDecoder", NettyUtils.createFrameDecoder())
@@ -123,7 +137,7 @@ public TransportChannelHandler initializePipeline(SocketChannel channel) {
    * ResponseMessages. The channel is expected to have been successfully created, though certain
    * properties (such as the remoteAddress()) may not be available yet.
    */
-  private TransportChannelHandler createChannelHandler(Channel channel) {
+  private TransportChannelHandler createChannelHandler(Channel channel, RpcHandler rpcHandler) {
     TransportResponseHandler responseHandler = new TransportResponseHandler(channel);
     TransportClient client = new TransportClient(channel, responseHandler);
     TransportRequestHandler requestHandler = new TransportRequestHandler(channel, client,
network/common/src/main/java/org/apache/spark/network/client/TransportClientBootstrap.java
@@ -17,6 +17,8 @@
 
 package org.apache.spark.network.client;
 
+import io.netty.channel.Channel;
+
 /**
  * A bootstrap which is executed on a TransportClient before it is returned to the user.
  * This enables an initial exchange of information (e.g., SASL authentication tokens) on a once-per-
@@ -28,5 +30,5 @@
  */
 public interface TransportClientBootstrap {
   /** Performs the bootstrapping operation, throwing an exception on failure. */
-  public void doBootstrap(TransportClient client) throws RuntimeException;
+  void doBootstrap(TransportClient client, Channel channel) throws RuntimeException;
 }
network/common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java
@@ -172,12 +172,14 @@ private TransportClient createClient(InetSocketAddress address) throws IOExcepti
       .option(ChannelOption.ALLOCATOR, pooledAllocator);
 
     final AtomicReference<TransportClient> clientRef = new AtomicReference<TransportClient>();
+    final AtomicReference<Channel> channelRef = new AtomicReference<Channel>();
 
     bootstrap.handler(new ChannelInitializer<SocketChannel>() {
       @Override
       public void initChannel(SocketChannel ch) {
         TransportChannelHandler clientHandler = context.initializePipeline(ch);
         clientRef.set(clientHandler.getClient());
+        channelRef.set(ch);
       }
     });
 
@@ -192,14 +194,15 @@ public void initChannel(SocketChannel ch) {
     }
 
     TransportClient client = clientRef.get();
+    Channel channel = channelRef.get();
     assert client != null : "Channel future completed successfully with null client";
 
     // Execute any client bootstraps synchronously before marking the Client as successful.
     long preBootstrap = System.nanoTime();
     logger.debug("Connection to {} successful, running bootstraps...", address);
     try {
       for (TransportClientBootstrap clientBootstrap : clientBootstraps) {
-        clientBootstrap.doBootstrap(client);
+        clientBootstrap.doBootstrap(client, channel);
       }
     } catch (Exception e) { // catch non-RuntimeExceptions too as bootstrap may be written in Scala
       long bootstrapTimeMs = (System.nanoTime() - preBootstrap) / 1000000;
network/common/src/main/java/org/apache/spark/network/sasl/SaslClientBootstrap.java
@@ -17,8 +17,12 @@
 
 package org.apache.spark.network.sasl;
 
+import javax.security.sasl.Sasl;
+import javax.security.sasl.SaslException;
+
 import io.netty.buffer.ByteBuf;
 import io.netty.buffer.Unpooled;
+import io.netty.channel.Channel;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -33,14 +37,24 @@
 public class SaslClientBootstrap implements TransportClientBootstrap {
   private final Logger logger = LoggerFactory.getLogger(SaslClientBootstrap.class);
 
+  private final boolean encrypt;
   private final TransportConf conf;
   private final String appId;
   private final SecretKeyHolder secretKeyHolder;
 
   public SaslClientBootstrap(TransportConf conf, String appId, SecretKeyHolder secretKeyHolder) {
+    this(conf, appId, secretKeyHolder, false);
+  }
+
+  public SaslClientBootstrap(
+      TransportConf conf,
+      String appId,
+      SecretKeyHolder secretKeyHolder,
+      boolean encrypt) {
     this.conf = conf;
     this.appId = appId;
     this.secretKeyHolder = secretKeyHolder;
+    this.encrypt = encrypt;
   }
 
   /**
@@ -49,8 +63,8 @@ public SaslClientBootstrap(TransportConf conf, String appId, SecretKeyHolder sec
    * due to mismatch.
    */
   @Override
-  public void doBootstrap(TransportClient client) {
-    SparkSaslClient saslClient = new SparkSaslClient(appId, secretKeyHolder);
+  public void doBootstrap(TransportClient client, Channel channel) {
+    SparkSaslClient saslClient = new SparkSaslClient(appId, secretKeyHolder, encrypt);
     try {
       byte[] payload = saslClient.firstToken();
 
@@ -62,13 +76,26 @@ public void doBootstrap(TransportClient client) {
         byte[] response = client.sendRpcSync(buf.array(), conf.saslRTTimeoutMs());
         payload = saslClient.response(response);
       }
+
+      if (encrypt) {
+        if (!SparkSaslServer.QOP_AUTH_CONF.equals(saslClient.getNegotiatedProperty(Sasl.QOP))) {
+          throw new RuntimeException(
+            new SaslException("Encryption requests by negotiated non-encrypted connection."));
+        }
+        SaslEncryption.addToChannel(channel, saslClient, conf.maxSaslEncryptedBlockSize());
+        saslClient = null;
+        logger.debug("Channel {} configured for SASL encryption.", client);
+      }
     } finally {
-      try {
-        // Once authentication is complete, the server will trust all remaining communication.
-        saslClient.dispose();
-      } catch (RuntimeException e) {
-        logger.error("Error while disposing SASL client", e);
+      if (saslClient != null) {
+        try {
+          // Once authentication is complete, the server will trust all remaining communication.
+          saslClient.dispose();
+        } catch (RuntimeException e) {
+          logger.error("Error while disposing SASL client", e);
+        }
       }
     }
   }
 
 }