diff --git a/.rat-excludes b/.rat-excludes
index 08fba6d351d6..91a3ce622b04 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -83,3 +83,5 @@ gen-java.*
.*avpr
org.apache.spark.sql.sources.DataSourceRegister
.*parquet
+spark-deps-hadoop1
+spark-deps-hadoop24
diff --git a/core/pom.xml b/core/pom.xml
index 570a25cf325a..4bbe6677778e 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -39,6 +39,15 @@
avro-mapred
${avro.mapred.classifier}
+
+ com.google.guava
+ guava
+
+
+ com.amazonaws
+ aws-java-sdk
+ ${aws.java.sdk.version}
+
com.google.guava
guava
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 623004310e18..b8fb9febdaff 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -123,6 +123,7 @@ def run_tests(tests_timeout):
ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
ERROR_CODES["BLOCK_BUILD"]: 'to build',
+ ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 9e1abb069719..e9fe0f9b2b53 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -415,6 +415,11 @@ def run_python_tests(test_modules, parallelism):
run_cmd(command)
+def run_build_tests():
+    """Run dev/test-dependencies.sh under the BLOCK_BUILD_TESTS error block."""
+    set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS")
+    dependency_check = os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")
+    run_cmd([dependency_check])
+
+
def run_sparkr_tests():
set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")
@@ -534,6 +539,8 @@ def main():
# spark build
build_apache_spark(build_tool, hadoop_version)
+ if m.should_run_build_tests:
+ run_build_tests()
# backwards compatibility checks
if build_tool == "sbt":
diff --git a/dev/spark-deps-hadoop1 b/dev/spark-deps-hadoop1
new file mode 100644
index 000000000000..507d75ef68ee
--- /dev/null
+++ b/dev/spark-deps-hadoop1
@@ -0,0 +1,118 @@
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-codec-1.10.jar
+commons-collections-3.2.1.jar
+commons-compiler-2.7.8.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-digester-1.8.jar
+commons-el-1.0.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-math-2.1.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+guava-14.0.1.jar
+hadoop-client-1.2.1.jar
+hadoop-core-1.2.1.jar
+hsqldb-1.8.0.10.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.8.8.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.8.8.jar
+jackson-mapper-asl-1.8.8.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.8.8.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+javax.servlet-3.0.0.v201112011016.jar
+jaxb-api-2.2.7.jar
+jaxb-core-2.2.7.jar
+jaxb-impl-2.2.7.jar
+jcl-over-slf4j-1.7.10.jar
+jersey-core-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.7.1.jar
+jettison-1.1.jar
+jline-0.9.94.jar
+jline-2.10.4.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.1.15.jar
+pmml-model-1.1.15.jar
+pmml-schema-1.1.15.jar
+protobuf-java-2.4.1.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.4.jar
+scala-library-2.10.4.jar
+scala-reflect-2.10.4.jar
+scalap-2.10.4.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-java-1.1.1.7.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stream-2.7.0.jar
+tachyon-client-0.8.1.jar
+tachyon-underfs-hdfs-0.8.1.jar
+tachyon-underfs-local-0.8.1.jar
+tachyon-underfs-s3-0.8.1.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar
diff --git a/dev/spark-deps-hadoop24 b/dev/spark-deps-hadoop24
new file mode 100644
index 000000000000..94c36c6237a3
--- /dev/null
+++ b/dev/spark-deps-hadoop24
@@ -0,0 +1,185 @@
+JavaEWAH-0.3.2.jar
+ST4-4.0.4.jar
+activation-1.1.1.jar
+akka-actor_2.10-2.3.11.jar
+akka-remote_2.10-2.3.11.jar
+akka-slf4j_2.10-2.3.11.jar
+antlr-2.7.7.jar
+antlr-runtime-3.4.jar
+aopalliance-1.0.jar
+apache-log4j-extras-1.2.17.jar
+arpack_combined_all-0.1.jar
+asm-3.1.jar
+asm-commons-3.1.jar
+asm-tree-3.1.jar
+avro-1.7.7.jar
+avro-ipc-1.7.7-tests.jar
+avro-ipc-1.7.7.jar
+avro-mapred-1.7.7-hadoop2.jar
+base64-2.3.8.jar
+bcprov-jdk15on-1.51.jar
+bonecp-0.8.0.RELEASE.jar
+breeze-macros_2.10-0.11.2.jar
+breeze_2.10-0.11.2.jar
+calcite-avatica-1.2.0-incubating.jar
+calcite-core-1.2.0-incubating.jar
+calcite-linq4j-1.2.0-incubating.jar
+chill-java-0.5.0.jar
+chill_2.10-0.5.0.jar
+commons-beanutils-1.7.0.jar
+commons-beanutils-core-1.8.0.jar
+commons-cli-1.2.jar
+commons-codec-1.10.jar
+commons-collections-3.2.1.jar
+commons-compiler-2.7.6.jar
+commons-compress-1.4.1.jar
+commons-configuration-1.6.jar
+commons-dbcp-1.4.jar
+commons-digester-1.8.jar
+commons-httpclient-3.1.jar
+commons-io-2.4.jar
+commons-lang-2.6.jar
+commons-lang3-3.3.2.jar
+commons-logging-1.1.3.jar
+commons-math3-3.4.1.jar
+commons-net-2.2.jar
+commons-pool-1.5.4.jar
+compress-lzf-1.0.3.jar
+config-1.2.1.jar
+core-1.1.2.jar
+curator-client-2.4.0.jar
+curator-framework-2.4.0.jar
+curator-recipes-2.4.0.jar
+datanucleus-api-jdo-3.2.6.jar
+datanucleus-core-3.2.10.jar
+datanucleus-rdbms-3.2.9.jar
+derby-10.10.1.1.jar
+eigenbase-properties-1.1.5.jar
+geronimo-annotation_1.0_spec-1.1.1.jar
+geronimo-jaspic_1.0_spec-1.0.jar
+geronimo-jta_1.1_spec-1.1.1.jar
+groovy-all-2.1.6.jar
+guava-14.0.1.jar
+guice-3.0.jar
+guice-servlet-3.0.jar
+hadoop-annotations-2.4.0.jar
+hadoop-auth-2.4.0.jar
+hadoop-client-2.4.0.jar
+hadoop-common-2.4.0.jar
+hadoop-hdfs-2.4.0.jar
+hadoop-mapreduce-client-app-2.4.0.jar
+hadoop-mapreduce-client-common-2.4.0.jar
+hadoop-mapreduce-client-core-2.4.0.jar
+hadoop-mapreduce-client-jobclient-2.4.0.jar
+hadoop-mapreduce-client-shuffle-2.4.0.jar
+hadoop-yarn-api-2.4.0.jar
+hadoop-yarn-client-2.4.0.jar
+hadoop-yarn-common-2.4.0.jar
+hadoop-yarn-server-common-2.4.0.jar
+hadoop-yarn-server-web-proxy-2.4.0.jar
+httpclient-4.3.2.jar
+httpcore-4.3.2.jar
+ivy-2.4.0.jar
+jackson-annotations-2.4.4.jar
+jackson-core-2.4.4.jar
+jackson-core-asl-1.9.13.jar
+jackson-databind-2.4.4.jar
+jackson-jaxrs-1.9.13.jar
+jackson-mapper-asl-1.9.13.jar
+jackson-module-scala_2.10-2.4.4.jar
+jackson-xc-1.9.13.jar
+janino-2.7.8.jar
+jansi-1.4.jar
+java-xmlbuilder-1.0.jar
+javax.inject-1.jar
+javax.servlet-3.0.0.v201112011016.jar
+javolution-5.5.1.jar
+jaxb-api-2.2.2.jar
+jaxb-core-2.2.7.jar
+jaxb-impl-2.2.7.jar
+jcl-over-slf4j-1.7.10.jar
+jdo-api-3.0.1.jar
+jersey-client-1.9.jar
+jersey-core-1.9.jar
+jersey-guice-1.9.jar
+jersey-json-1.9.jar
+jersey-server-1.9.jar
+jets3t-0.9.3.jar
+jettison-1.1.jar
+jetty-6.1.26.jar
+jetty-all-7.6.0.v20120127.jar
+jetty-util-6.1.26.jar
+jline-2.10.4.jar
+jline-2.12.jar
+joda-time-2.9.jar
+jodd-core-3.5.2.jar
+jpam-1.1.jar
+json-20090211.jar
+json4s-ast_2.10-3.2.10.jar
+json4s-core_2.10-3.2.10.jar
+json4s-jackson_2.10-3.2.10.jar
+jsr305-1.3.9.jar
+jta-1.1.jar
+jtransforms-2.4.0.jar
+jul-to-slf4j-1.7.10.jar
+kryo-2.21.jar
+leveldbjni-all-1.8.jar
+libfb303-0.9.2.jar
+libthrift-0.9.2.jar
+log4j-1.2.17.jar
+lz4-1.3.0.jar
+mail-1.4.7.jar
+mesos-0.21.1-shaded-protobuf.jar
+metrics-core-3.1.2.jar
+metrics-graphite-3.1.2.jar
+metrics-json-3.1.2.jar
+metrics-jvm-3.1.2.jar
+minlog-1.2.jar
+mx4j-3.0.2.jar
+netty-3.8.0.Final.jar
+netty-all-4.0.29.Final.jar
+objenesis-1.2.jar
+opencsv-2.3.jar
+oro-2.0.8.jar
+paranamer-2.6.jar
+parquet-column-1.7.0.jar
+parquet-common-1.7.0.jar
+parquet-encoding-1.7.0.jar
+parquet-format-2.3.0-incubating.jar
+parquet-generator-1.7.0.jar
+parquet-hadoop-1.7.0.jar
+parquet-hadoop-bundle-1.6.0.jar
+parquet-jackson-1.7.0.jar
+pmml-agent-1.1.15.jar
+pmml-model-1.1.15.jar
+pmml-schema-1.1.15.jar
+protobuf-java-2.5.0.jar
+py4j-0.9.jar
+pyrolite-4.9.jar
+quasiquotes_2.10-2.0.0-M8.jar
+reflectasm-1.07-shaded.jar
+scala-compiler-2.10.4.jar
+scala-library-2.10.4.jar
+scala-reflect-2.10.4.jar
+scalap-2.10.4.jar
+servlet-api-2.5.jar
+slf4j-api-1.7.10.jar
+slf4j-log4j12-1.7.10.jar
+snappy-0.2.jar
+snappy-java-1.1.1.7.jar
+spire-macros_2.10-0.7.4.jar
+spire_2.10-0.7.4.jar
+stax-api-1.0-2.jar
+stax-api-1.0.1.jar
+stream-2.7.0.jar
+stringtemplate-3.2.1.jar
+super-csv-2.2.0.jar
+tachyon-client-0.8.1.jar
+tachyon-underfs-hdfs-0.8.1.jar
+tachyon-underfs-local-0.8.1.jar
+tachyon-underfs-s3-0.8.1.jar
+uncommons-maths-1.2.2a.jar
+unused-1.0.0.jar
+xmlenc-0.52.jar
+xz-1.0.jar
+zookeeper-3.4.5.jar
diff --git a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py
index 8ab6d9e37ca2..61acd8d4aab3 100644
--- a/dev/sparktestsupport/__init__.py
+++ b/dev/sparktestsupport/__init__.py
@@ -31,5 +31,6 @@
"BLOCK_SPARK_UNIT_TESTS": 18,
"BLOCK_PYSPARK_UNIT_TESTS": 19,
"BLOCK_SPARKR_UNIT_TESTS": 20,
+ "BLOCK_BUILD_TESTS": 21,
"BLOCK_TIMEOUT": 124
}
diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index d65547e04db4..ae7c7634699d 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -31,7 +31,7 @@ class Module(object):
def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(),
- test_tags=(), should_run_r_tests=False):
+ test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
"""
Define a new module.
@@ -53,6 +53,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
:param test_tags A set of tags that will be excluded when running unit tests if the module
is not explicitly changed.
:param should_run_r_tests: If true, changes in this module will trigger all R tests.
+ :param should_run_build_tests: If true, changes in this module will trigger build tests.
"""
self.name = name
self.dependencies = dependencies
@@ -64,6 +65,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
self.blacklisted_python_implementations = blacklisted_python_implementations
self.test_tags = test_tags
self.should_run_r_tests = should_run_r_tests
+ self.should_run_build_tests = should_run_build_tests
self.dependent_modules = set()
for dep in dependencies:
@@ -394,6 +396,13 @@ def contains_file(self, filename):
]
)
+build = Module(
+    name="build",
+    dependencies=[],
+    source_file_regexes=[".*pom.xml"],
+    # The flag defaults to False; it must be set here or pom.xml changes never run build tests.
+    should_run_build_tests=True,
+)
ec2 = Module(
name="ec2",
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
new file mode 100755
index 000000000000..220807186f55
--- /dev/null
+++ b/dev/test-dependencies.sh
@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+set -e
+
+# Every path used below (build/mvn, dev/pr-deps-*, dev/spark-deps-*) is relative to the
+# repository root, so switch there regardless of where the script was invoked from.
+FWDIR="$(cd "$(dirname "$0")/.." && pwd)"
+cd "$FWDIR"
+
+# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style
+# resolution.
+
+MVN="build/mvn --force"
+# NOTE: These should match those in the release publishing script
+HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
+# NOTE(review): LOCAL_REPO is not referenced anywhere else in this script.
+LOCAL_REPO="mvn-tmp"
+
+if [ -n "$AMPLAB_JENKINS" ]; then
+  # To speed up the Maven install we delete the source files; the Maven
+  # dependency list only works once the modules are installed.
+  # The patterns are quoted so the shell cannot expand them before find sees
+  # them, and -exec avoids xargs splitting paths on whitespace.
+  find . -name '*.scala' -exec rm -f {} +
+  find . -name '*.java' -exec rm -f {} +
+fi
+
+# Use custom version to avoid Maven contention
+# The version is "spark-" followed by the last digits of the current epoch time
+# (`date +%s | tail -c6`); versions:set rewrites it into the project's pom files.
+spark_version="spark-$(date +%s | tail -c6)"
+$MVN -q versions:set -DnewVersion=$spark_version > /dev/null
+
+# Install all modules into the local Maven repository except the ones excluded with
+# -pl '!...' below; -q quiets Maven's output and -DskipTests skips running tests.
+echo "Performing Maven install"
+$MVN $HADOOP2_MODULE_PROFILES install -q \
+ -pl '!assembly' \
+ -pl '!examples' \
+ -pl '!external/flume-assembly' \
+ -pl '!external/kafka-assembly' \
+ -pl '!external/twitter' \
+ -pl '!external/flume' \
+ -pl '!external/mqtt' \
+ -pl '!external/mqtt-assembly' \
+ -pl '!external/zeromq' \
+ -pl '!external/kafka' \
+ -DskipTests
+
+echo "Generating dependency manifest"
+
+# Each pipeline below turns the assembly module's classpath into a sorted list of jar
+# file names, one per line:
+#   grep -A 5 | tail -n 1   keep the last of the 5 lines that follow the
+#                           "Building Spark Project Assembly" banner (assumes that is
+#                           where Maven prints the classpath -- TODO confirm)
+#   tr ":" "\n"             put each classpath entry on its own line
+#   rev | cut | rev         drop the directory part, leaving only the file name
+#   sort                    fixed ordering so the result can be compared with diff
+#   grep -v spark           drop every entry whose name contains "spark"
+# The hadoop-1 run uses only -Phadoop-1; the hadoop-2.4 run adds $HADOOP2_MODULE_PROFILES.
+$MVN -Phadoop-1 dependency:build-classpath -pl assembly \
+ | grep "Building Spark Project Assembly" -A 5 \
+ | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+ | grep -v spark > dev/pr-deps-hadoop1
+
+
+$MVN $HADOOP2_MODULE_PROFILES -Phadoop-2.4 dependency:build-classpath -pl assembly \
+ | grep "Building Spark Project Assembly" -A 5 \
+ | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+ | grep -v spark > dev/pr-deps-hadoop24
+
+# Jenkins only: discard the working-tree changes made earlier in this script.
+if [ -n "$AMPLAB_JENKINS" ]; then
+  git reset --hard HEAD
+fi
+
+# When "replace-manifest" appears anywhere in the arguments, promote the freshly
+# generated lists to be the checked-in manifests and skip the comparison below.
+case "$*" in
+  *replace-manifest*)
+    echo "Replacing manifest and creating new file at dev/spark-deps"
+    mv dev/pr-deps-hadoop1 dev/spark-deps-hadoop1
+    mv dev/pr-deps-hadoop24 dev/spark-deps-hadoop24
+    exit 0
+    ;;
+esac
+
+# Compare both generated lists with their checked-in manifests. `diff` exits 1 when the
+# files differ, which must not trip `set -e`; `|| true` lets both comparisons run.
+hadoop1_diff="$(diff dev/pr-deps-hadoop1 dev/spark-deps-hadoop1 || true)"
+hadoop24_diff="$(diff dev/pr-deps-hadoop24 dev/spark-deps-hadoop24 || true)"
+
+# The two results are kept separate so a mismatch in either profile fails the check.
+if [ -n "$hadoop1_diff" ] || [ -n "$hadoop24_diff" ]; then
+  echo "Spark's published dependencies DO NOT MATCH the manifest file (dev/spark-deps)."
+  echo "To update the manifest file, run './dev/test-dependencies.sh --replace-manifest'."
+  [ -z "$hadoop1_diff" ] || printf 'hadoop1:\n%s\n' "$hadoop1_diff"
+  [ -z "$hadoop24_diff" ] || printf 'hadoop24:\n%s\n' "$hadoop24_diff"
+  exit 1
+fi
diff --git a/pom.xml b/pom.xml
index 762bfc728233..37064acfd417 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2052,6 +2052,22 @@
maven-deploy-plugin
2.8.2
+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ default-cli
+
+ build-classpath
+
+
+
+ compile
+
+
+
+