diff --git a/.rat-excludes b/.rat-excludes
index 08fba6d351d6..91a3ce622b04 100644
--- a/.rat-excludes
+++ b/.rat-excludes
@@ -83,3 +83,5 @@ gen-java.*
 .*avpr
 org.apache.spark.sql.sources.DataSourceRegister
 .*parquet
+spark-deps-hadoop1
+spark-deps-hadoop24
diff --git a/core/pom.xml b/core/pom.xml
index 570a25cf325a..4bbe6677778e 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -39,6 +39,15 @@
 avro-mapred
 ${avro.mapred.classifier}

+
+ com.google.guava
+ guava
+
+
+ com.amazonaws
+ aws-java-sdk
+ ${aws.java.sdk.version}
+

 com.google.guava
 guava
diff --git a/dev/run-tests-jenkins.py b/dev/run-tests-jenkins.py
index 623004310e18..b8fb9febdaff 100755
--- a/dev/run-tests-jenkins.py
+++ b/dev/run-tests-jenkins.py
@@ -123,6 +123,7 @@ def run_tests(tests_timeout):
         ERROR_CODES["BLOCK_R_STYLE"]: 'R style tests',
         ERROR_CODES["BLOCK_DOCUMENTATION"]: 'to generate documentation',
         ERROR_CODES["BLOCK_BUILD"]: 'to build',
+        ERROR_CODES["BLOCK_BUILD_TESTS"]: 'build dependency tests',
         ERROR_CODES["BLOCK_MIMA"]: 'MiMa tests',
         ERROR_CODES["BLOCK_SPARK_UNIT_TESTS"]: 'Spark unit tests',
         ERROR_CODES["BLOCK_PYSPARK_UNIT_TESTS"]: 'PySpark unit tests',
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 9e1abb069719..e9fe0f9b2b53 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -415,6 +415,11 @@ def run_python_tests(test_modules, parallelism):
     run_cmd(command)


+def run_build_tests():  # Runs dev/test-dependencies.sh (block: BLOCK_BUILD_TESTS).
+    set_title_and_block("Running build tests", "BLOCK_BUILD_TESTS")
+    run_cmd([os.path.join(SPARK_HOME, "dev", "test-dependencies.sh")])
+
+
 def run_sparkr_tests():
     set_title_and_block("Running SparkR tests", "BLOCK_SPARKR_UNIT_TESTS")

@@ -534,6 +539,8 @@ def main():

     # spark build
     build_apache_spark(build_tool, hadoop_version)
+    if m.should_run_build_tests:  # NOTE(review): `m` is bound outside this hunk; verify
+        run_build_tests()

     # backwards compatibility checks
     if build_tool == "sbt":
diff --git a/dev/spark-deps-hadoop1 b/dev/spark-deps-hadoop1
new file mode 100644
index 000000000000..507d75ef68ee
--- /dev/null
+++ b/dev/spark-deps-hadoop1
@@ -0,0 +1,118 @@
+akka-actor_2.10-2.3.11.jar +akka-remote_2.10-2.3.11.jar +akka-slf4j_2.10-2.3.11.jar +arpack_combined_all-0.1.jar +asm-3.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7-tests.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +breeze-macros_2.10-0.11.2.jar +breeze_2.10-0.11.2.jar +chill-java-0.5.0.jar +chill_2.10-0.5.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-codec-1.10.jar +commons-collections-3.2.1.jar +commons-compiler-2.7.8.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-digester-1.8.jar +commons-el-1.0.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-math-2.1.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +compress-lzf-1.0.3.jar +config-1.2.1.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +guava-14.0.1.jar +hadoop-client-1.2.1.jar +hadoop-core-1.2.1.jar +hsqldb-1.8.0.10.jar +ivy-2.4.0.jar +jackson-annotations-2.4.4.jar +jackson-core-2.4.4.jar +jackson-core-asl-1.8.8.jar +jackson-databind-2.4.4.jar +jackson-jaxrs-1.8.8.jar +jackson-mapper-asl-1.8.8.jar +jackson-module-scala_2.10-2.4.4.jar +jackson-xc-1.8.8.jar +janino-2.7.8.jar +jansi-1.4.jar +javax.servlet-3.0.0.v201112011016.jar +jaxb-api-2.2.7.jar +jaxb-core-2.2.7.jar +jaxb-impl-2.2.7.jar +jcl-over-slf4j-1.7.10.jar +jersey-core-1.9.jar +jersey-json-1.9.jar +jersey-server-1.9.jar +jets3t-0.7.1.jar +jettison-1.1.jar +jline-0.9.94.jar +jline-2.10.4.jar +json4s-ast_2.10-3.2.10.jar +json4s-core_2.10-3.2.10.jar +json4s-jackson_2.10-3.2.10.jar +jsr305-1.3.9.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.10.jar +kryo-2.21.jar +leveldbjni-all-1.8.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-1.2.jar +opencsv-2.3.jar +oro-2.0.8.jar +paranamer-2.6.jar 
+parquet-column-1.7.0.jar +parquet-common-1.7.0.jar +parquet-encoding-1.7.0.jar +parquet-format-2.3.0-incubating.jar +parquet-generator-1.7.0.jar +parquet-hadoop-1.7.0.jar +parquet-jackson-1.7.0.jar +pmml-agent-1.1.15.jar +pmml-model-1.1.15.jar +pmml-schema-1.1.15.jar +protobuf-java-2.4.1.jar +py4j-0.9.jar +pyrolite-4.9.jar +quasiquotes_2.10-2.0.0-M8.jar +reflectasm-1.07-shaded.jar +scala-compiler-2.10.4.jar +scala-library-2.10.4.jar +scala-reflect-2.10.4.jar +scalap-2.10.4.jar +slf4j-api-1.7.10.jar +slf4j-log4j12-1.7.10.jar +snappy-java-1.1.1.7.jar +spire-macros_2.10-0.7.4.jar +spire_2.10-0.7.4.jar +stream-2.7.0.jar +tachyon-client-0.8.1.jar +tachyon-underfs-hdfs-0.8.1.jar +tachyon-underfs-local-0.8.1.jar +tachyon-underfs-s3-0.8.1.jar +uncommons-maths-1.2.2a.jar +unused-1.0.0.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/spark-deps-hadoop24 b/dev/spark-deps-hadoop24 new file mode 100644 index 000000000000..94c36c6237a3 --- /dev/null +++ b/dev/spark-deps-hadoop24 @@ -0,0 +1,185 @@ +JavaEWAH-0.3.2.jar +ST4-4.0.4.jar +activation-1.1.1.jar +akka-actor_2.10-2.3.11.jar +akka-remote_2.10-2.3.11.jar +akka-slf4j_2.10-2.3.11.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +aopalliance-1.0.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +asm-3.1.jar +asm-commons-3.1.jar +asm-tree-3.1.jar +avro-1.7.7.jar +avro-ipc-1.7.7-tests.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcprov-jdk15on-1.51.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.10-0.11.2.jar +breeze_2.10-0.11.2.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.5.0.jar +chill_2.10-0.5.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.1.jar +commons-compiler-2.7.6.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar 
+commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.3.2.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +config-1.2.1.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.10.1.1.jar +eigenbase-properties-1.1.5.jar +geronimo-annotation_1.0_spec-1.1.1.jar +geronimo-jaspic_1.0_spec-1.0.jar +geronimo-jta_1.1_spec-1.1.1.jar +groovy-all-2.1.6.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.4.0.jar +hadoop-auth-2.4.0.jar +hadoop-client-2.4.0.jar +hadoop-common-2.4.0.jar +hadoop-hdfs-2.4.0.jar +hadoop-mapreduce-client-app-2.4.0.jar +hadoop-mapreduce-client-common-2.4.0.jar +hadoop-mapreduce-client-core-2.4.0.jar +hadoop-mapreduce-client-jobclient-2.4.0.jar +hadoop-mapreduce-client-shuffle-2.4.0.jar +hadoop-yarn-api-2.4.0.jar +hadoop-yarn-client-2.4.0.jar +hadoop-yarn-common-2.4.0.jar +hadoop-yarn-server-common-2.4.0.jar +hadoop-yarn-server-web-proxy-2.4.0.jar +httpclient-4.3.2.jar +httpcore-4.3.2.jar +ivy-2.4.0.jar +jackson-annotations-2.4.4.jar +jackson-core-2.4.4.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.4.4.jar +jackson-jaxrs-1.9.13.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-scala_2.10-2.4.4.jar +jackson-xc-1.9.13.jar +janino-2.7.8.jar +jansi-1.4.jar +java-xmlbuilder-1.0.jar +javax.inject-1.jar +javax.servlet-3.0.0.v201112011016.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jaxb-core-2.2.7.jar +jaxb-impl-2.2.7.jar +jcl-over-slf4j-1.7.10.jar +jdo-api-3.0.1.jar +jersey-client-1.9.jar +jersey-core-1.9.jar +jersey-guice-1.9.jar +jersey-json-1.9.jar +jersey-server-1.9.jar +jets3t-0.9.3.jar +jettison-1.1.jar +jetty-6.1.26.jar +jetty-all-7.6.0.v20120127.jar +jetty-util-6.1.26.jar +jline-2.10.4.jar +jline-2.12.jar +joda-time-2.9.jar +jodd-core-3.5.2.jar +jpam-1.1.jar 
+json-20090211.jar +json4s-ast_2.10-3.2.10.jar +json4s-core_2.10-3.2.10.jar +json4s-jackson_2.10-3.2.10.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.10.jar +kryo-2.21.jar +leveldbjni-all-1.8.jar +libfb303-0.9.2.jar +libthrift-0.9.2.jar +log4j-1.2.17.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-0.21.1-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +minlog-1.2.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.29.Final.jar +objenesis-1.2.jar +opencsv-2.3.jar +oro-2.0.8.jar +paranamer-2.6.jar +parquet-column-1.7.0.jar +parquet-common-1.7.0.jar +parquet-encoding-1.7.0.jar +parquet-format-2.3.0-incubating.jar +parquet-generator-1.7.0.jar +parquet-hadoop-1.7.0.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.7.0.jar +pmml-agent-1.1.15.jar +pmml-model-1.1.15.jar +pmml-schema-1.1.15.jar +protobuf-java-2.5.0.jar +py4j-0.9.jar +pyrolite-4.9.jar +quasiquotes_2.10-2.0.0-M8.jar +reflectasm-1.07-shaded.jar +scala-compiler-2.10.4.jar +scala-library-2.10.4.jar +scala-reflect-2.10.4.jar +scalap-2.10.4.jar +servlet-api-2.5.jar +slf4j-api-1.7.10.jar +slf4j-log4j12-1.7.10.jar +snappy-0.2.jar +snappy-java-1.1.1.7.jar +spire-macros_2.10-0.7.4.jar +spire_2.10-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +tachyon-client-0.8.1.jar +tachyon-underfs-hdfs-0.8.1.jar +tachyon-underfs-local-0.8.1.jar +tachyon-underfs-s3-0.8.1.jar +uncommons-maths-1.2.2a.jar +unused-1.0.0.jar +xmlenc-0.52.jar +xz-1.0.jar +zookeeper-3.4.5.jar diff --git a/dev/sparktestsupport/__init__.py b/dev/sparktestsupport/__init__.py index 8ab6d9e37ca2..61acd8d4aab3 100644 --- a/dev/sparktestsupport/__init__.py +++ b/dev/sparktestsupport/__init__.py @@ -31,5 +31,6 @@ "BLOCK_SPARK_UNIT_TESTS": 18, "BLOCK_PYSPARK_UNIT_TESTS": 19, "BLOCK_SPARKR_UNIT_TESTS": 20, + "BLOCK_BUILD_TESTS": 21, "BLOCK_TIMEOUT": 124 } diff --git 
a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py
index d65547e04db4..ae7c7634699d 100644
--- a/dev/sparktestsupport/modules.py
+++ b/dev/sparktestsupport/modules.py
@@ -31,7 +31,7 @@ class Module(object):

     def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=(), environ={},
                  sbt_test_goals=(), python_test_goals=(), blacklisted_python_implementations=(),
-                 test_tags=(), should_run_r_tests=False):
+                 test_tags=(), should_run_r_tests=False, should_run_build_tests=False):
         """
         Define a new module.

@@ -53,6 +53,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
         :param test_tags A set of tags that will be excluded when running unit tests if the module
             is not explicitly changed.
         :param should_run_r_tests: If true, changes in this module will trigger all R tests.
+        :param should_run_build_tests: If true, changes in this module will trigger build tests.
         """
         self.name = name
         self.dependencies = dependencies
@@ -64,6 +65,7 @@ def __init__(self, name, dependencies, source_file_regexes, build_profile_flags=
         self.blacklisted_python_implementations = blacklisted_python_implementations
         self.test_tags = test_tags
         self.should_run_r_tests = should_run_r_tests
+        self.should_run_build_tests = should_run_build_tests

         self.dependent_modules = set()
         for dep in dependencies:
@@ -394,6 +396,16 @@ def contains_file(self, filename):
     ]
 )

+# Matches pom.xml files. A pom change can alter the published dependency set,
+# so this module opts in to the build (dependency manifest) tests.
+build = Module(
+    name="build",
+    dependencies=[],
+    source_file_regexes=[
+        ".*pom.xml",
+    ],
+    should_run_build_tests=True
+)

 ec2 = Module(
     name="ec2",
diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh
new file mode 100755
index 000000000000..220807186f55
--- /dev/null
+++ b/dev/test-dependencies.sh
@@ -0,0 +1,106 @@
+#!/usr/bin/env bash
+
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Checks that the jars on the assembly classpath match the manifests checked in
+# as dev/spark-deps-hadoop1 and dev/spark-deps-hadoop24.
+#
+# Usage: ./dev/test-dependencies.sh [--replace-manifest]
+#   --replace-manifest  overwrite the checked-in manifests with the freshly
+#                       generated ones instead of comparing against them
+
+set -e
+
+# TODO: This would be much nicer to do in SBT, once SBT supports Maven-style
+# resolution.
+
+MVN="build/mvn --force"
+# NOTE: These should match those in the release publishing script
+HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pyarn -Phive"
+LOCAL_REPO="mvn-tmp"
+
+if [ -n "$AMPLAB_JENKINS" ]; then
+  # To speed up Maven install process we remove source files
+  # Maven dependency list only works once installed
+  # The patterns are quoted so the shell cannot expand them before find sees
+  # them, and -delete copes with odd file names and with zero matches.
+  find . -type f -name '*.scala' -delete
+  find . -type f -name '*.java' -delete
+fi
+
+# Use custom version to avoid Maven contention
+spark_version="spark-$(date +%s | tail -c6)"
+$MVN -q versions:set -DnewVersion="$spark_version" > /dev/null
+
+echo "Performing Maven install"
+$MVN $HADOOP2_MODULE_PROFILES install -q \
+  -pl '!assembly' \
+  -pl '!examples' \
+  -pl '!external/flume-assembly' \
+  -pl '!external/kafka-assembly' \
+  -pl '!external/twitter' \
+  -pl '!external/flume' \
+  -pl '!external/mqtt' \
+  -pl '!external/mqtt-assembly' \
+  -pl '!external/zeromq' \
+  -pl '!external/kafka' \
+  -DskipTests
+
+echo "Generating dependency manifest"
+
+# The classpath is the last of the 5 lines that follow the assembly banner.
+# Split it on ":", keep each entry's file name, sort, and drop every entry
+# whose name contains "spark".
+$MVN -Phadoop-1 dependency:build-classpath -pl assembly \
+  | grep "Building Spark Project Assembly" -A 5 \
+  | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+  | grep -v spark > dev/pr-deps-hadoop1
+
+
+$MVN $HADOOP2_MODULE_PROFILES -Phadoop-2.4 dependency:build-classpath -pl assembly \
+  | grep "Building Spark Project Assembly" -A 5 \
+  | tail -n 1 | tr ":" "\n" | rev | cut -d "/" -f 1 | rev | sort \
+  | grep -v spark > dev/pr-deps-hadoop24
+
+if [ -n "$AMPLAB_JENKINS" ]; then
+  git reset --hard HEAD
+fi
+
+if [[ "$*" == *replace-manifest* ]]; then
+  echo "Replacing manifest and creating new file at dev/spark-deps"
+  mv dev/pr-deps-hadoop1 dev/spark-deps-hadoop1
+  mv dev/pr-deps-hadoop24 dev/spark-deps-hadoop24
+  exit 0
+fi
+
+# Compare every generated manifest with its checked-in counterpart and
+# accumulate the differences, so that a mismatch in ANY profile fails the check.
+# diff exits non-zero both when the files differ and when one cannot be read;
+# stderr is captured as well so that either case ends up in the report.
+dep_diff=""
+for profile in hadoop1 hadoop24; do
+  if ! profile_diff="$(diff "dev/pr-deps-$profile" "dev/spark-deps-$profile" 2>&1)"; then
+    dep_diff="${dep_diff}[${profile}]"$'\n'"${profile_diff}"$'\n'
+  fi
+done
+
+if [ -n "$dep_diff" ]; then
+  echo "Spark's published dependencies DO NOT MATCH the manifest file (dev/spark-deps)."
+  echo "To update the manifest file, run './dev/test-dependencies.sh --replace-manifest'."
+  echo "$dep_diff"
+  exit 1
+fi
diff --git a/pom.xml b/pom.xml
index 762bfc728233..37064acfd417 100644
--- a/pom.xml
+++ b/pom.xml
@@ -2052,6 +2052,22 @@
 maven-deploy-plugin
 2.8.2

+
+ org.apache.maven.plugins
+ maven-dependency-plugin
+
+
+ default-cli
+
+ build-classpath
+
+
+
+ compile
+
+
+
+