1 change: 1 addition & 0 deletions dev/.rat-excludes
@@ -106,3 +106,4 @@ spark-warehouse
structured-streaming/*
kafka-source-initial-offset-version-2.1.0.bin
kafka-source-initial-offset-future-version.bin
vote.tmpl
Member:
Even if RAT doesn't check it, isn't vote.tmpl packaged into the source release this way?

Contributor (author):

Are you saying this file should not be packaged in the source release? I'm not sure why that would be the case; there's a lot of stuff in .rat-excludes that is still packaged.
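Note: .rat-excludes only tells the Apache RAT license checker which paths to skip; it does not affect what ends up in the release tarball. A minimal sketch of how a checker script such as dev/check-license would consume it (flags assumed from the standard RAT command line):

# RAT reads exclusion patterns from the file given via -E and scans the tree
# under -d; nothing here influences release packaging.
java -jar "$rat_jar" -E "$FWDIR/dev/.rat-excludes" -d "$FWDIR" > rat-results.txt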

109 changes: 109 additions & 0 deletions dev/create-release/do-release-docker.sh
@@ -0,0 +1,109 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

set -e
SELF=$(cd $(dirname $0) && pwd)
. "$SELF/release-util.sh"

WORKDIR=
IMGTAG=latest
JAVA=
while getopts "d:j:nt:" opt; do
case $opt in
d) WORKDIR="$OPTARG" ;;
n) DRY_RUN=1 ;;
t) IMGTAG="$OPTARG" ;;
j) JAVA="$OPTARG" ;;
?) error "Invalid option: $OPTARG" ;;
esac
done

if [ -z "$WORKDIR" ] || [ ! -d "$WORKDIR" ]; then
error "Work directory (-d) must be defined and exist."
fi

if [ -d "$WORKDIR/output" ]; then
read -p "Output directory already exists. Overwrite and continue? [y/n] " ANSWER
if [ "$ANSWER" != "y" ]; then
error "Exiting."
fi
fi

cd "$WORKDIR"
rm -rf "$WORKDIR/output"
mkdir "$WORKDIR/output"

get_release_info

# Place all RM scripts and necessary data in a local directory that must be defined in the command
# line. This directory is mounted into the image.
for f in "$SELF"/*; do
if [ -f "$f" ]; then
cp "$f" "$WORKDIR"
fi
done
GPG_KEY_FILE="$WORKDIR/gpg.key"
fcreate_secure "$GPG_KEY_FILE"
$GPG --export-secret-key --armor "$GPG_KEY" > "$GPG_KEY_FILE"

run_silent "Building spark-rm image with tag $IMGTAG..." "docker-build.log" \
docker build -t "spark-rm:$IMGTAG" --build-arg UID=$UID "$SELF/spark-rm"
Contributor:

So we need to do export UID=xxx before running this script?

Contributor:

Got it. This is a system variable, so we can't run this script as the root user...
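Note: in bash, UID is a built-in, read-only variable holding the current user's numeric ID, so no export is needed before running the script; the docker build invocation above simply forwards it via --build-arg UID=$UID. A quick illustration in an interactive bash session (output values are examples):

$ echo "$UID"   # set automatically by bash at startup
1000
$ UID=0         # it cannot be reassigned
bash: UID: readonly variable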


# Write the release information to a file with environment variables to be used when running the
# image.
ENVFILE="$WORKDIR/env.list"
fcreate_secure "$ENVFILE"

function cleanup {
rm -f "$ENVFILE"
rm -f "$GPG_KEY_FILE"
}

trap cleanup EXIT

cat > $ENVFILE <<EOF
DRY_RUN=$DRY_RUN
SKIP_TAG=$SKIP_TAG
RUNNING_IN_DOCKER=1
GIT_BRANCH=$GIT_BRANCH
NEXT_VERSION=$NEXT_VERSION
RELEASE_VERSION=$RELEASE_VERSION
RELEASE_TAG=$RELEASE_TAG
GIT_REF=$GIT_REF
SPARK_PACKAGE_VERSION=$SPARK_PACKAGE_VERSION
ASF_USERNAME=$ASF_USERNAME
GIT_NAME=$GIT_NAME
GIT_EMAIL=$GIT_EMAIL
GPG_KEY=$GPG_KEY
ASF_PASSWORD=$ASF_PASSWORD
GPG_PASSPHRASE=$GPG_PASSPHRASE
EOF

JAVA_VOL=
if [ -n "$JAVA" ]; then
echo "JAVA_HOME=/opt/spark-java" >> $ENVFILE
JAVA_VOL="--volume $JAVA:/opt/spark-java"
fi

echo "Building $RELEASE_TAG; output will be at $WORKDIR/output"
docker run -ti \
--env-file "$ENVFILE" \
--volume "$WORKDIR:/opt/spark-rm" \
$JAVA_VOL \
"spark-rm:$IMGTAG"
59 changes: 59 additions & 0 deletions dev/create-release/do-release.sh
@@ -0,0 +1,59 @@
#!/usr/bin/env bash

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

SELF=$(cd $(dirname $0) && pwd)
. "$SELF/release-util.sh"

while getopts "bn" opt; do
Contributor:
It would be nice to have a high-level description in the script just saying that this does a release, doing things like tagging and building, and pushing things to the ASF Spark repo.
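One possible shape for such a header, written as shell comments (the wording is only a suggestion):

# do-release.sh - drives a Spark release: optionally creates the release tag,
# builds the release packages and documentation, and publishes the results to
# the ASF Spark repositories. Use -b to select the git branch to release from
# and -n for a dry run.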

case $opt in
b) GIT_BRANCH=$OPTARG ;;
n) DRY_RUN=1 ;;
?) error "Invalid option: $OPTARG" ;;
esac
done

if [ "$RUNNING_IN_DOCKER" = "1" ]; then
# Inside docker, need to import the GPG key stored in the current directory.
echo $GPG_PASSPHRASE | $GPG --passphrase-fd 0 --import "$SELF/gpg.key"

# We may need to adjust the path since JAVA_HOME may be overridden by the driver script.
if [ -n "$JAVA_HOME" ]; then
export PATH="$JAVA_HOME/bin:$PATH"
else
# JAVA_HOME for the openjdk package.
export JAVA_HOME=/usr
fi
else
# Outside docker, need to ask for information about the release.
get_release_info
fi

if [ $SKIP_TAG = 0 ]; then
maybe_run "Creating release tag $RELEASE_TAG..." "tag.log" \
"$SELF/release-tag.sh"
else
echo "Skipping tag creation for $RELEASE_TAG."
fi

run_silent "Building Spark..." "build.log" \
"$SELF/release-build.sh" package
run_silent "Building documentation..." "docs.log" \
"$SELF/release-build.sh" docs
maybe_run "Publishing release" "publish.log" \
"$SELF/release-build.sh" publish-release
108 changes: 70 additions & 38 deletions dev/create-release/release-build.sh
@@ -17,6 +17,9 @@
# limitations under the License.
#

SELF=$(cd $(dirname $0) && pwd)
. "$SELF/release-util.sh"

function exit_with_usage {
cat << EOF
usage: release-build.sh <package|docs|publish-snapshot|publish-release>
@@ -89,17 +92,6 @@ BASE_DIR=$(pwd)

MVN="build/mvn --force"

# Hive-specific profiles for some builds
HIVE_PROFILES="-Phive -Phive-thriftserver"
# Profiles for publishing snapshots and release to Maven Central
PUBLISH_PROFILES="-Pmesos -Pyarn -Pkubernetes -Pflume $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
# Profiles for building binary releases
BASE_RELEASE_PROFILES="-Pmesos -Pyarn -Pkubernetes -Pflume -Psparkr"
# Scala 2.11 only profiles for some builds
SCALA_2_11_PROFILES="-Pkafka-0-8"
# Scala 2.12 only profiles for some builds
SCALA_2_12_PROFILES="-Pscala-2.12"

rm -rf spark
git clone https://git-wip-us.apache.org/repos/asf/spark.git
cd spark
@@ -112,6 +104,28 @@ if [ -z "$SPARK_VERSION" ]; then
| grep -v INFO | grep -v WARNING | grep -v Download)
fi

# Depending on the version being built, certain extra profiles need to be activated, and
# different versions of Scala are supported.
BASE_PROFILES="-Pmesos -Pyarn"
PUBLISH_SCALA_2_10=0
SCALA_2_10_PROFILES="-Pscala-2.10"
SCALA_2_11_PROFILES=
SCALA_2_12_PROFILES="-Pscala-2.12"

if [[ $SPARK_VERSION > "2.3" ]]; then
BASE_PROFILES="$BASE_PROFILES -Pkubernetes -Pflume"
SCALA_2_11_PROFILES="-Pkafka-0-8"
else
PUBLISH_SCALA_2_10=1
fi
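A note on the comparison above: [[ $SPARK_VERSION > "2.3" ]] is a lexicographic string test, so any concrete 2.3-or-later version sorts after the bare "2.3", while 2.2.x and earlier fall through to the Scala 2.10 publishing path. A quick check of the boundary cases (illustrative):

[[ "2.4.0" > "2.3" ]] && echo yes   # yes: kubernetes/flume profiles apply
[[ "2.3.0" > "2.3" ]] && echo yes   # yes: 2.3.x also takes the first branch
[[ "2.2.3" > "2.3" ]] || echo no    # no: publishes Scala 2.10 artifacts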

# Hive-specific profiles for some builds
HIVE_PROFILES="-Phive -Phive-thriftserver"
# Profiles for publishing snapshots and release to Maven Central
PUBLISH_PROFILES="$BASE_PROFILES $HIVE_PROFILES -Pspark-ganglia-lgpl -Pkinesis-asl"
# Profiles for building binary releases
BASE_RELEASE_PROFILES="$BASE_PROFILES -Psparkr"

# Verify we have the right java version set
if [ -z "$JAVA_HOME" ]; then
echo "Please set JAVA_HOME."
@@ -127,7 +141,7 @@ if [[ ! $SPARK_VERSION < "2.2." ]]; then
exit 1
fi
else
if [[ $java_version > "1.7." ]]; then
if ! [[ $java_version =~ 1\.7\..* ]]; then
if [ -z "$JAVA_7_HOME" ]; then
echo "Java version $java_version is higher than required 1.7 for pre-2.2"
echo "Please set JAVA_HOME correctly."
@@ -174,8 +188,9 @@ if [[ "$1" == "package" ]]; then
FLAGS=$2
ZINC_PORT=$3
BUILD_PACKAGE=$4
cp -r spark spark-$SPARK_VERSION-bin-$NAME

echo "Building binary dist $NAME"
cp -r spark spark-$SPARK_VERSION-bin-$NAME
cd spark-$SPARK_VERSION-bin-$NAME

# TODO There should probably be a flag to make-distribution to allow 2.12 support
@@ -250,25 +265,32 @@ if [[ "$1" == "package" ]]; then
# We increment the Zinc port each time to avoid OOM's and other craziness if multiple builds
# share the same Zinc server.
make_binary_release "hadoop2.6" "-Phadoop-2.6 $HIVE_PROFILES $SCALA_2_11_PROFILES $BASE_RELEASE_PROFILES" "3035" "withr" &
make_binary_release "hadoop2.7" "-Phadoop-2.7 $HIVE_PROFILES $SCALA_2_11_PROFILES $BASE_RELEASE_PROFILES" "3036" "withpip" &
make_binary_release "without-hadoop" "-Phadoop-provided $SCALA_2_11_PROFILES $BASE_RELEASE_PROFILES" "3038" &

if ! is_dry_run; then
make_binary_release "hadoop2.7" "-Phadoop-2.7 $HIVE_PROFILES $SCALA_2_11_PROFILES $BASE_RELEASE_PROFILES" "3036" "withpip" &
make_binary_release "without-hadoop" "-Phadoop-provided $SCALA_2_11_PROFILES $BASE_RELEASE_PROFILES" "3038" &
fi

wait
rm -rf spark-$SPARK_VERSION-bin-*/

svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
rm -rf "svn-spark/${DEST_DIR_NAME}-bin"
mkdir -p "svn-spark/${DEST_DIR_NAME}-bin"

echo "Copying release tarballs"
cp spark-* "svn-spark/${DEST_DIR_NAME}-bin/"
cp pyspark-* "svn-spark/${DEST_DIR_NAME}-bin/"
cp SparkR_* "svn-spark/${DEST_DIR_NAME}-bin/"
svn add "svn-spark/${DEST_DIR_NAME}-bin"
if ! is_dry_run; then
svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
rm -rf "svn-spark/${DEST_DIR_NAME}-bin"
mkdir -p "svn-spark/${DEST_DIR_NAME}-bin"

echo "Copying release tarballs"
cp spark-* "svn-spark/${DEST_DIR_NAME}-bin/"
cp pyspark-* "svn-spark/${DEST_DIR_NAME}-bin/"
cp SparkR_* "svn-spark/${DEST_DIR_NAME}-bin/"
svn add "svn-spark/${DEST_DIR_NAME}-bin"

cd svn-spark
svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION"
cd ..
rm -rf svn-spark
fi

cd svn-spark
svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION"
cd ..
rm -rf svn-spark
exit 0
fi

@@ -282,18 +304,22 @@ if [[ "$1" == "docs" ]]; then
cd ..
cd ..

svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
rm -rf "svn-spark/${DEST_DIR_NAME}-docs"
mkdir -p "svn-spark/${DEST_DIR_NAME}-docs"
if ! is_dry_run; then
svn co --depth=empty $RELEASE_STAGING_LOCATION svn-spark
rm -rf "svn-spark/${DEST_DIR_NAME}-docs"
mkdir -p "svn-spark/${DEST_DIR_NAME}-docs"

echo "Copying release documentation"
cp -R "spark/docs/_site" "svn-spark/${DEST_DIR_NAME}-docs/"
svn add "svn-spark/${DEST_DIR_NAME}-docs"
echo "Copying release documentation"
cp -R "spark/docs/_site" "svn-spark/${DEST_DIR_NAME}-docs/"
svn add "svn-spark/${DEST_DIR_NAME}-docs"

cd svn-spark
svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION docs"
cd ..
rm -rf svn-spark
cd svn-spark
svn ci --username $ASF_USERNAME --password "$ASF_PASSWORD" -m"Apache Spark $SPARK_PACKAGE_VERSION docs"
cd ..
rm -rf svn-spark
fi

mv "spark/docs/_site" docs/
exit 0
fi

@@ -356,6 +382,12 @@ if [[ "$1" == "publish-release" ]]; then

$MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -DskipTests $SCALA_2_11_PROFILES $PUBLISH_PROFILES clean install

if [[ $PUBLISH_SCALA_2_10 = 1 ]]; then
./dev/change-scala-version.sh 2.10
$MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo -Dscala-2.10 \
-DskipTests $PUBLISH_PROFILES $SCALA_2_10_PROFILES clean install
fi

#./dev/change-scala-version.sh 2.12
#$MVN -DzincPort=$ZINC_PORT -Dmaven.repo.local=$tmp_repo \
# -DskipTests $SCALA_2_12_PROFILES $PUBLISH_PROFILES clean install