56 changes: 35 additions & 21 deletions .github/workflows/build_and_test.yml
@@ -3,15 +3,8 @@ name: Build and test
 on:
   push:
     branches:
-    - master
-  pull_request:
-    branches:
-    - master
-  workflow_dispatch:
-    inputs:
-      target:
-        description: 'Target branch to run'
-        required: true
+    - '**'
+    - '!branch-*.*'
 
 jobs:
   # Build: build Spark and run the tests for specified modules.
@@ -82,17 +75,23 @@ jobs:
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
      uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -133,6 +132,7 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         # Hive and SQL tests become flaky when running in parallel as it's too intensive.
         if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
         ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
@@ -171,17 +171,23 @@ jobs:
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -216,6 +222,7 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
     - name: Upload test results to report
       if: always()
@@ -239,17 +246,23 @@ jobs:
       HADOOP_PROFILE: hadoop3.2
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -275,6 +288,7 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         ./dev/run-tests --parallelism 2 --modules sparkr
     - name: Upload test results to report
       if: always()
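The recurring change in this file is the `sync-branch` step: it records the apache/spark commit that `actions/checkout` checked out, fast-forwards onto the fork's branch, and publishes that commit to later steps via `::set-output`, where `run-tests.py` picks it up as `APACHE_SPARK_REF`. As a rough illustration only (the real step is the shell snippet above, and the function name here is invented for the sketch), the same logic in Python:

```python
import os
import subprocess

def sync_with_fork_branch():
    # Record the commit currently checked out (apache/spark master) so that
    # run-tests.py can later diff the fork's changes against it.
    apache_spark_ref = subprocess.check_output(
        ["git", "rev-parse", "HEAD"], universal_newlines=True).strip()

    # Fetch the branch under test from the forked repository and fast-forward
    # onto it; --ff-only aborts instead of creating a merge commit if the
    # fork has diverged from apache/spark master.
    fork_url = "https://github.com/%s.git" % os.environ["GITHUB_REPOSITORY"]
    branch = os.environ["GITHUB_REF"].rsplit("/", 1)[-1]  # like ${GITHUB_REF##*/}
    subprocess.check_call(["git", "fetch", fork_url, branch])
    subprocess.check_call(["git", "merge", "--progress", "--ff-only", "FETCH_HEAD"])

    # Counterpart of `echo "::set-output name=APACHE_SPARK_REF::..."`.
    print("::set-output name=APACHE_SPARK_REF::%s" % apache_spark_ref)
    return apache_spark_ref
```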
40 changes: 40 additions & 0 deletions .github/workflows/notify_test_workflow.yml
@@ -0,0 +1,40 @@
+name: Notify test workflow
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+jobs:
+  notify:
+    runs-on: ubuntu-20.04
+    steps:
+    - name: "Notify test workflow"
+      uses: actions/github-script@v3
+      if: ${{ github.base_ref == 'master' }}
+      with:
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        script: |
+          const endpoint = "GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch"
+          const params = {
+            owner: context.payload.pull_request.head.repo.owner.login,
+            repo: context.payload.pull_request.head.repo.name,
+            id: "build_and_test.yml",
+            branch: context.payload.pull_request.head.ref,
+          }
+          const runs = await github.request(endpoint, params)
+          var runID = runs.data.workflow_runs[0].id
+          var msg = "**[Test build #" + runID + "]"
+            + "(https://github.com/" + context.payload.pull_request.head.repo.full_name
+            + "/actions/runs/" + runID + ")** "
+            + "for PR " + context.issue.number
+            + " at commit [`" + context.payload.pull_request.head.sha.substring(0, 7) + "`]"
+            + "(https://github.com/" + context.payload.pull_request.head.repo.full_name
+            + "/commit/" + context.payload.pull_request.head.sha + ")."
+          github.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.payload.repository.owner.login,
+            repo: context.payload.repository.name,
+            body: msg
+          })
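This new workflow runs on `pull_request_target` in apache/spark: it looks up the newest `build_and_test.yml` run on the PR author's branch in their fork and posts a link to it as a PR comment. A hedged Python sketch of the same flow against the GitHub REST API; `requests`, the function name, and the dict-shaped `pr` payload are illustrative assumptions, not part of the workflow:

```python
import requests

def notify_test_workflow(pr, token):
    # pr is assumed to be the pull_request event payload as a dict.
    head_repo = pr["head"]["repo"]["full_name"]  # the fork, e.g. "user/spark"
    base_repo = pr["base"]["repo"]["full_name"]  # "apache/spark"
    branch = pr["head"]["ref"]
    headers = {"Authorization": "token %s" % token}

    # GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}/runs?branch=...
    runs = requests.get(
        "https://api.github.com/repos/%s/actions/workflows/build_and_test.yml/runs"
        % head_repo, params={"branch": branch}, headers=headers).json()
    run_id = runs["workflow_runs"][0]["id"]  # newest run is listed first

    sha = pr["head"]["sha"]
    msg = ("**[Test build #%d](https://github.com/%s/actions/runs/%d)** "
           "for PR %d at commit [`%s`](https://github.com/%s/commit/%s)."
           % (run_id, head_repo, run_id, pr["number"], sha[:7], head_repo, sha))

    # POST /repos/{owner}/{repo}/issues/{number}/comments on the base repo.
    requests.post("https://api.github.com/repos/%s/issues/%d/comments"
                  % (base_repo, pr["number"]),
                  json={"body": msg}, headers=headers)
```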
12 changes: 3 additions & 9 deletions dev/run-tests.py
@@ -656,16 +656,10 @@ def main():
     # If we're running the tests in GitHub Actions, attempt to detect and test
     # only the affected modules.
     if test_env == "github_actions":
-        if os.environ["GITHUB_INPUT_BRANCH"] != "":
[Review comment from the Member Author] @maropu FYI. I think we should update https://spark.apache.org/developer-tools.html because now we always run the tests on each commit in each branch in forked repositories. I will take a look and fix it soon.
-            # Dispatched request
-            # Note that it assumes GitHub Actions has already merged
-            # the given `GITHUB_INPUT_BRANCH` branch.
+        if os.environ["APACHE_SPARK_REF"] != "":
+            # Fork repository
             changed_files = identify_changed_files_from_git_commits(
-                "HEAD", target_branch=os.environ["GITHUB_SHA"])
-        elif os.environ["GITHUB_BASE_REF"] != "":
-            # Pull requests
-            changed_files = identify_changed_files_from_git_commits(
-                os.environ["GITHUB_SHA"], target_branch=os.environ["GITHUB_BASE_REF"])
+                "HEAD", target_ref=os.environ["APACHE_SPARK_REF"])
         else:
             # Build for each commit.
             changed_files = identify_changed_files_from_git_commits(
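The hunk above switches the fork path from a `target_branch` keyword to `target_ref`, diffing `HEAD` against the apache/spark commit recorded by the `sync-branch` step. A simplified sketch of what `identify_changed_files_from_git_commits` plausibly does with those arguments (assumption: the real helper in `dev/run-tests.py` has more error handling); in essence it is a `git diff --name-only` between the two refs:

```python
import subprocess

def identify_changed_files_from_git_commits(patch_sha, target_branch=None, target_ref=None):
    # Exactly one diff target must be given: a branch name (the old pull
    # request path) or a concrete commit such as APACHE_SPARK_REF (forks).
    if (target_branch is None) == (target_ref is None):
        raise ValueError("specify exactly one of target_branch or target_ref")
    diff_target = target_branch if target_branch is not None else target_ref

    # List the files that differ between the tested commit and the target;
    # downstream code maps these paths onto the modules whose tests to run.
    raw = subprocess.check_output(
        ["git", "diff", "--name-only", patch_sha, diff_target],
        universal_newlines=True)
    return [f for f in raw.splitlines() if f.strip()]
```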