56 changes: 35 additions & 21 deletions .github/workflows/build_and_test.yml
@@ -3,15 +3,8 @@ name: Build and test
 on:
   push:
     branches:
-    - master
-  pull_request:
-    branches:
-    - master
-  workflow_dispatch:
-    inputs:
-      target:
-        description: 'Target branch to run'
-        required: true
+    - '**'
+    - '!branch-*.*'
 
 jobs:
   # Build: build Spark and run the tests for specified modules.
@@ -82,17 +75,23 @@ jobs:
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
      uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -133,6 +132,7 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         # Hive and SQL tests become flaky when running in parallel as it's too intensive.
         if [[ "$MODULES_TO_TEST" == "hive" ]] || [[ "$MODULES_TO_TEST" == "sql" ]]; then export SERIAL_SBT_TESTS=1; fi
         ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
@@ -171,17 +171,23 @@ jobs:
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -216,6 +222,7 @@ jobs:
     # Run the tests.
     - name: Run tests
       run: |
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
     - name: Upload test results to report
       if: always()
@@ -239,17 +246,23 @@ jobs:
       HADOOP_PROFILE: hadoop3.2
       HIVE_PROFILE: hive2.3
       GITHUB_PREV_SHA: ${{ github.event.before }}
-      GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
       SPARK_LOCAL_IP: localhost
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
       # In order to fetch changed files
       with:
         fetch-depth: 0
-    - name: Merge dispatched input branch
-      if: ${{ github.event.inputs.target != '' }}
-      run: git merge --progress --ff-only origin/${{ github.event.inputs.target }}
+        repository: apache/spark
+        ref: master
+    - name: Sync the current branch with the latest in Apache Spark
+      if: github.repository != 'apache/spark'
+      id: sync-branch
+      run: |
+        apache_spark_ref=`git rev-parse HEAD`
+        git fetch https://github.com/$GITHUB_REPOSITORY.git ${GITHUB_REF##*/}
+        git merge --progress --ff-only FETCH_HEAD
+        echo "::set-output name=APACHE_SPARK_REF::$apache_spark_ref"
     # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
     - name: Cache Scala, SBT and Maven
       uses: actions/cache@v2
@@ -275,6 +288,7 @@ jobs:
         # R issues at docker environment
         export TZ=UTC
         export _R_CHECK_SYSTEM_CLOCK_=FALSE
+        export APACHE_SPARK_REF=${{ steps.sync-branch.outputs.APACHE_SPARK_REF }}
         ./dev/run-tests --parallelism 2 --modules sparkr
     - name: Upload test results to report
       if: always()
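The recurring change in this file is the `sync-branch` step: it records the apache/spark commit that `actions/checkout` checked out, fast-forwards onto the fork's branch, and publishes that commit to later steps via `::set-output`, where `run-tests.py` picks it up as `APACHE_SPARK_REF`. As a rough illustration only (the real step is the shell snippet above, and the function name here is invented for the sketch), the same logic in Python:

```python
import os
import subprocess

def sync_with_fork_branch():
    # Record the commit currently checked out (apache/spark master) so that
    # run-tests.py can later diff the fork's changes against it.
    apache_spark_ref = subprocess.check_output(
        ["git", "rev-parse", "HEAD"], universal_newlines=True).strip()

    # Fetch the branch under test from the forked repository and fast-forward
    # onto it; --ff-only aborts instead of creating a merge commit if the
    # fork has diverged from apache/spark master.
    fork_url = "https://github.com/%s.git" % os.environ["GITHUB_REPOSITORY"]
    branch = os.environ["GITHUB_REF"].rsplit("/", 1)[-1]  # like ${GITHUB_REF##*/}
    subprocess.check_call(["git", "fetch", fork_url, branch])
    subprocess.check_call(["git", "merge", "--progress", "--ff-only", "FETCH_HEAD"])

    # Counterpart of `echo "::set-output name=APACHE_SPARK_REF::..."`.
    print("::set-output name=APACHE_SPARK_REF::%s" % apache_spark_ref)
    return apache_spark_ref
```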
40 changes: 40 additions & 0 deletions .github/workflows/notify_test_workflow.yml
@@ -0,0 +1,40 @@
+name: Notify test workflow
+on:
+  pull_request_target:
+    types: [opened, reopened, synchronize]
+
+jobs:
+  notify:
+    runs-on: ubuntu-20.04
+    steps:
+    - name: "Notify test workflow"
+      uses: actions/github-script@v3
+      if: ${{ github.base_ref == 'master' }}
+      with:
+        github-token: ${{ secrets.GITHUB_TOKEN }}
+        script: |
+          const endpoint = "GET /repos/:owner/:repo/actions/workflows/:id/runs?&branch=:branch"
+          const params = {
+            owner: context.payload.pull_request.head.repo.owner.login,
+            repo: context.payload.pull_request.head.repo.name,
+            id: "build_and_test.yml",
+            branch: context.payload.pull_request.head.ref,
+          }
+          const runs = await github.request(endpoint, params)
+          var runID = runs.data.workflow_runs[0].id
+          var msg = "**[Test build #" + runID + "]"
+            + "(https://github.com/" + context.payload.pull_request.head.repo.full_name
+            + "/actions/runs/" + runID + ")** "
+            + "for PR " + context.issue.number
+            + " at commit [`" + context.payload.pull_request.head.sha.substring(0, 7) + "`]"
+            + "(https://github.com/" + context.payload.pull_request.head.repo.full_name
+            + "/commit/" + context.payload.pull_request.head.sha + ")."
+          github.issues.createComment({
+            issue_number: context.issue.number,
+            owner: context.payload.repository.owner.login,
+            repo: context.payload.repository.name,
+            body: msg
+          })
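This new workflow runs on `pull_request_target` in apache/spark: it looks up the newest `build_and_test.yml` run on the PR author's branch in their fork and posts a link to it as a PR comment. A hedged Python sketch of the same flow against the GitHub REST API; `requests`, the function name, and the dict-shaped `pr` payload are illustrative assumptions, not part of the workflow:

```python
import requests

def notify_test_workflow(pr, token):
    # pr is assumed to be the pull_request event payload as a dict.
    head_repo = pr["head"]["repo"]["full_name"]  # the fork, e.g. "user/spark"
    base_repo = pr["base"]["repo"]["full_name"]  # "apache/spark"
    branch = pr["head"]["ref"]
    headers = {"Authorization": "token %s" % token}

    # GET /repos/{owner}/{repo}/actions/workflows/{workflow_id}/runs?branch=...
    runs = requests.get(
        "https://api.github.com/repos/%s/actions/workflows/build_and_test.yml/runs"
        % head_repo, params={"branch": branch}, headers=headers).json()
    run_id = runs["workflow_runs"][0]["id"]  # newest run is listed first

    sha = pr["head"]["sha"]
    msg = ("**[Test build #%d](https://github.com/%s/actions/runs/%d)** "
           "for PR %d at commit [`%s`](https://github.com/%s/commit/%s)."
           % (run_id, head_repo, run_id, pr["number"], sha[:7], head_repo, sha))

    # POST /repos/{owner}/{repo}/issues/{number}/comments on the base repo.
    requests.post("https://api.github.com/repos/%s/issues/%d/comments"
                  % (base_repo, pr["number"]),
                  json={"body": msg}, headers=headers)
```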
12 changes: 3 additions & 9 deletions dev/run-tests.py
@@ -656,16 +656,10 @@ def main():
     # If we're running the tests in GitHub Actions, attempt to detect and test
     # only the affected modules.
     if test_env == "github_actions":
-        if os.environ["GITHUB_INPUT_BRANCH"] != "":
[Review comment from the Member Author] @maropu FYI. I think we should update https://spark.apache.org/developer-tools.html because now we always run the tests on each commit in each branch in forked repositories. I will take a look and fix it soon.
-            # Dispatched request
-            # Note that it assumes GitHub Actions has already merged
-            # the given `GITHUB_INPUT_BRANCH` branch.
+        if os.environ["APACHE_SPARK_REF"] != "":
+            # Fork repository
             changed_files = identify_changed_files_from_git_commits(
-                "HEAD", target_branch=os.environ["GITHUB_SHA"])
-        elif os.environ["GITHUB_BASE_REF"] != "":
-            # Pull requests
-            changed_files = identify_changed_files_from_git_commits(
-                os.environ["GITHUB_SHA"], target_branch=os.environ["GITHUB_BASE_REF"])
+                "HEAD", target_ref=os.environ["APACHE_SPARK_REF"])
         else:
             # Build for each commit.
             changed_files = identify_changed_files_from_git_commits(
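The hunk above switches the fork path from a `target_branch` keyword to `target_ref`, diffing `HEAD` against the apache/spark commit recorded by the `sync-branch` step. A simplified sketch of what `identify_changed_files_from_git_commits` plausibly does with those arguments (assumption: the real helper in `dev/run-tests.py` has more error handling); in essence it is a `git diff --name-only` between the two refs:

```python
import subprocess

def identify_changed_files_from_git_commits(patch_sha, target_branch=None, target_ref=None):
    # Exactly one diff target must be given: a branch name (the old pull
    # request path) or a concrete commit such as APACHE_SPARK_REF (forks).
    if (target_branch is None) == (target_ref is None):
        raise ValueError("specify exactly one of target_branch or target_ref")
    diff_target = target_branch if target_branch is not None else target_ref

    # List the files that differ between the tested commit and the target;
    # downstream code maps these paths onto the modules whose tests to run.
    raw = subprocess.check_output(
        ["git", "diff", "--name-only", patch_sha, diff_target],
        universal_newlines=True)
    return [f for f in raw.splitlines() if f.strip()]
```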