diff --git a/.github/workflows/master.yml b/.github/workflows/master.yml
index 7bb5481a561b..2fc556e6a43b 100644
--- a/.github/workflows/master.yml
+++ b/.github/workflows/master.yml
@@ -29,7 +29,7 @@ jobs:
         modules:
           - |-
             core, unsafe, kvstore, avro,
-            network_common, network_shuffle, repl, launcher
+            network-common, network-shuffle, repl, launcher,
             examples, sketch, graphx
           - |-
             catalyst, hive-thriftserver
@@ -75,156 +75,32 @@ jobs:
             excluded-tags: org.apache.spark.tags.ExtendedSQLTest
             comment: "- other tests"
     env:
-      TEST_ONLY_MODULES: ${{ matrix.modules }}
-      TEST_ONLY_EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
-      TEST_ONLY_INCLUDED_TAGS: ${{ matrix.included-tags }}
+      MODULES_TO_TEST: ${{ matrix.modules }}
+      EXCLUDED_TAGS: ${{ matrix.excluded-tags }}
+      INCLUDED_TAGS: ${{ matrix.included-tags }}
       HADOOP_PROFILE: ${{ matrix.hadoop }}
       HIVE_PROFILE: ${{ matrix.hive }}
       # GitHub Actions' default miniconda to use in pip packaging test.
       CONDA_PREFIX: /usr/share/miniconda
+      GITHUB_PREV_SHA: ${{ github.event.before }}
     steps:
     - name: Checkout Spark repository
       uses: actions/checkout@v2
-    # Cache local repositories. Note that GitHub Actions cache has a 2G limit.
-    - name: Cache Scala, SBT, Maven and Zinc
-      uses: actions/cache@v1
+      # In order to fetch changed files
       with:
-        path: build
-        key: build-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          build-
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-maven-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-maven-
-    - name: Cache Ivy local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.ivy2/cache
-        key: ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-${{ hashFiles('**/pom.xml') }}-${{ hashFiles('**/plugins.sbt') }}
-        restore-keys: |
-          ${{ matrix.java }}-${{ matrix.hadoop }}-ivy-
-    - name: Install JDK ${{ matrix.java }}
-      uses: actions/setup-java@v1
-      with:
-        java-version: ${{ matrix.java }}
+        fetch-depth: 0
     # PySpark
-    - name: Install PyPy3
-      # SQL component also has Python related tests, for example, IntegratedUDFTestUtils.
-      # Note that order of Python installations here matters because default python3 is
-      # overridden by pypy3.
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      with:
-        python-version: pypy3
-        architecture: x64
-    - name: Install Python 2.7
-      uses: actions/setup-python@v2
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      with:
-        python-version: 2.7
-        architecture: x64
     - name: Install Python 3.6
       uses: actions/setup-python@v2
       if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
       with:
         python-version: 3.6
         architecture: x64
-    - name: Install Python packages
-      if: contains(matrix.modules, 'pyspark') || (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-'))
-      # PyArrow is not supported in PyPy yet, see ARROW-2651.
-      # TODO(SPARK-32247): scipy installation with PyPy fails for an unknown reason.
-      run: |
-        python3 -m pip install numpy pyarrow pandas scipy
-        python3 -m pip list
-        python2 -m pip install numpy pyarrow pandas scipy
-        python2 -m pip list
-        pypy3 -m pip install numpy pandas
-        pypy3 -m pip list
-    # SparkR
-    - name: Install R 3.6
-      uses: r-lib/actions/setup-r@v1
-      if: contains(matrix.modules, 'sparkr')
-      with:
-        r-version: 3.6
-    - name: Install R packages
-      if: contains(matrix.modules, 'sparkr')
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-        sudo Rscript -e "install.packages(c('knitr', 'rmarkdown', 'testthat', 'devtools', 'e1071', 'survival', 'arrow', 'roxygen2'), repos='https://cloud.r-project.org/')"
-        # Show installed packages in R.
-        sudo Rscript -e 'pkg_list <- as.data.frame(installed.packages()[, c(1,3:4)]); pkg_list[is.na(pkg_list$Priority), 1:2, drop = FALSE]'
-    # Run the tests.
     - name: "Run tests: ${{ matrix.modules }}"
       run: |
         # Hive tests become flaky when running in parallel as it's too intensive.
-        if [[ "$TEST_ONLY_MODULES" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
+        git diff 
+        if [[ "$MODULES_TO_TEST" == "hive" ]]; then export SERIAL_SBT_TESTS=1; fi
         mkdir -p ~/.m2
-        ./dev/run-tests --parallelism 2
+        ./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST" --included-tags "$INCLUDED_TAGS" --excluded-tags "$EXCLUDED_TAGS"
         rm -rf ~/.m2/repository/org/apache/spark
-
-  # Static analysis, and documentation build
-  lint:
-    name: Linters, licenses, dependencies and documentation generation
-    runs-on: ubuntu-latest
-    steps:
-    - name: Checkout Spark repository
-      uses: actions/checkout@v2
-    - name: Cache Maven local repository
-      uses: actions/cache@v2
-      with:
-        path: ~/.m2/repository
-        key: docs-maven-repo-${{ hashFiles('**/pom.xml') }}
-        restore-keys: |
-          docs-maven-
-    - name: Install JDK 1.8
-      uses: actions/setup-java@v1
-      with:
-        java-version: 1.8
-    - name: Install Python 3.6
-      uses: actions/setup-python@v2
-      with:
-        python-version: 3.6
-        architecture: x64
-    - name: Install Python linter dependencies
-      run: |
-        pip3 install flake8 sphinx numpy
-    - name: Install R 3.6
-      uses: r-lib/actions/setup-r@v1
-      with:
-        r-version: 3.6
-    - name: Install R linter dependencies and SparkR
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev
-        sudo Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
-        sudo Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
-        ./R/install-dev.sh
-    - name: Install Ruby 2.7 for documentation generation
-      uses: actions/setup-ruby@v1
-      with:
-        ruby-version: 2.7
-    - name: Install dependencies for documentation generation
-      run: |
-        sudo apt-get install -y libcurl4-openssl-dev pandoc
-        pip install sphinx mkdocs numpy
-        gem install jekyll jekyll-redirect-from rouge
-        sudo Rscript -e "install.packages(c('devtools', 'testthat', 'knitr', 'rmarkdown', 'roxygen2'), repos='https://cloud.r-project.org/')"
-    - name: Scala linter
-      run: ./dev/lint-scala
-    - name: Java linter
-      run: ./dev/lint-java
-    - name: Python linter
-      run: ./dev/lint-python
-    - name: R linter
-      run: ./dev/lint-r
-    - name: License test
-      run: ./dev/check-license
-    - name: Dependencies test
-      run: ./dev/test-dependencies.sh
-    - name: Run documentation build
-      run: |
-        cd docs
-        jekyll build
diff --git a/R/README.md b/R/README.md
index 31174c73526f..bd59b3daad1d 100644
--- a/R/README.md
+++ b/R/README.md
@@ -1,4 +1,4 @@
-# R on Spark
+# R on Spark.
 
 SparkR is an R package that provides a light-weight frontend to use Spark from R.
 
diff --git a/dev/run-tests.py b/dev/run-tests.py
index 03cc3230a65f..c2ed518a4762 100755
--- a/dev/run-tests.py
+++ b/dev/run-tests.py
@@ -101,12 +101,14 @@ def setup_test_environ(environ):
         os.environ[k] = v
 
 
-def determine_modules_to_test(changed_modules):
+def determine_modules_to_test(changed_modules, deduplicated=True):
     """
     Given a set of modules that have changed, compute the transitive closure of those modules'
     dependent modules in order to determine the set of modules that should be tested.
 
     Returns a topologically-sorted list of modules (ties are broken by sorting on module names).
+    If ``deduplicated`` is disabled, the modules are returned without tacking the deduplication
+    by dependencies into account.
 
     >>> [x.name for x in determine_modules_to_test([modules.root])]
     ['root']
@@ -122,11 +124,29 @@ def determine_modules_to_test(changed_modules):
     ... # doctest: +NORMALIZE_WHITESPACE
     ['sql', 'avro', 'hive', 'mllib', 'sql-kafka-0-10', 'examples', 'hive-thriftserver',
      'pyspark-sql', 'repl', 'sparkr', 'pyspark-mllib', 'pyspark-ml']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sparkr, modules.pyspark_sql], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'examples', 'hive', 'hive-thriftserver', 'mllib', 'pyspark-ml',
+     'pyspark-mllib', 'pyspark-sql', 'repl', 'sparkr', 'sql', 'sql-kafka-0-10']
+    >>> sorted([x.name for x in determine_modules_to_test(
+    ...     [modules.sql, modules.core], deduplicated=False)])
+    ... # doctest: +NORMALIZE_WHITESPACE
+    ['avro', 'catalyst', 'core', 'examples', 'graphx', 'hive', 'hive-thriftserver',
+     'mllib', 'mllib-local', 'pyspark-core', 'pyspark-ml', 'pyspark-mllib',
+     'pyspark-resource', 'pyspark-sql', 'pyspark-streaming', 'repl', 'root',
+     'sparkr', 'sql', 'sql-kafka-0-10', 'streaming', 'streaming-kafka-0-10',
+     'streaming-kinesis-asl']
     """
     modules_to_test = set()
     for module in changed_modules:
-        modules_to_test = modules_to_test.union(determine_modules_to_test(module.dependent_modules))
+        modules_to_test = modules_to_test.union(
+            determine_modules_to_test(module.dependent_modules, deduplicated))
     modules_to_test = modules_to_test.union(set(changed_modules))
+
+    if not deduplicated:
+        return modules_to_test
+
     # If we need to run all of the tests, then we should short-circuit and return 'root'
     if modules.root in modules_to_test:
         return [modules.root]
@@ -539,6 +559,24 @@ def parse_opts():
         "-p", "--parallelism", type=int, default=8,
         help="The number of suites to test in parallel (default %(default)d)"
     )
+    parser.add_argument(
+        "-m", "--modules", type=str,
+        default=None,
+        help="A comma-separated list of modules to test "
+             "(default: %s)" % ",".join(sorted([m.name for m in modules.all_modules]))
+    )
+    parser.add_argument(
+        "-e", "--excluded-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to exclude in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
+    parser.add_argument(
+        "-i", "--included-tags", type=str,
+        default=None,
+        help="A comma-separated list of tags to include in the tests, "
+             "e.g., org.apache.spark.tags.ExtendedHiveTest "
+    )
 
     args, unknown = parser.parse_known_args()
     if unknown:
@@ -589,43 +627,79 @@ def main():
         # /home/jenkins/anaconda2/envs/py36/bin
         os.environ["PATH"] = "/home/anaconda/envs/py36/bin:" + os.environ.get("PATH")
     else:
-        # else we're running locally and can use local settings
+        # else we're running locally or Github Actions.
         build_tool = "sbt"
         hadoop_version = os.environ.get("HADOOP_PROFILE", "hadoop2.7")
         hive_version = os.environ.get("HIVE_PROFILE", "hive2.3")
-        test_env = "local"
+        if "GITHUB_ACTIONS" in os.environ:
+            test_env = "github_actions"
+        else:
+            test_env = "local"
 
     print("[info] Using build tool", build_tool, "with Hadoop profile", hadoop_version,
           "and Hive profile", hive_version, "under environment", test_env)
     extra_profiles = get_hadoop_profiles(hadoop_version) + get_hive_profiles(hive_version)
 
     changed_modules = None
+    test_modules = None
     changed_files = None
-    should_only_test_modules = "TEST_ONLY_MODULES" in os.environ
+    should_only_test_modules = opts.modules is not None
     included_tags = []
+    excluded_tags = []
     if should_only_test_modules:
-        str_test_modules = [m.strip() for m in os.environ.get("TEST_ONLY_MODULES").split(",")]
+        str_test_modules = [m.strip() for m in opts.modules.split(",")]
         test_modules = [m for m in modules.all_modules if m.name in str_test_modules]
-        # Directly uses test_modules as changed modules to apply tags and environments
-        # as if all specified test modules are changed.
+
+        # If we're running the tests in Github Actions, attempt to detect and test
+        # only the affected modules.
+        if test_env == "github_actions":
+            if os.environ["GITHUB_BASE_REF"] != "":
+                # Pull requests
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], target_branch=os.environ["GITHUB_BASE_REF"])
+            else:
+                # Build for each commit.
+                changed_files = identify_changed_files_from_git_commits(
+                    os.environ["GITHUB_SHA"], target_ref=os.environ["GITHUB_PREV_SHA"])
+            print("changed_files : %s" % changed_files)
+
+            modules_to_test = determine_modules_to_test(
+                determine_modules_for_files(changed_files), deduplicated=False)
+            print("modules_to_test : %s" % modules_to_test)
+
+            if modules.root not in modules_to_test:
+                # If root module does not exist, only test the intersected modules.
+                # If root module is found, just run the modules as specified initially.
+                test_modules = list(set(modules_to_test).intersection(test_modules))
+            print("test_modules : %s" % test_modules)
+
         changed_modules = test_modules
-        str_excluded_tags = os.environ.get("TEST_ONLY_EXCLUDED_TAGS", None)
-        str_included_tags = os.environ.get("TEST_ONLY_INCLUDED_TAGS", None)
-        excluded_tags = []
-        if str_excluded_tags:
-            excluded_tags = [t.strip() for t in str_excluded_tags.split(",")]
-        included_tags = []
-        if str_included_tags:
-            included_tags = [t.strip() for t in str_included_tags.split(",")]
+        if len(changed_modules) == 0:
+            print("[info] There are no modules to test, exiting without testing.")
+            return
+
+    # If we're running the tests in AMPLab Jenkins, calculate the diff from the targeted branch, and
+    # detect modules to test.
     elif test_env == "amplab_jenkins" and os.environ.get("AMP_JENKINS_PRB"):
         target_branch = os.environ["ghprbTargetBranch"]
         changed_files = identify_changed_files_from_git_commits("HEAD", target_branch=target_branch)
         changed_modules = determine_modules_for_files(changed_files)
+        test_modules = determine_modules_to_test(changed_modules)
         excluded_tags = determine_tags_to_exclude(changed_modules)
 
+    # If there is no changed module found, tests all.
     if not changed_modules:
         changed_modules = [modules.root]
-        excluded_tags = []
+    if not test_modules:
+        test_modules = determine_modules_to_test(changed_modules)
+
+    str_excluded_tags = opts.excluded_tags
+    str_included_tags = opts.included_tags
+    if str_excluded_tags:
+        excluded_tags.extend([t.strip() for t in str_excluded_tags.split(",")])
+    if str_included_tags:
+        included_tags.extend([t.strip() for t in str_included_tags.split(",")])
+
     print("[info] Found the following changed modules:",
           ", ".join(x.name for x in changed_modules))
 
@@ -640,8 +714,6 @@ def main():
 
     should_run_java_style_checks = False
     if not should_only_test_modules:
-        test_modules = determine_modules_to_test(changed_modules)
-
         # license checks
         run_apache_rat_checks()
 
@@ -672,40 +744,43 @@ def main():
     # if "DOCS" in changed_modules and test_env == "amplab_jenkins":
     #    build_spark_documentation()
 
-    if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
-        run_build_tests()
-
-    # spark build
-    build_apache_spark(build_tool, extra_profiles)
-
-    # backwards compatibility checks
-    if build_tool == "sbt":
-        # Note: compatibility tests only supported in sbt for now
-        detect_binary_inop_with_mima(extra_profiles)
-        # Since we did not build assembly/package before running dev/mima, we need to
-        # do it here because the tests still rely on it; see SPARK-13294 for details.
-        build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
-
-    # run the test suites
-    run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
-
-    modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
-    if modules_with_python_tests:
-        # We only run PySpark tests with coverage report in one specific job with
-        # Spark master with SBT in Jenkins.
-        is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
-        run_python_tests(
-            modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
-        run_python_packaging_tests()
-    if any(m.should_run_r_tests for m in test_modules):
-        run_sparkr_tests()
+    print(changed_modules)
+    print(test_modules)
+    print([m for m in test_modules if m.python_test_goals])
+    print([m.should_run_r_tests for m in test_modules])
+    print(excluded_tags)
+    print(included_tags)
+
+    # if any(m.should_run_build_tests for m in test_modules) and test_env != "amplab_jenkins":
+    #     run_build_tests()
+    #
+    # # spark build
+    # build_apache_spark(build_tool, extra_profiles)
+    #
+    # # backwards compatibility checks
+    # if build_tool == "sbt":
+    #     # Note: compatibility tests only supported in sbt for now
+    #     detect_binary_inop_with_mima(extra_profiles)
+    #     # Since we did not build assembly/package before running dev/mima, we need to
+    #     # do it here because the tests still rely on it; see SPARK-13294 for details.
+    #     build_spark_assembly_sbt(extra_profiles, should_run_java_style_checks)
+    #
+    # # run the test suites
+    # run_scala_tests(build_tool, extra_profiles, test_modules, excluded_tags, included_tags)
+    #
+    # modules_with_python_tests = [m for m in test_modules if m.python_test_goals]
+    # if modules_with_python_tests:
+    #     # We only run PySpark tests with coverage report in one specific job with
+    #     # Spark master with SBT in Jenkins.
+    #     is_sbt_master_job = "SPARK_MASTER_SBT_HADOOP_2_7" in os.environ
+    #     run_python_tests(
+    #         modules_with_python_tests, opts.parallelism, with_coverage=is_sbt_master_job)
+    #     run_python_packaging_tests()
+    # if any(m.should_run_r_tests for m in test_modules):
+    #     run_sparkr_tests()
 
 
 def _test():
-    if "TEST_ONLY_MODULES" in os.environ:
-        # TODO(SPARK-32252): Enable doctests back in Github Actions.
-        return
-
     import doctest
     failure_count = doctest.testmod()[0]
     if failure_count:
@@ -713,5 +788,5 @@ def _test():
 
 
 if __name__ == "__main__":
-    _test()
+    # _test()
     main()
diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py
index 5f4a8a2d2db1..bb192b487d4c 100644
--- a/python/pyspark/worker.py
+++ b/python/pyspark/worker.py
@@ -16,7 +16,7 @@
 #
 
 """
-Worker that receives input from Piped RDD.
+Worker that receives input from Piped RDD
 """
 from __future__ import print_function
 from __future__ import absolute_import
diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index c2ed4c079d3c..d42de271ce95 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -76,7 +76,7 @@
 
     <!--
       This spark-tags test-dep is needed even though it isn't used in this module, otherwise testing-cmds that exclude
-      them will yield errors.
+      them will yield errorts.
     -->
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
index da542c67d9c5..5898174657c0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Column.scala
@@ -61,10 +61,10 @@ private[sql] object Column {
 
 /**
  * A [[Column]] where an [[Encoder]] has been given for the expected input and return type.
- * To create a [[TypedColumn]], use the `as` function on a [[Column]].
+ * To  create a [[TypedColumn]], use the `as` function on a [[Column]]
  *
  * @tparam T The input type expected for this expression.  Can be `Any` if the expression is type
- *           checked by the analyzer instead of the compiler (i.e. `expr("sum(...)")`).
+ *           checked by the analyzer instead of the compiler (i.e. `expr("sum(...)")`)
  * @tparam U The output type of this column.
  *
  * @since 1.6.0