[SPARK-7017][Build][Project Infra]: Refactor dev/run-tests into Python #5694
Changes from 1 commit
Commit message: …h level project changes to properly execute core tests only when necessary, changed variable names for simplicity
File: dev/run-tests.py
@@ -159,6 +159,11 @@ def run_python_style_checks():
     run_cmd(["./dev/lint-python"])


+def build_spark_documentation():
+    set_title_and_block("Building Spark Documentation", "BLOCK_DOCUMENTATION")
+    os.environ["PRODUCTION"] = "1 jekyll build"
+
+
 def exec_maven(mvn_args=[]):
     """Will call Maven in the current directory with the list of mvn_args passed
     in and returns the subprocess for any further processing"""
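A note on the added `PRODUCTION` line: it looks like a direct transliteration of the shell command `PRODUCTION=1 jekyll build` (the invocation Spark's docs/README describes), which sets the variable and runs Jekyll in one line; in Python those are two separate steps. A minimal self-contained sketch of what this step amounts to — the chdir-into-`docs/` flow and the `run_cmd` stub are assumptions for illustration, not the PR's final code:

```python
import os
import subprocess

def run_cmd(cmd):
    """Echo a command, run it, and fail on a non-zero exit
    (a stand-in for the run_cmd helper used throughout this script)."""
    print(" ".join(cmd))
    subprocess.check_call(cmd)

def build_spark_documentation():
    # Jekyll consults PRODUCTION when building the release docs.
    os.environ["PRODUCTION"] = "1"
    cwd = os.getcwd()
    os.chdir("docs")  # assumption: the Jekyll site lives under docs/
    try:
        run_cmd(["jekyll", "build"])  # requires jekyll on PATH
    finally:
        os.chdir(cwd)
```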
@@ -215,21 +220,26 @@ def get_hadoop_profiles(hadoop_version):


 def get_build_profiles(hadoop_version="hadoop2.3",
                        enable_base_profiles=True,
-                       enable_hive_profiles=False):
+                       enable_hive_profiles=False,
+                       enable_doc_profiles=False):
     """Returns a list of hadoop profiles to be used as looked up from the passed in hadoop profile
     key with the option of adding on the base and hive profiles."""

     base_profiles = ["-Pkinesis-asl"]
     hive_profiles = ["-Phive", "-Phive-thriftserver"]
+    doc_profiles = []
     hadoop_profiles = get_hadoop_profiles(hadoop_version)

     build_profiles = hadoop_profiles

     if enable_base_profiles:
-        build_profiles = build_profiles + base_profiles
+        build_profiles += base_profiles

     if enable_hive_profiles:
-        build_profiles = build_profiles + hive_profiles
+        build_profiles += hive_profiles
+
+    if enable_doc_profiles:
+        build_profiles += doc_profiles

     return build_profiles
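For illustration, here is how the extended signature composes flags — a usage sketch derived directly from the hunk above (the Hadoop flags come from `get_hadoop_profiles` and are not spelled out here):

```python
# Usage sketch for get_build_profiles as written above.
profiles = get_build_profiles(hadoop_version="hadoop2.3",
                              enable_base_profiles=True,
                              enable_hive_profiles=True,
                              enable_doc_profiles=True)
# -> get_hadoop_profiles("hadoop2.3") + ["-Pkinesis-asl"]
#    + ["-Phive", "-Phive-thriftserver"] + []
# (doc_profiles is still an empty list at this point, so enabling it
#  adds nothing yet.)
```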
@@ -259,7 +269,7 @@ def build_spark_sbt(hadoop_version):
     exec_sbt(profiles_and_goals)


-def build_apache_spark(build_tool, hadoop_version):
+def build_apache_spark(build_tool, hadoop_version, changed_modules):
     """Will build Spark against Hive v0.13.1 given the passed in build tool (either `sbt` or
     `maven`). Defaults to using `sbt`."""
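The body of `build_apache_spark` is not shown in this hunk, so the following is only a speculative sketch of why the signature gains `changed_modules` — one plausible use is toggling the new doc profiles when docs changed (`exec_sbt`/`exec_maven` are this script's helpers; the `"package"` goal is an assumption):

```python
# Speculative sketch only, not the PR's actual function body.
def build_apache_spark(build_tool, hadoop_version, changed_modules):
    build_profiles = get_build_profiles(
        hadoop_version,
        enable_hive_profiles=True,
        enable_doc_profiles="DOCS" in changed_modules)
    profiles_and_goals = build_profiles + ["package"]  # goal name assumed
    if build_tool == "maven":
        exec_maven(profiles_and_goals)
    else:
        exec_sbt(profiles_and_goals)
```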
@@ -284,7 +294,7 @@ def identify_changed_modules(test_env):
     If run under the `amplab_jenkins` environment will determine the changed files
     as compared to the `ghprbTargetBranch` and execute the necessary set of tests
     to provide coverage for the changed code."""
-    test_suite = set()
+    changed_modules = set()

     if test_env == "amplab_jenkins":
         target_branch = os.environ["ghprbTargetBranch"]
@@ -295,7 +305,6 @@ def identify_changed_modules(test_env):
         # remove any empty strings
         changed_files = [f for f in raw_output.split('\n') if f]

-
         # find any sql files
         sql_files = [f for f in changed_files
                      if any(f.startswith(p) for p in
                             ["sql/",
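The docstring above says the changed files come from comparing against `ghprbTargetBranch` (exported by Jenkins' pull request builder plugin). The exact git invocation that produces `raw_output` is not visible in these hunks, so this reconstruction is an assumption for illustration:

```python
import os
import subprocess

def changed_files_vs_target():
    # Jenkins' pull request builder exports the PR's target branch.
    target_branch = os.environ["ghprbTargetBranch"]
    # Assumed invocation: list filenames that differ from the target branch.
    raw_output = subprocess.check_output(
        ["git", "diff", "--name-only", target_branch],
        universal_newlines=True)
    # remove any empty strings, as in the hunk above
    return [f for f in raw_output.split('\n') if f]
```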
@@ -322,31 +331,39 @@ def identify_changed_modules(test_env):
                      if any(f.startswith(p) for p in
                             ["examples/src/main/scala/org/apache/spark/examples/graphx/",
                              "graphx/"])]
+        doc_files = [f for f in changed_files if f.startswith("docs/")]

-        non_sql_files = set(changed_files).difference(set(sql_files))
-
-        if non_sql_files:
-            test_suite.add("CORE")
+        # union together all changed top level project files
+        top_level_project_files = set().union([set(f) for f in [sql_files,
+                                                                mllib_files,
+                                                                streaming_files,
+                                                                graphx_files,
+                                                                doc_files]])
+        changed_core_files = set(changed_files).difference(top_level_project_files)
+
+        if changed_core_files:
+            changed_modules.add("CORE")
         if sql_files:
             print "[info] Detected changes in SQL. Will run Hive test suite."
-            test_suite.add("SQL")
-            if not non_sql_files:
-                print "[info] Detected no changes except in SQL. Will only run SQL tests."
+            changed_modules.add("SQL")
         if mllib_files:
             print "[info] Detected changes in MLlib. Will run MLlib test suite."
-            test_suite.add("MLLIB")
+            changed_modules.add("MLLIB")
         if streaming_files:
             print "[info] Detected changes in Streaming. Will run Streaming test suite."
-            test_suite.add("STREAMING")
+            changed_modules.add("STREAMING")
         if graphx_files:
             print "[info] Detected changes in GraphX. Will run GraphX test suite."
-            test_suite.add("GRAPHX")
+            changed_modules.add("GRAPHX")
+        if doc_files:
+            print "[info] Detected changes in documentation. Will build spark with documentation."
+            changed_modules.add("DOCS")

-        return test_suite
+        return changed_modules
     else:
         # we aren't in the Amplab environment so simply run all tests
-        test_suite.add("ALL")
-        return test_suite
+        changed_modules.add("ALL")
+        return changed_modules


 def run_scala_tests_maven(test_profiles):

Contributor (inline, on `if changed_core_files:`): It would also be nice to log a "Detected changes in Core" message in this branch.
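One caveat about the union in the hunk above: `set().union(...)` treats each argument as an iterable of *elements*, so passing one list of sets tries to put whole sets inside a set and raises `TypeError: unhashable type: 'set'`. A corrected sketch, unpacking the list so each per-module set becomes its own argument:

```python
# Corrected sketch: unpack the per-module sets so union() receives them
# as separate arguments instead of one unhashable list of sets.
top_level_project_files = set().union(*[set(f) for f in [sql_files,
                                                         mllib_files,
                                                         streaming_files,
                                                         graphx_files,
                                                         doc_files]])
changed_core_files = set(changed_files).difference(top_level_project_files)
```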
@@ -482,15 +499,21 @@ def main():
     run_scala_style_checks()
     run_python_style_checks()

+    # determine high level changes
+    changed_modules = identify_changed_modules(test_env)
+
+    # determine if docs were changed and if we're inside the amplab environment
+    if "DOCS" in changed_modules and test_env == "amplab_jenkins":
+        build_spark_documentation()
+
     # spark build
-    build_apache_spark(build_tool, hadoop_version)
+    build_apache_spark(build_tool, hadoop_version, changed_modules)

     # backwards compatibility checks
     detect_binary_inop_with_mima()

-    # test suites
-    test_modules = identify_changed_modules(test_env)
-    run_scala_tests(build_tool, hadoop_version, test_modules)
+    # run the test suites
+    run_scala_tests(build_tool, hadoop_version, changed_modules)
     run_python_tests()
     run_sparkr_tests()
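Since `identify_changed_modules` is now called once and its result drives both the build and the tests (instead of being recomputed before `run_scala_tests`), downstream consumers only need membership checks. A sketch of that gating pattern — the goal names in the mapping are illustrative assumptions, not the script's real table:

```python
# Illustrative sketch of how changed_modules can gate sbt goals.
module_to_goals = {
    "SQL": ["catalyst/test", "sql/test", "hive/test"],
    "MLLIB": ["mllib/test"],
    "STREAMING": ["streaming/test"],
    "GRAPHX": ["graphx/test"],
}

def sbt_goals_for(changed_modules):
    if "ALL" in changed_modules or "CORE" in changed_modules:
        return ["test"]  # core changes are not isolated, so run everything
    goals = []
    for module in sorted(changed_modules - {"DOCS"}):
        goals.extend(module_to_goals.get(module, []))
    return goals
```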
Contributor (apparently on the `hadoop_version="hadoop2.3"` default above): We already handle the default Hadoop version elsewhere, so I think that we shouldn't also declare it as a default argument here.