-
Notifications
You must be signed in to change notification settings - Fork 29k
[SPARK-18652][PYTHON] Include the example data and third-party licenses in pyspark package. #16082
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
43019db
9735e20
ab51ae3
90ca61d
5706d8c
8d3ef53
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
- Loading branch information
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -74,7 +74,10 @@ | |
| JARS_TARGET = os.path.join(TEMP_PATH, "jars") | ||
| EXAMPLES_TARGET = os.path.join(TEMP_PATH, "examples") | ||
| DATA_TARGET = os.path.join(TEMP_PATH, "data") | ||
| LICENSES_PATH = os.path.join(SPARK_HOME, "licenses") | ||
| LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses") | ||
|
|
||
| data_files = glob.glob(os.path.join(LICENSES_PATH, "*")) | ||
|
||
|
|
||
| # Check and see if we are under the spark path in which case we need to build the symlink farm. | ||
| # This is important because we only want to build the symlink farm while under Spark otherwise we | ||
|
|
@@ -117,12 +120,14 @@ def _supports_symlinks(): | |
| os.symlink(SCRIPTS_PATH, SCRIPTS_TARGET) | ||
| os.symlink(EXAMPLES_PATH, EXAMPLES_TARGET) | ||
| os.symlink(DATA_PATH, DATA_TARGET) | ||
| os.symlink(LICENSES_PATH, LICENSES_TARGET) | ||
| else: | ||
| # For windows fall back to the slower copytree | ||
| copytree(JARS_PATH, JARS_TARGET) | ||
| copytree(SCRIPTS_PATH, SCRIPTS_TARGET) | ||
| copytree(EXAMPLES_PATH, EXAMPLES_TARGET) | ||
| copytree(DATA_PATH, DATA_TARGET) | ||
| copytree(LICENSES_PATH, LICENSES_TARGET) | ||
| else: | ||
| # If we are not inside of SPARK_HOME verify we have the required symlink farm | ||
| if not os.path.exists(JARS_TARGET): | ||
|
|
@@ -166,21 +171,25 @@ def _supports_symlinks(): | |
| 'pyspark.python.pyspark', | ||
| 'pyspark.python.lib', | ||
| 'pyspark.data', | ||
| 'pyspark.licenses', | ||
| 'pyspark.examples.src.main.python'], | ||
| include_package_data=True, | ||
| package_dir={ | ||
| 'pyspark.jars': 'deps/jars', | ||
| 'pyspark.bin': 'deps/bin', | ||
| 'pyspark.python.lib': 'lib', | ||
| 'pyspark.data': 'deps/data', | ||
| 'pyspark.licenses': 'deps/licenses', | ||
| 'pyspark.examples.src.main.python': 'deps/examples', | ||
| }, | ||
| package_data={ | ||
| 'pyspark.jars': ['*.jar'], | ||
| 'pyspark.bin': ['*'], | ||
| 'pyspark.python.lib': ['*.zip'], | ||
| 'pyspark.data': ['*'], | ||
| 'pyspark.licenses': ['*.txt'], | ||
| 'pyspark.examples.src.main.python': ['*.py', '*/*.py']}, | ||
| data_files=[('', data_files)], | ||
|
||
| scripts=scripts, | ||
| license='http://www.apache.org/licenses/LICENSE-2.0', | ||
| install_requires=['py4j==0.10.4'], | ||
|
|
@@ -210,9 +219,11 @@ def _supports_symlinks(): | |
| os.remove(os.path.join(TEMP_PATH, "bin")) | ||
| os.remove(os.path.join(TEMP_PATH, "examples")) | ||
| os.remove(os.path.join(TEMP_PATH, "data")) | ||
| os.remove(os.path.join(TEMP_PATH, "licenses")) | ||
| else: | ||
| rmtree(os.path.join(TEMP_PATH, "jars")) | ||
| rmtree(os.path.join(TEMP_PATH, "bin")) | ||
| rmtree(os.path.join(TEMP_PATH, "examples")) | ||
| rmtree(os.path.join(TEMP_PATH, "data")) | ||
| rmtree(os.path.join(TEMP_PATH, "licenses")) | ||
| os.rmdir(TEMP_PATH) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Super minor, and sorry I didn't notice it earlier: we define all of the `_PATH` constants before the `_TARGET` ones, so it would be good to keep that flow for anyone skimming the code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Done. Good catch!