[SPARK-1267][PYSPARK] Adds pip installer for pyspark
Auberon Lopez committed Aug 19, 2015
commit a288923d7600055b9af346ed74f88c7be598fbb1
27 changes: 27 additions & 0 deletions python/pyspark/__init__.py
@@ -36,6 +36,33 @@
Finer-grained cache persistence levels.

"""
import os
import sys

import xml.etree.ElementTree as ET

if (os.environ.get("SPARK_HOME", "not found") == "not found"):

What about starting to add some of the logic from findspark to autodetect SPARK_HOME?

+1

Author

Currently, the only autodetection logic that findspark has is to check where homebrew installs Spark on OSX. I think that for pyspark this is overly specific and brittle, and can lead to confusion if the user wants to use pyspark with a different version than the one installed by homebrew. Having the user set SPARK_HOME themselves makes the process unambiguous.
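For context on what that autodetection might amount to, here is a rough sketch of a findspark-style helper. It is not part of the diff, and the candidate paths are illustrative guesses rather than an authoritative list:

import os
import glob

def _guess_spark_home():
    """Best-effort guess at SPARK_HOME; returns None when nothing is found."""
    # An explicit setting always wins over guessing.
    if "SPARK_HOME" in os.environ:
        return os.environ["SPARK_HOME"]
    # Illustrative candidate locations only (a homebrew Cellar path, or a
    # spark-* unpack in the home directory); real installs vary widely.
    candidates = (glob.glob("/usr/local/Cellar/apache-spark/*/libexec") +
                  glob.glob(os.path.expanduser("~/spark-*")))
    return candidates[0] if candidates else None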

I agree with @alope107. In addition, if people are using spark-submit, then this isn't necessary, right? spark-submit sets up SPARK_HOME automatically.

Are people launching python apps frequently without using spark-submit?

Author

One of the common use cases for pyspark without spark-submit is running from a notebook environment. I think a decent number of people do this, and more will once it's easier to do so.

That's possible via the following:

PYSPARK_DRIVER_PYTHON=ipython PYSPARK_DRIVER_PYTHON_OPTS='notebook'  spark-1.4.0-bin-hadoop2.4/bin/pyspark

Not completely discoverable, but it works =)

I don't really understand this part of the code, so it would be nice to get some core devs to chime in, but it looks like

./core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala

contains a lot of logic, especially when deploying against YARN, that seems important.

Contributor

Just to clarify, it's been fairly easy to get the pyspark launcher to launch via a notebook, either by doing what @justinuang said or by just setting IPYTHON=1 (which basically does the same thing).

But there are scenarios where it's useful to forgo the pyspark launcher entirely, i.e. launch with ipython and then do all the Spark-related stuff. One key use case is in containerized notebook deployments (like tmpnb) where we want a way to launch/deploy notebooks in a generic way (e.g. with ipython), but still let someone import and launch a SparkContext after the fact.

This PR is a great step; we could get closer to that goal by adding more autodetection / path-setting logic (as pointed out by @Carreau). But I agree with @alope107 that it might be too brittle, and it would definitely be some work to support all the config / deployment modes that SparkSubmit handles now (which themselves change across releases). I suspect that's why the core devs have tried to force the language APIs to go through a common launcher, but it would be good to get more input.
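For illustration, the notebook-first workflow being described boils down to something like this once pyspark is importable from any Python process (the local[*] master and app name are just placeholders for the example):

from pyspark import SparkConf, SparkContext

# Works from a plain ipython/notebook kernel, provided the environment
# (e.g. SPARK_HOME under the current proposal) is already set up.
conf = SparkConf().setAppName("notebook-example").setMaster("local[*]")
sc = SparkContext(conf=conf)
print(sc.parallelize(range(100)).sum())
sc.stop()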

Contributor

if os.environ.get("SPARK_HOME") is None:

raise ImportError("Environment variable SPARK_HOME is undefined.")

spark_home = os.environ['SPARK_HOME']
pom_xml_file_path = spark_home + '/pom.xml'

os.path.join
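Presumably the suggestion amounts to something along these lines (a sketch, not part of the diff):

pom_xml_file_path = os.path.join(spark_home, "pom.xml")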


try:
    tree = ET.parse(pom_xml_file_path)
    root = tree.getroot()
    version_tag = root[4].text
    snapshot_version = version_tag[:5]
except:
    raise ImportError("Could not read the spark version, because pom.xml file" +
                      " is not found in SPARK_HOME(%s) directory." % (spark_home))

from pyspark.pyspark_version import __version__
if (snapshot_version != __version__):
    raise ImportError("Incompatible version of Spark(%s) and PySpark(%s)." %
                      (snapshot_version, __version__))

sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j-0.8.1-src.zip"))

We don't need this anymore; presumably, if they pip installed the package, then py4j will already be installed in site-packages.
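If a fallback were still wanted for users running against a plain Spark distribution rather than a pip install, a guarded version could look roughly like this (reusing the os and sys imports above; the py4j zip name is the one hard-coded in the diff and changes between Spark releases):

try:
    import py4j  # a pip install would pull py4j in as a regular dependency
except ImportError:
    # Fall back to the copy bundled with the Spark distribution.
    sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"],
                                    "python", "lib", "py4j-0.8.1-src.zip"))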



from pyspark.conf import SparkConf
from pyspark.context import SparkContext
17 changes: 17 additions & 0 deletions python/pyspark/pyspark_version.py
@@ -0,0 +1,17 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
__version__ = '1.5.0'
Contributor

Is there a way to source this from some existing place? That way we don't have to update the version string in multiple places. I forget where, but there should already be a central place where the version is set.

Author

I'm not seeing any version that's specific to pyspark, only a version for spark as a whole. I agree that we don't want to set a version in multiple places, but I think the one I introduced is the only version unique to pyspark.

An alternative, but trickier, idea would be to have mvn's pom.xml version be the authoritative one, and have the build process somehow add or modify that file to match the version (maybe using mvn resource filtering?). This would break being able to just "pip install -e python" in development mode, since people would have to remember to run the mvn command to sync the file over, but at least there is no risk of them going out of sync in the build.

Author

I'm not sure I entirely follow. Are you suggesting that when Spark is built, Maven creates this pyspark_version file as a part of the build process? If so, how does this affect a user who installs from PyPI?

We still need to build an sdist and wheel, so we can just make sure that whatever process we use adds that file in. Not sure if it's really worth the complexity at this moment, but my team does something internally such that our Python and Java code both get semantic versions based off of the latest tag and the git hash.
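A rough sketch of the generate-at-packaging-time idea, assuming setup.py runs from python/ with the Maven pom.xml one directory up (the paths and the helper name are assumptions, not anything agreed in this thread):

import os
import xml.etree.ElementTree as ET

def write_version_file():
    # Read the authoritative version from the top-level Maven pom.xml
    # and materialize it as pyspark/pyspark_version.py before packaging.
    pom = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", "pom.xml")
    root = ET.parse(pom).getroot()
    # Look the <version> element up by name (namespace-aware) rather than
    # by position, so reordering the pom does not break the build.
    ns = root.tag.split("}")[0].lstrip("{")
    version = root.find("{%s}version" % ns).text
    with open("pyspark/pyspark_version.py", "w") as f:
        f.write("__version__ = '%s'\n" % version)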

Contributor

I think it's error-prone to have multiple copies of the version in different places; if someone forgets to update one of them, PySpark will break (even within the repo).

I'd vote for generating the version while generating the PyPI package. If PySpark comes along with Spark, we don't need this check (at least it shouldn't fail or slow things down).

Author

So we remove the version checks entirely in the bundled version, and include them for the package uploaded to PyPI? I agree that this reduces the chance for maintainer error, but I'm worried about users upgrading versions of Spark. A user could install a bundled version of pyspark, and then later point their SPARK_HOME at a newer version of Spark. There would then be a version mismatch that wouldn't be detected.
Maybe a middle ground could be to include the version checks in both bundled and pip installations, but to include a check during PyPI package generation that the version has been properly set.
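The packaging-time check floated here could stay very small, e.g. something like the following (the function name and the pom-derived version string passed in are assumptions for illustration):

from pyspark.pyspark_version import __version__ as pyspark_version

def check_version_was_bumped(pom_version):
    # Fail the PyPI packaging step loudly if pyspark_version.py was not updated.
    if not pom_version.startswith(pyspark_version):
        raise RuntimeError("pyspark_version.py (%s) does not match pom.xml (%s)"
                           % (pyspark_version, pom_version))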

How is the version number specified for the scala side now?

Author

I'm not sure. Could someone with more experience with that side of the project chime in?


I am in favor of pyspark packaging the corresponding version of Spark. As a user experience, this is cleaner, requires fewer steps, and is more natural and in line with other pip-installable libraries. I have experience packaging jars with Python libraries in platform-independent ways and would be happy to help if wanted.
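For what a jar-bundling layout might look like, one common setuptools pattern is roughly the following (the jars/ directory and its contents are hypothetical; this PR does not ship any jars):

from setuptools import setup

setup(name='pyspark',
      version='1.5.0',
      packages=['pyspark'],
      # Ship the Spark assembly jar(s) inside the package itself so that a
      # plain pip install brings a matching Spark along, no SPARK_HOME needed.
      package_data={'pyspark': ['jars/*.jar']},
      include_package_data=True)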

19 changes: 19 additions & 0 deletions python/setup.py
@@ -0,0 +1,19 @@
#!/usr/bin/env python

from setuptools import setup

exec(compile(open("pyspark/pyspark_version.py").read(),
             "pyspark/pyspark_version.py", 'exec'))
VERSION = __version__

setup(name = 'pyspark',
   version = VERSION,

why are we using three spaces for indentation?

also, pep8, there should be no spaces around the =

name='pyspark',
version=VERSION

   description = 'Apache Spark Python API',
   author = 'Prabin Banka',
   author_email = '[email protected]',
Contributor

If we want this in the Apache repo, then these individual author tags should be replaced with references to Apache.
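That could be as simple as swapping the metadata lines for something like the following (the exact strings are a project decision, not something settled in this thread):

   author = 'Apache Software Foundation',
   author_email = 'dev@spark.apache.org',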

   url = 'https://github.com/apache/spark/tree/master/python',
   packages = ['pyspark', 'pyspark.mllib', 'pyspark.ml', 'pyspark.sql', 'pyspark.streaming'],
   data_files = [('pyspark', ['pyspark/pyspark_version.py'])],
   install_requires = ['numpy>=1.7', 'py4j==0.8.2.1', 'pandas'],
   license = 'http://www.apache.org/licenses/LICENSE-2.0',
   )