[SPARK-2948] [SPARK-2910] [SPARK-2101] Python 2.6 fixes
- Modify python/run-tests to test with Python 2.6, if available.
- Use unittest2 when running on Python 2.6.
- Fix issue with namedtuple.
- Skip TestOutputFormat.test_newhadoop on Python 2.6 until SPARK-2951 is fixed.

Closes #1868.  Closes #1042.
JoshRosen committed Aug 10, 2014
commit 5d18fd7af6b78d99b3efb8de2dae45560900713a
python/pyspark/mllib/tests.py (7 changes: 6 additions & 1 deletion)
@@ -19,8 +19,13 @@
 Fuller unit tests for Python MLlib.
 """
 
+import sys
 from numpy import array, array_equal
-import unittest
+
+if sys.version_info[:2] <= (2, 6):
+    import unittest2 as unittest
+else:
+    import unittest
 
 from pyspark.mllib._common import _convert_vector, _serialize_double_vector, \
     _deserialize_double_vector, _dot, _squared_distance
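
For context, a minimal sketch (not part of the patch): unittest2 is a third-party backport of the Python 2.7 unittest API to older interpreters, and the stdlib unittest on 2.6 lacks features such as skipIf and assertIn that these tests rely on. A self-contained illustration of the version-gated import, with a hypothetical test case:

    import sys

    # Fall back to the unittest2 backport on <= 2.6 (assumed installed);
    # on 2.7+ the stdlib module already provides the same API.
    if sys.version_info[:2] <= (2, 6):
        import unittest2 as unittest
    else:
        import unittest

    class ExampleTest(unittest.TestCase):
        # skipIf is a 2.7 addition; unittest2 supplies it on 2.6.
        @unittest.skipIf(sys.version_info[:2] <= (2, 6), "example skip condition")
        def test_example(self):
            self.assertIn(1, [1, 2, 3])  # assertIn is also new in 2.7

    if __name__ == "__main__":
        unittest.main()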
python/pyspark/serializers.py (4 changes: 2 additions & 2 deletions)
@@ -314,8 +314,8 @@ def _copy_func(f):
 
 _old_namedtuple = _copy_func(collections.namedtuple)
 
-def namedtuple(name, fields, verbose=False, rename=False):
-    cls = _old_namedtuple(name, fields, verbose, rename)
+def namedtuple(*args, **kwargs):
+    cls = _old_namedtuple(*args, **kwargs)
     return _hack_namedtuple(cls)
 
 # replace namedtuple with new one
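
Why the wrapper now delegates with *args/**kwargs: Python 2.6's collections.namedtuple accepts only (typename, field_names, verbose); the rename parameter was added in 2.7, so forwarding a fixed (name, fields, verbose, rename) call raises TypeError on 2.6. A minimal sketch of the delegation pattern, omitting the _hack_namedtuple pickling wrapper that the real code applies:

    import collections

    # Keep a handle on the original before replacing it.
    _old_namedtuple = collections.namedtuple

    def namedtuple(*args, **kwargs):
        # Forward everything untouched, so this works against both the
        # 2.6 signature (no rename) and the 2.7+ signature.
        return _old_namedtuple(*args, **kwargs)

    # Works the same on 2.6 and 2.7:
    Point = namedtuple("Point", ["x", "y"])
    print(Point(1, 2))  # Point(x=1, y=2)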
python/pyspark/tests.py (13 changes: 10 additions & 3 deletions)
@@ -29,9 +29,14 @@
 import sys
 import tempfile
 import time
-import unittest
 import zipfile
 
+if sys.version_info[:2] <= (2, 6):
+    import unittest2 as unittest
+else:
+    import unittest
+
+
 from pyspark.context import SparkContext
 from pyspark.files import SparkFiles
 from pyspark.serializers import read_int
@@ -605,6 +610,7 @@ def test_oldhadoop(self):
                              conf=input_conf).collect())
         self.assertEqual(old_dataset, dict_data)
 
+    @unittest.skipIf(sys.version_info[:2] <= (2, 6), "Skipped on 2.6 until SPARK-2951 is fixed")
     def test_newhadoop(self):
         basepath = self.tempdir.name
         # use custom ArrayWritable types and converters to handle arrays
@@ -905,8 +911,9 @@ def createFileInZip(self, name, content):
         pattern = re.compile(r'^ *\|', re.MULTILINE)
         content = re.sub(pattern, '', content.strip())
         path = os.path.join(self.programDir, name + ".zip")
-        with zipfile.ZipFile(path, 'w') as zip:
-            zip.writestr(name, content)
+        zip = zipfile.ZipFile(path, 'w')
+        zip.writestr(name, content)
+        zip.close()

Contributor:
Why did you change this? Does "with" on ZipFiles not work in 2.6?

Contributor Author:
Yeah, support for using ZipFile as a context manager was added in Python 2.7: https://docs.python.org/2.7/library/zipfile.html?highlight=zipfile#zipfile.ZipFile

Contributor:
Okay, makes sense. Patch looks good to me otherwise -- feel free to merge it.

         return path
 
     def test_single_script(self):
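
As the review thread notes, ZipFile only became a context manager in Python 2.7, so the patch falls back to an explicit close(). An alternative the patch does not use, sketched here for context: contextlib.closing (available since Python 2.5) restores with-statement semantics on 2.6 and closes the archive even if writestr raises:

    import zipfile
    from contextlib import closing

    # Hypothetical helper mirroring createFileInZip above; closing()
    # calls zf.close() on exit, even on error, and works on 2.6 where
    # ZipFile is not itself a context manager.
    def create_file_in_zip(path, name, content):
        with closing(zipfile.ZipFile(path, 'w')) as zf:
            zf.writestr(name, content)
        return path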
python/run-tests (8 changes: 8 additions & 0 deletions)
@@ -48,6 +48,14 @@ function run_test() {
 
 echo "Running PySpark tests. Output is in python/unit-tests.log."
 
+# Try to test with Python 2.6, since that's the minimum version that we support:
+if [ $(which python2.6) ]; then
+    export PYSPARK_PYTHON="python2.6"
+fi
+
+echo "Testing with Python version:"
+$PYSPARK_PYTHON --version
+
 run_test "pyspark/rdd.py"
 run_test "pyspark/context.py"
 run_test "pyspark/conf.py"
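
The exported PYSPARK_PYTHON variable is how PySpark selects the interpreter for its tests and workers. A small Python-side sketch (hypothetical, not part of the patch) of the same sanity check that the "Testing with Python version:" echo performs:

    import os
    import sys

    # Report which interpreter is running and whether PYSPARK_PYTHON
    # overrides the default; mirrors the shell-side version echo.
    print("Running under Python %d.%d" % sys.version_info[:2])
    print("PYSPARK_PYTHON = %s" % os.environ.get("PYSPARK_PYTHON", "(unset)"))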