Skip to content
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
[SPARK-2954] Fix MLlib _deserialize_double on Python 2.6.
  • Loading branch information
JoshRosen committed Aug 10, 2014
commit 983d25947fa2ebadee0360ae4ca54538a5ae4338
11 changes: 10 additions & 1 deletion python/pyspark/mllib/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#

import struct
import sys
import numpy
from numpy import ndarray, float64, int64, int32, array_equal, array
from pyspark import SparkContext, RDD
Expand Down Expand Up @@ -78,6 +79,14 @@
LABELED_POINT_MAGIC = 4


# Workaround for SPARK-2954: before Python 2.7, struct.unpack couldn't unpack
# bytearray()s.  `_unpack` is a drop-in replacement for struct.unpack that is
# chosen once, at import time, based on the running interpreter version.
if sys.version_info[:2] <= (2, 6):
    def _unpack(fmt, string):
        """
        Unpack `string` according to the struct format `fmt`.

        The data is wrapped in buffer() before being handed to
        struct.unpack, because on these interpreter versions
        struct.unpack cannot unpack a bytearray directly.
        Returns whatever struct.unpack returns for the wrapped data.
        """
        # buffer() is only referenced on this branch, i.e. only on
        # interpreters where the version check above succeeded.
        return struct.unpack(fmt, buffer(string))
else:
    # Newer interpreters: struct.unpack is used as-is, with no wrapper.
    _unpack = struct.unpack


def _deserialize_numpy_array(shape, ba, offset, dtype=float64):
"""
Deserialize a numpy array of the given type from an offset in
Expand Down Expand Up @@ -191,7 +200,7 @@ def _deserialize_double(ba, offset=0):
raise TypeError("_deserialize_double called on a %s; wanted bytearray" % type(ba))
if len(ba) - offset != 8:
raise TypeError("_deserialize_double called on a %d-byte array; wanted 8 bytes." % nb)
return struct.unpack("d", ba[offset:])[0]
return _unpack("d", ba[offset:])[0]


def _deserialize_double_vector(ba, offset=0):
Expand Down