Skip to content
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion python/pyspark/mllib/recommendation.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import array
from collections import namedtuple

from pyspark import SparkContext
from pyspark import SparkContext, since
from pyspark.rdd import RDD
from pyspark.mllib.common import JavaModelWrapper, callMLlibFunc, inherit_doc
from pyspark.mllib.util import JavaLoader, JavaSaveable
Expand All @@ -36,6 +36,8 @@ class Rating(namedtuple("Rating", ["user", "product", "rating"])):
(1, 2, 5.0)
>>> (r[0], r[1], r[2])
(1, 2, 5.0)

.. versionadded:: 1.2.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be .. versionadded::

"""

def __reduce__(self):
Expand Down Expand Up @@ -111,13 +113,17 @@ class MatrixFactorizationModel(JavaModelWrapper, JavaSaveable, JavaLoader):
... rmtree(path)
... except OSError:
... pass

.. versionadded:: 0.9.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be .. versionadded::

"""
@since("0.9.0")
def predict(self, user, product):
    """
    Return the predicted rating for a single (user, product) pair.

    Both identifiers are coerced with ``int`` before being handed to the
    wrapped Java model.
    """
    user_id = int(user)
    product_id = int(product)
    return self._java_model.predict(user_id, product_id)

@since("0.9.0")
def predictAll(self, user_product):
"""
Returns a list of predicted ratings for input user and product pairs.
Expand All @@ -128,27 +134,31 @@ def predictAll(self, user_product):
user_product = user_product.map(lambda u_p: (int(u_p[0]), int(u_p[1])))
return self.call("predict", user_product)

@since("1.2.0")
def userFeatures(self):
    """
    Return a paired RDD keyed by user, whose values are the feature
    arrays (``array.array`` of type ``'d'``) for each user.
    """
    def _as_double_array(values):
        return array.array('d', values)

    features = self.call("getUserFeatures")
    return features.mapValues(_as_double_array)

@since("1.2.0")
def productFeatures(self):
    """
    Return a paired RDD keyed by product, whose values are the feature
    arrays (``array.array`` of type ``'d'``) for each product.
    """
    def _as_double_array(values):
        return array.array('d', values)

    features = self.call("getProductFeatures")
    return features.mapValues(_as_double_array)

@since("1.4.0")
def recommendUsers(self, product, num):
    """
    Recommend the top "num" users for the given product.

    The result is a list of Rating objects ordered by predicted rating,
    highest first.
    """
    recommendations = self.call("recommendUsers", product, num)
    return list(recommendations)

@since("1.4.0")
def recommendProducts(self, user, num):
"""
Recommends the top "num" number of products for a given user and returns a list
Expand All @@ -157,17 +167,25 @@ def recommendProducts(self, user, num):
return list(self.call("recommendProducts", user, num))

@property
# Review feedback on this change: the Python ``rank`` property was only
# added in 1.4.0, so it must not be annotated as 1.3.1.
@since("1.4.0")
def rank(self):
    """Rank for the features in this model"""
    return self.call("rank")

@classmethod
@since("1.3.1")
def load(cls, sc, path):
    """Load a model from the given path"""
    java_model = cls._load_java(sc, path)
    # Wrap the loaded Java model before handing it to the Python model class.
    return MatrixFactorizationModel(sc._jvm.MatrixFactorizationModelWrapper(java_model))


class ALS(object):
"""Alternating Least Squares matrix factorization

.. versionadded:: 0.9.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same again - should be .. versionadded::

"""

@classmethod
def _prepare(cls, ratings):
Expand All @@ -188,15 +206,31 @@ def _prepare(cls, ratings):
return ratings

@classmethod
@since("0.9.0")
def train(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, nonnegative=False,
          seed=None):
    """
    Train a matrix factorization model from an RDD of explicit ratings.

    The ratings are (userID, productID, rating) pairs given by users to
    some products. The ratings matrix is approximated as the product of
    two lower-rank matrices of the given rank (number of features). The
    features are solved for by running the given number of ALS
    iterations, using a level of parallelism given by `blocks`.
    """
    prepared = cls._prepare(ratings)
    java_model = callMLlibFunc("trainALSModel", prepared, rank, iterations,
                               lambda_, blocks, nonnegative, seed)
    return MatrixFactorizationModel(java_model)

@classmethod
@since("0.9.0")
def trainImplicit(cls, ratings, rank, iterations=5, lambda_=0.01, blocks=-1, alpha=0.01,
                  nonnegative=False, seed=None):
    """
    Train a matrix factorization model from an RDD of 'implicit preferences'.

    The preferences are (userID, productID, preference) pairs given by
    users to some products. The ratings matrix is approximated as the
    product of two lower-rank matrices of the given rank (number of
    features). The features are solved for by running the given number
    of ALS iterations, using a level of parallelism given by `blocks`.
    """
    prepared = cls._prepare(ratings)
    java_model = callMLlibFunc("trainImplicitALSModel", prepared, rank,
                               iterations, lambda_, blocks, alpha, nonnegative, seed)
    return MatrixFactorizationModel(java_model)
Expand Down