Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@ import org.apache.spark.storage.StorageLevel
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
* @since 0.8.0
*/
@DeveloperApi
abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
Expand All @@ -53,6 +55,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
*
* @param testData RDD representing data points to be predicted
* @return RDD[Double] where each entry contains the corresponding prediction
*
* @since 0.8.0
*/
def predict(testData: RDD[Vector]): RDD[Double] = {
// A small optimization to avoid serializing the entire model. Only the weightsMatrix
Expand All @@ -71,13 +75,17 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
*
* @param testData array representing a single data point
* @return Double prediction from the trained model
*
* @since 0.8.0
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How were you able to tag this? Any version less that 1.0.0 gives me fatal: Invalid object name error.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I hadn't checked for the right signature of the method. I will correct this and add other updates to the PR soon.

*/
def predict(testData: Vector): Double = {
predictPoint(testData, weights, intercept)
}

/**
* Print a summary of the model.
*
* @since 1.2.0
*/
override def toString: String = {
s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}"
Expand All @@ -88,14 +96,19 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
* :: DeveloperApi ::
* GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
* This class should be extended with an Optimizer to create a new GLM.
*
* @since 0.8.0
*/
@DeveloperApi
abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
extends Logging with Serializable {

protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()

/** The optimizer to solve the problem. */
/** The optimizer to solve the problem.
*
* @since 1.0.0
*/
def optimizer: Optimizer

/** Whether to add intercept (default: false). */
Expand Down Expand Up @@ -130,6 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]

/**
* The dimension of training features.
*
* @since 1.4.0
*/
def getNumFeatures: Int = this.numFeatures

Expand All @@ -146,19 +161,21 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
this
}

/**
* Create a model given the weights and intercept
*/
/* Create a model given the weights and intercept */
protected def createModel(weights: Vector, intercept: Double): M

/**
* Get if the algorithm uses addIntercept
*
* @since 1.4.0
*/
def isAddIntercept: Boolean = this.addIntercept

/**
* Set if the algorithm should add an intercept. Default false.
* We set the default to false because adding the intercept will cause memory allocation.
*
* @since 0.8.0
*/
def setIntercept(addIntercept: Boolean): this.type = {
this.addIntercept = addIntercept
Expand All @@ -167,6 +184,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]

/**
* Set if the algorithm should validate data before training. Default true.
*
* @since 0.8.0
*/
def setValidateData(validateData: Boolean): this.type = {
this.validateData = validateData
Expand All @@ -176,6 +195,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Run the algorithm with the configured parameters on an input
* RDD of LabeledPoint entries.
*
* @since 0.8.0
*/
def run(input: RDD[LabeledPoint]): M = {
if (numFeatures < 0) {
Expand Down Expand Up @@ -208,6 +229,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
/**
* Run the algorithm with the configured parameters on an input RDD
* of LabeledPoint entries starting from the initial weights provided.
*
* @since 0.8.0
*/
def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@ import org.apache.spark.sql.SQLContext
* @param predictions Array of predictions associated to the boundaries at the same index.
* Results of isotonic regression and therefore monotone.
* @param isotonic indicates whether this is isotonic or antitonic.
*
* @since 1.3.0
*/
@Experimental
class IsotonicRegressionModel (
Expand All @@ -59,7 +61,10 @@ class IsotonicRegressionModel (
assertOrdered(boundaries)
assertOrdered(predictions)(predictionOrd)

/** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */
/** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
*
* @since 1.4.0
*/
def this(boundaries: java.lang.Iterable[Double],
predictions: java.lang.Iterable[Double],
isotonic: java.lang.Boolean) = {
Expand All @@ -83,6 +88,8 @@ class IsotonicRegressionModel (
*
* @param testData Features to be labeled.
* @return Predicted labels.
*
* @since 1.3.0
*/
def predict(testData: RDD[Double]): RDD[Double] = {
testData.map(predict)
Expand All @@ -94,6 +101,8 @@ class IsotonicRegressionModel (
*
* @param testData Features to be labeled.
* @return Predicted labels.
*
* @since 1.3.0
*/
def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
Expand All @@ -114,6 +123,8 @@ class IsotonicRegressionModel (
* 3) If testData falls between two values in boundary array then prediction is treated
* as piecewise linear function and interpolated value is returned. In case there are
* multiple values with the same boundary then the same rules as in 2) are used.
*
* @since 1.3.0
*/
def predict(testData: Double): Double = {

Expand Down Expand Up @@ -147,14 +158,21 @@ class IsotonicRegressionModel (

/** A convenient method for boundaries called by the Python API. */
private[mllib] def predictionVector: Vector = Vectors.dense(predictions)

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be /**

* @since 1.4.0
*/
override def save(sc: SparkContext, path: String): Unit = {
IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
}

/*
* @since 1.4.0
*/
override protected def formatVersion: String = "1.0"
}

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* @since 1.4.0
*/
object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {

import org.apache.spark.mllib.util.Loader._
Expand Down Expand Up @@ -200,6 +218,9 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
}
}

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* @since 1.4.0
*/
override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
implicit val formats = DefaultFormats
val (loadedClassName, version, metadata) = loadMetadata(sc, path)
Expand Down Expand Up @@ -237,6 +258,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
* Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]]
*
* @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
*
* @since 1.3.0
*/
@Experimental
class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
Expand All @@ -245,6 +268,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* Constructs IsotonicRegression instance with default parameter isotonic = true.
*
* @return New instance of IsotonicRegression.
*
* @since 1.3.0
*/
def this() = this(true)

Expand All @@ -253,6 +278,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
*
* @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
* @return This instance of IsotonicRegression.
*
* @since 1.3.0
*/
def setIsotonic(isotonic: Boolean): this.type = {
this.isotonic = isotonic
Expand All @@ -268,6 +295,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* If multiple labels share the same feature value then they are ordered before
* the algorithm is executed.
* @return Isotonic regression model.
*
* @since 1.3.0
*/
def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
val preprocessedInput = if (isotonic) {
Expand All @@ -293,6 +322,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
* If multiple labels share the same feature value then they are ordered before
* the algorithm is executed.
* @return Isotonic regression model.
*
* @since 1.3.0
*/
def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,21 +28,30 @@ import org.apache.spark.SparkException
*
* @param label Label for this data point.
* @param features List of features for this data point.
*
* @since 0.8.0
*/
@BeanInfo
case class LabeledPoint(label: Double, features: Vector) {
/*
* @since 0.9.0
*/
override def toString: String = {
s"($label,$features)"
}
}

/**
* Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].
*
* @since 1.1.0
*/
object LabeledPoint {
/**
* Parses a string resulted from `LabeledPoint#toString` into
* an [[org.apache.spark.mllib.regression.LabeledPoint]].
*
* @since 1.1.0
*/
def parse(s: String): LabeledPoint = {
if (s.startsWith("(")) {
Expand Down
25 changes: 25 additions & 0 deletions mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD
*
* @param weights Weights computed for every feature.
* @param intercept Intercept computed for this model.
*
* @since 0.8.0
*/
class LassoModel (
override val weights: Vector,
Expand All @@ -44,15 +46,24 @@ class LassoModel (
weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
}

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* @since 1.3.0
*/
override def save(sc: SparkContext, path: String): Unit = {
GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
}

override protected def formatVersion: String = "1.0"
}

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* @since 1.3.0
*/
object LassoModel extends Loader[LassoModel] {

/*
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

* @since 1.3.0
*/
override def load(sc: SparkContext, path: String): LassoModel = {
val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
// Hard-code class name string in case it changes in the future
Expand All @@ -77,6 +88,8 @@ object LassoModel extends Loader[LassoModel] {
* Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
* its corresponding right hand side label y.
* See also the documentation for the precise formulation.
*
* @since 0.8.0
*/
class LassoWithSGD private (
private var stepSize: Double,
Expand All @@ -96,6 +109,8 @@ class LassoWithSGD private (
/**
* Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
* regParam: 0.01, miniBatchFraction: 1.0}.
*
* @since 0.8.0
*/
def this() = this(1.0, 100, 0.01, 1.0)

Expand All @@ -106,6 +121,8 @@ class LassoWithSGD private (

/**
* Top-level methods for calling Lasso.
*
* @since 0.8.0
*/
object LassoWithSGD {

Expand All @@ -123,6 +140,8 @@ object LassoWithSGD {
* @param miniBatchFraction Fraction of data to be used per iteration.
* @param initialWeights Initial set of weights to be used. Array should be equal in size to
* the number of features in the data.
*
* @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
Expand All @@ -146,6 +165,8 @@ object LassoWithSGD {
* @param stepSize Step size to be used for each iteration of gradient descent.
* @param regParam Regularization parameter.
* @param miniBatchFraction Fraction of data to be used per iteration.
*
* @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
Expand All @@ -167,6 +188,8 @@ object LassoWithSGD {
* @param regParam Regularization parameter.
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
*
* @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
Expand All @@ -185,6 +208,8 @@ object LassoWithSGD {
* matrix A as well as the corresponding right hand side label y
* @param numIterations Number of iterations of gradient descent to run.
* @return a LassoModel which has the weights and offset from training.
*
* @since 0.8.0
*/
def train(
input: RDD[LabeledPoint],
Expand Down
Loading