From 89fdb66fdc91dcd84cb98c3d1fe39d6240239e1c Mon Sep 17 00:00:00 2001 From: Prayag Chandran Date: Sun, 19 Jul 2015 16:53:28 -0400 Subject: [PATCH 1/4] SPARK-8916 [Documentation, MLlib] Add @since tags to mllib.regression --- .../GeneralizedLinearAlgorithm.scala | 31 ++++++++++++++-- .../mllib/regression/IsotonicRegression.scala | 37 +++++++++++++++++-- .../spark/mllib/regression/LabeledPoint.scala | 9 +++++ .../apache/spark/mllib/regression/Lasso.scala | 25 +++++++++++++ .../mllib/regression/LinearRegression.scala | 24 ++++++++++++ .../mllib/regression/RegressionModel.scala | 9 +++++ .../mllib/regression/RidgeRegression.scala | 34 +++++++++++++++++ .../regression/StreamingLinearAlgorithm.scala | 28 ++++++++++++-- .../StreamingLinearRegressionWithSGD.scala | 28 +++++++++++--- 9 files changed, 209 insertions(+), 16 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 6709bd79bc82..7927c2a22a74 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -34,6 +34,8 @@ import org.apache.spark.storage.StorageLevel * * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. + * + * @since 0.8.0 */ @DeveloperApi abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double) @@ -53,6 +55,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction + * + * @since 0.8.0 */ def predict(testData: RDD[Vector]): RDD[Double] = { // A small optimization to avoid serializing the entire model. 
Only the weightsMatrix @@ -71,6 +75,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * * @param testData array representing a single data point * @return Double prediction from the trained model + * + * @since 0.8.0 */ def predict(testData: Vector): Double = { predictPoint(testData, weights, intercept) @@ -78,6 +84,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double /** * Print a summary of the model. + * + * @since 1.2.0 */ override def toString: String = { s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}" @@ -88,6 +96,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * :: DeveloperApi :: * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM). * This class should be extended with an Optimizer to create a new GLM. + * + * @since 0.8.0 */ @DeveloperApi abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] @@ -95,7 +105,10 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List() - /** The optimizer to solve the problem. */ + /** The optimizer to solve the problem. + * + * @since 1.0.0 + */ def optimizer: Optimizer /** Whether to add intercept (default: false). */ @@ -130,6 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * The dimension of training features. 
+ * + * @since 1.4.0 */ def getNumFeatures: Int = this.numFeatures @@ -146,19 +161,21 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] this } - /** - * Create a model given the weights and intercept - */ + /* Create a model given the weights and intercept */ protected def createModel(weights: Vector, intercept: Double): M /** * Get if the algorithm uses addIntercept + * + * @since 1.4.0 */ def isAddIntercept: Boolean = this.addIntercept /** * Set if the algorithm should add an intercept. Default false. * We set the default to false because adding the intercept will cause memory allocation. + * + * @since 0.8.0 */ def setIntercept(addIntercept: Boolean): this.type = { this.addIntercept = addIntercept @@ -167,6 +184,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Set if the algorithm should validate data before training. Default true. + * + * @since 0.8.0 */ def setValidateData(validateData: Boolean): this.type = { this.validateData = validateData @@ -176,6 +195,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Run the algorithm with the configured parameters on an input * RDD of LabeledPoint entries. + * + * @since 0.8.0 */ def run(input: RDD[LabeledPoint]): M = { if (numFeatures < 0) { @@ -208,6 +229,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Run the algorithm with the configured parameters on an input RDD * of LabeledPoint entries starting from the initial weights provided. 
+ * + * @since 0.8.0 */ def run(input: RDD[LabeledPoint], initialWeights: Vector): M = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index f3b46c75c05f..242f21803d90 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -46,6 +46,8 @@ import org.apache.spark.sql.SQLContext * @param predictions Array of predictions associated to the boundaries at the same index. * Results of isotonic regression and therefore monotone. * @param isotonic indicates whether this is isotonic or antitonic. + * + * @since 1.3.0 */ @Experimental class IsotonicRegressionModel ( @@ -59,7 +61,10 @@ class IsotonicRegressionModel ( assertOrdered(boundaries) assertOrdered(predictions)(predictionOrd) - /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */ + /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. + * + * @since 1.4.0 + */ def this(boundaries: java.lang.Iterable[Double], predictions: java.lang.Iterable[Double], isotonic: java.lang.Boolean) = { @@ -83,6 +88,8 @@ class IsotonicRegressionModel ( * * @param testData Features to be labeled. * @return Predicted labels. + * + * @since 1.3.0 */ def predict(testData: RDD[Double]): RDD[Double] = { testData.map(predict) @@ -94,6 +101,8 @@ class IsotonicRegressionModel ( * * @param testData Features to be labeled. * @return Predicted labels. + * + * @since 1.3.0 */ def predict(testData: JavaDoubleRDD): JavaDoubleRDD = { JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]])) @@ -114,6 +123,8 @@ class IsotonicRegressionModel ( * 3) If testData falls between two values in boundary array then prediction is treated * as piecewise linear function and interpolated value is returned. 
In case there are * multiple values with the same boundary then the same rules as in 2) are used. + * + * @since 1.3.0 */ def predict(testData: Double): Double = { @@ -147,14 +158,21 @@ class IsotonicRegressionModel ( /** A convenient method for boundaries called by the Python API. */ private[mllib] def predictionVector: Vector = Vectors.dense(predictions) - + /* + * @since 1.4.0 + */ override def save(sc: SparkContext, path: String): Unit = { IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic) } + /* + * @since 1.4.0 + */ override protected def formatVersion: String = "1.0" } - +/* + * @since 1.4.0 + */ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { import org.apache.spark.mllib.util.Loader._ @@ -200,6 +218,9 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { } } + /* + * @since 1.4.0 + */ override def load(sc: SparkContext, path: String): IsotonicRegressionModel = { implicit val formats = DefaultFormats val (loadedClassName, version, metadata) = loadMetadata(sc, path) @@ -237,6 +258,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { * Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]] * * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]] + * + * @since 1.3.0 */ @Experimental class IsotonicRegression private (private var isotonic: Boolean) extends Serializable { @@ -245,6 +268,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * Constructs IsotonicRegression instance with default parameter isotonic = true. * * @return New instance of IsotonicRegression. + * + * @since 1.3.0 */ def this() = this(true) @@ -253,6 +278,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence. * @return This instance of IsotonicRegression. 
+ * + * @since 1.3.0 */ def setIsotonic(isotonic: Boolean): this.type = { this.isotonic = isotonic @@ -268,6 +295,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * If multiple labels share the same feature value then they are ordered before * the algorithm is executed. * @return Isotonic regression model. + * + * @since 1.3.0 */ def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = { val preprocessedInput = if (isotonic) { @@ -293,6 +322,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * If multiple labels share the same feature value then they are ordered before * the algorithm is executed. * @return Isotonic regression model. + * + * @since 1.3.0 */ def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = { run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]]) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala index d5fea822ad77..20967cc5ee8a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala @@ -28,9 +28,14 @@ import org.apache.spark.SparkException * * @param label Label for this data point. * @param features List of features for this data point. + * + * @since 0.8.0 */ @BeanInfo case class LabeledPoint(label: Double, features: Vector) { + /* + * @since 0.9.0 + */ override def toString: String = { s"($label,$features)" } @@ -38,11 +43,15 @@ case class LabeledPoint(label: Double, features: Vector) { /** * Parser for [[org.apache.spark.mllib.regression.LabeledPoint]]. + * + * @since 1.1.0 */ object LabeledPoint { /** * Parses a string resulted from `LabeledPoint#toString` into * an [[org.apache.spark.mllib.regression.LabeledPoint]]. 
+ * + * @since 1.1.0 */ def parse(s: String): LabeledPoint = { if (s.startsWith("(")) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index 4f482384f0f3..03eaf89f2f9d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD * * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. + * + * @since 0.8.0 */ class LassoModel ( override val weights: Vector, @@ -44,6 +46,9 @@ class LassoModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } + /* + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } @@ -51,8 +56,14 @@ class LassoModel ( override protected def formatVersion: String = "1.0" } +/* + * @since 1.3.0 + */ object LassoModel extends Loader[LassoModel] { + /* + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): LassoModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -77,6 +88,8 @@ object LassoModel extends Loader[LassoModel] { * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. + * + * @since 0.8.0 */ class LassoWithSGD private ( private var stepSize: Double, @@ -96,6 +109,8 @@ class LassoWithSGD private ( /** * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100, * regParam: 0.01, miniBatchFraction: 1.0}. 
+ * + * @since 0.8.0 */ def this() = this(1.0, 100, 0.01, 1.0) @@ -106,6 +121,8 @@ class LassoWithSGD private ( /** * Top-level methods for calling Lasso. + * + * @since 0.8.0 */ object LassoWithSGD { @@ -123,6 +140,8 @@ object LassoWithSGD { * @param miniBatchFraction Fraction of data to be used per iteration. * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -146,6 +165,8 @@ object LassoWithSGD { * @param stepSize Step size to be used for each iteration of gradient descent. * @param regParam Regularization parameter. * @param miniBatchFraction Fraction of data to be used per iteration. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -167,6 +188,8 @@ object LassoWithSGD { * @param regParam Regularization parameter. * @param numIterations Number of iterations of gradient descent to run. * @return a LassoModel which has the weights and offset from training. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -185,6 +208,8 @@ object LassoWithSGD { * matrix A as well as the corresponding right hand side label y * @param numIterations Number of iterations of gradient descent to run. * @return a LassoModel which has the weights and offset from training. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 9453c4f66c21..17719b17c80f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD * * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. 
+ * @since 0.8.0 */ class LinearRegressionModel ( override val weights: Vector, @@ -44,6 +45,9 @@ class LinearRegressionModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } + /** + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } @@ -51,8 +55,14 @@ class LinearRegressionModel ( override protected def formatVersion: String = "1.0" } +/** + * @since 1.3.0 + */ object LinearRegressionModel extends Loader[LinearRegressionModel] { + /** + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): LinearRegressionModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -78,6 +88,8 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] { * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. + * + * @since 0.8.0 */ class LinearRegressionWithSGD private[mllib] ( private var stepSize: Double, @@ -95,6 +107,8 @@ class LinearRegressionWithSGD private[mllib] ( /** * Construct a LinearRegression object with default parameters: {stepSize: 1.0, * numIterations: 100, miniBatchFraction: 1.0}. + * + * @since 0.8.0 */ def this() = this(1.0, 100, 1.0) @@ -105,6 +119,8 @@ class LinearRegressionWithSGD private[mllib] ( /** * Top-level methods for calling LinearRegression. + * + * @since 0.8.0 */ object LinearRegressionWithSGD { @@ -121,6 +137,8 @@ object LinearRegressionWithSGD { * @param miniBatchFraction Fraction of data to be used per iteration. * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. 
+ * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -142,6 +160,8 @@ object LinearRegressionWithSGD { * @param numIterations Number of iterations of gradient descent to run. * @param stepSize Step size to be used for each iteration of gradient descent. * @param miniBatchFraction Fraction of data to be used per iteration. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -161,6 +181,8 @@ object LinearRegressionWithSGD { * @param stepSize Step size to be used for each iteration of Gradient Descent. * @param numIterations Number of iterations of gradient descent to run. * @return a LinearRegressionModel which has the weights and offset from training. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -178,6 +200,8 @@ object LinearRegressionWithSGD { * matrix A as well as the corresponding right hand side label y * @param numIterations Number of iterations of gradient descent to run. * @return a LinearRegressionModel which has the weights and offset from training. 
+ * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala index 214ac4d0ed7d..f4d27f52ff68 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala @@ -24,6 +24,9 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.rdd.RDD +/* + * @since 0.8.0 + */ @Experimental trait RegressionModel extends Serializable { /** @@ -31,6 +34,8 @@ trait RegressionModel extends Serializable { * * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction + * + * @since 0.8.0 */ def predict(testData: RDD[Vector]): RDD[Double] @@ -39,6 +44,8 @@ trait RegressionModel extends Serializable { * * @param testData array representing a single data point * @return Double prediction from the trained model + * + * @since 0.8.0 */ def predict(testData: Vector): Double @@ -46,6 +53,8 @@ trait RegressionModel extends Serializable { * Predict values for examples stored in a JavaRDD. 
* @param testData JavaRDD representing data points to be predicted * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction + * + * @since 1.0.0 */ def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] = predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 7d28ffad45c9..28f1d1b30fc4 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD * * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. + * + * @since 0.8.0 */ class RidgeRegressionModel ( override val weights: Vector, @@ -38,6 +40,9 @@ class RidgeRegressionModel ( extends GeneralizedLinearModel(weights, intercept) with RegressionModel with Serializable with Saveable with PMMLExportable { + /* + * @since 0.8.0 + */ override protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector, @@ -45,15 +50,27 @@ class RidgeRegressionModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } + /* + * @since 1.3.0 + */ override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } + /* + * @since 1.3.0 + */ override protected def formatVersion: String = "1.0" } +/* + * @since 1.3.0 + */ object RidgeRegressionModel extends Loader[RidgeRegressionModel] { + /* + * @since 1.3.0 + */ override def load(sc: SparkContext, path: String): RidgeRegressionModel = { val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path) // Hard-code class name string in case it changes in the future @@ -78,6 +95,8 @@ object RidgeRegressionModel extends 
Loader[RidgeRegressionModel] { * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. + * + * @since 0.8.0 */ class RidgeRegressionWithSGD private ( private var stepSize: Double, @@ -98,9 +117,14 @@ class RidgeRegressionWithSGD private ( /** * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100, * regParam: 0.01, miniBatchFraction: 1.0}. + * + * @since 0.8.0 */ def this() = this(1.0, 100, 0.01, 1.0) + /* + * @since 0.8.0 + */ override protected def createModel(weights: Vector, intercept: Double) = { new RidgeRegressionModel(weights, intercept) } @@ -108,6 +132,8 @@ class RidgeRegressionWithSGD private ( /** * Top-level methods for calling RidgeRegression. + * + * @since 0.8.0 */ object RidgeRegressionWithSGD { @@ -124,6 +150,8 @@ object RidgeRegressionWithSGD { * @param miniBatchFraction Fraction of data to be used per iteration. * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -146,6 +174,8 @@ object RidgeRegressionWithSGD { * @param stepSize Step size to be used for each iteration of gradient descent. * @param regParam Regularization parameter. * @param miniBatchFraction Fraction of data to be used per iteration. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -166,6 +196,8 @@ object RidgeRegressionWithSGD { * @param regParam Regularization parameter. * @param numIterations Number of iterations of gradient descent to run. * @return a RidgeRegressionModel which has the weights and offset from training. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], @@ -183,6 +215,8 @@ object RidgeRegressionWithSGD { * @param input RDD of (label, array of features) pairs. 
* @param numIterations Number of iterations of gradient descent to run. * @return a RidgeRegressionModel which has the weights and offset from training. + * + * @since 0.8.0 */ def train( input: RDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index 141052ba813e..0bd46a0337db 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -53,6 +53,8 @@ import org.apache.spark.streaming.dstream.DStream * It is also ok to call trainOn on different streams; this will update * the model using each of the different sources, in sequence. * + * + * @since 1.1.0 */ @DeveloperApi abstract class StreamingLinearAlgorithm[ @@ -65,7 +67,10 @@ abstract class StreamingLinearAlgorithm[ /** The algorithm to use for updating. */ protected val algorithm: A - /** Return the latest model. */ + /** Return the latest model. + * + * @since 1.1.0 + */ def latestModel(): M = { model.get } @@ -77,6 +82,8 @@ abstract class StreamingLinearAlgorithm[ * batch of data from the stream. * * @param data DStream containing labeled data + * + * @since 1.1.0 */ def trainOn(data: DStream[LabeledPoint]): Unit = { if (model.isEmpty) { @@ -95,7 +102,10 @@ abstract class StreamingLinearAlgorithm[ } } - /** Java-friendly version of `trainOn`. */ + /** Java-friendly version of `trainOn`. 
+ * + * @since 1.1.0 + */ def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream) /** @@ -103,6 +113,8 @@ abstract class StreamingLinearAlgorithm[ * * @param data DStream containing feature vectors * @return DStream containing predictions + * + * @since 1.1.0 */ def predictOn(data: DStream[Vector]): DStream[Double] = { if (model.isEmpty) { @@ -111,7 +123,10 @@ abstract class StreamingLinearAlgorithm[ data.map{x => model.get.predict(x)} } - /** Java-friendly version of `predictOn`. */ + /** Java-friendly version of `predictOn`. + * + * @since 1.1.0 + */ def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = { JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]]) } @@ -121,6 +136,8 @@ abstract class StreamingLinearAlgorithm[ * @param data DStream containing feature vectors * @tparam K key type * @return DStream containing the input keys and the predictions as values + * + * @since 1.1.0 */ def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = { if (model.isEmpty) { @@ -130,7 +147,10 @@ abstract class StreamingLinearAlgorithm[ } - /** Java-friendly version of `predictOnValues`. */ + /** Java-friendly version of `predictOnValues`. 
+ * + * @since 1.1.0 + */ def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = { implicit val tag = fakeClassTag[K] JavaPairDStream.fromPairDStream( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala index c6d04464a12b..ecbda922f3a7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala @@ -40,6 +40,7 @@ import org.apache.spark.mllib.linalg.Vector * .setInitialWeights(Vectors.dense(...)) * .trainOn(DStream) * + * @since 1.1.0 */ @Experimental class StreamingLinearRegressionWithSGD private[mllib] ( @@ -54,6 +55,8 @@ class StreamingLinearRegressionWithSGD private[mllib] ( * {stepSize: 0.1, numIterations: 50, miniBatchFraction: 1.0}. * Initial weights must be set before using trainOn or predictOn * (see `StreamingLinearAlgorithm`) + * + * @since 1.1.0 */ def this() = this(0.1, 50, 1.0) @@ -61,31 +64,46 @@ class StreamingLinearRegressionWithSGD private[mllib] ( protected var model: Option[LinearRegressionModel] = None - /** Set the step size for gradient descent. Default: 0.1. */ + /** Set the step size for gradient descent. Default: 0.1. + * + * @since 1.1.0 + */ def setStepSize(stepSize: Double): this.type = { this.algorithm.optimizer.setStepSize(stepSize) this } - /** Set the number of iterations of gradient descent to run per update. Default: 50. */ + /** Set the number of iterations of gradient descent to run per update. Default: 50. + * + * @since 1.1.0 + */ def setNumIterations(numIterations: Int): this.type = { this.algorithm.optimizer.setNumIterations(numIterations) this } - /** Set the fraction of each batch to use for updates. Default: 1.0. */ + /** Set the fraction of each batch to use for updates. Default: 1.0. 
+ * + * @since 1.1.0 + */ def setMiniBatchFraction(miniBatchFraction: Double): this.type = { this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction) this } - /** Set the initial weights. */ + /** Set the initial weights. + * + * @since 1.1.0 + */ def setInitialWeights(initialWeights: Vector): this.type = { this.model = Some(algorithm.createModel(initialWeights, 0.0)) this } - /** Set the convergence tolerance. */ + /** Set the convergence tolerance. + * + * @since 1.5.0 + */ def setConvergenceTol(tolerance: Double): this.type = { this.algorithm.optimizer.setConvergenceTol(tolerance) this From 1a0365fca17c7e1d6dfd754e58d6f0fe6992e793 Mon Sep 17 00:00:00 2001 From: Prayag Chandran Date: Sun, 19 Jul 2015 18:11:37 -0400 Subject: [PATCH 2/4] Reformatting and adding a few more tags --- .../GeneralizedLinearAlgorithm.scala | 11 ++++- .../mllib/regression/IsotonicRegression.scala | 4 +- .../mllib/regression/LinearRegression.scala | 1 + .../regression/StreamingLinearAlgorithm.scala | 36 ++++++++------- .../StreamingLinearRegressionWithSGD.scala | 45 ++++++++++--------- 5 files changed, 58 insertions(+), 39 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 7927c2a22a74..99247bd0867a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -47,6 +47,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model * @param intercept Intercept of the model.
+ * + * @since 0.8.0 */ protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double @@ -105,7 +107,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List() - /** The optimizer to solve the problem. + /** + * The optimizer to solve the problem. * * @since 1.0.0 */ @@ -161,7 +164,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] this } - /* Create a model given the weights and intercept */ + /** + * Create a model given the weights and intercept + * + * @since 0.8.0 + */ protected def createModel(weights: Vector, intercept: Double): M /** diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 242f21803d90..1458b1ec3e06 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -61,7 +61,8 @@ class IsotonicRegressionModel ( assertOrdered(boundaries) assertOrdered(predictions)(predictionOrd) - /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. + /** + * A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. * * @since 1.4.0 */ @@ -158,6 +159,7 @@ class IsotonicRegressionModel ( /** A convenient method for boundaries called by the Python API. 
*/ private[mllib] def predictionVector: Vector = Vectors.dense(predictions) + /* * @since 1.4.0 */ diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 17719b17c80f..a1537f51803c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD * * @param weights Weights computed for every feature. * @param intercept Intercept computed for this model. + * * @since 0.8.0 */ class LinearRegressionModel ( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index 0bd46a0337db..0609fe10f665 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -67,10 +67,11 @@ abstract class StreamingLinearAlgorithm[ /** The algorithm to use for updating. */ protected val algorithm: A - /** Return the latest model. - * - * @since 1.1.0 - */ + /** + * Return the latest model. + * + * @since 1.1.0 + */ def latestModel(): M = { model.get } @@ -102,10 +103,11 @@ abstract class StreamingLinearAlgorithm[ } } - /** Java-friendly version of `trainOn`. - * - * @since 1.1.0 - */ + /** + * Java-friendly version of `trainOn`. + * + * @since 1.1.0 + */ def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream) /** @@ -123,10 +125,11 @@ abstract class StreamingLinearAlgorithm[ data.map{x => model.get.predict(x)} } - /** Java-friendly version of `predictOn`. - * - * @since 1.1.0 - */ + /** + * Java-friendly version of `predictOn`. 
+ * + * @since 1.1.0 + */ def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = { JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]]) } @@ -147,10 +150,11 @@ abstract class StreamingLinearAlgorithm[ } - /** Java-friendly version of `predictOnValues`. - * - * @since 1.1.0 - */ + /** + * Java-friendly version of `predictOnValues`. + * + * @since 1.1.0 + */ def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = { implicit val tag = fakeClassTag[K] JavaPairDStream.fromPairDStream( diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala index ecbda922f3a7..45741bc4beec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala @@ -64,46 +64,51 @@ class StreamingLinearRegressionWithSGD private[mllib] ( protected var model: Option[LinearRegressionModel] = None - /** Set the step size for gradient descent. Default: 0.1. - * - * @since 1.1.0 - */ + /** + * Set the step size for gradient descent. Default: 0.1. + * + * @since 1.1.0 + */ def setStepSize(stepSize: Double): this.type = { this.algorithm.optimizer.setStepSize(stepSize) this } - /** Set the number of iterations of gradient descent to run per update. Default: 50. - * - * @since 1.1.0 - */ + /** + * Set the number of iterations of gradient descent to run per update. Default: 50. + * + * @since 1.1.0 + */ def setNumIterations(numIterations: Int): this.type = { this.algorithm.optimizer.setNumIterations(numIterations) this } - /** Set the fraction of each batch to use for updates. Default: 1.0. - * - * @since 1.1.0 - */ + /** + * Set the fraction of each batch to use for updates. Default: 1.0. 
+ * + * @since 1.1.0 + */ def setMiniBatchFraction(miniBatchFraction: Double): this.type = { this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction) this } - /** Set the initial weights. - * - * @since 1.1.0 - */ + /** + * Set the initial weights. + * + * @since 1.1.0 + */ def setInitialWeights(initialWeights: Vector): this.type = { this.model = Some(algorithm.createModel(initialWeights, 0.0)) this } - /** Set the convergence tolerance. - * - * @since 1.5.0 - */ + /** + * Set the convergence tolerance. + * + * @since 1.5.0 + */ def setConvergenceTol(tolerance: Double): this.type = { this.algorithm.optimizer.setConvergenceTol(tolerance) this From 6c6d58477429a433871bbda32957f0a6802c03fa Mon Sep 17 00:00:00 2001 From: Prayag Chandran Date: Fri, 14 Aug 2015 14:26:41 -0400 Subject: [PATCH 3/4] Corrected a few tags. Removed few unnecessary tags --- .../regression/GeneralizedLinearAlgorithm.scala | 12 +++--------- .../mllib/regression/IsotonicRegression.scala | 14 +------------- .../spark/mllib/regression/LabeledPoint.scala | 3 --- .../org/apache/spark/mllib/regression/Lasso.scala | 6 +----- .../spark/mllib/regression/LinearRegression.scala | 6 +----- .../spark/mllib/regression/RegressionModel.scala | 4 ++-- .../spark/mllib/regression/RidgeRegression.scala | 13 ------------- .../regression/StreamingLinearAlgorithm.scala | 6 +++--- .../StreamingLinearRegressionWithSGD.scala | 14 -------------- 9 files changed, 11 insertions(+), 67 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala index 99247bd0867a..2980b94de35b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala @@ -47,8 +47,6 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param 
dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model * @param intercept Intercept of the model. - * - * @since 0.8.0 */ protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double @@ -58,7 +56,7 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction * - * @since 0.8.0 + * @since 1.0.0 */ def predict(testData: RDD[Vector]): RDD[Double] = { // A small optimization to avoid serializing the entire model. Only the weightsMatrix @@ -78,7 +76,7 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double * @param testData array representing a single data point * @return Double prediction from the trained model * - * @since 0.8.0 + * @since 1.0.0 */ def predict(testData: Vector): Double = { predictPoint(testData, weights, intercept) @@ -86,8 +84,6 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double /** * Print a summary of the model. - * - * @since 1.2.0 */ override def toString: String = { s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}" @@ -166,8 +162,6 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] /** * Create a model given the weights and intercept - * - * @since 0.8.0 */ protected def createModel(weights: Vector, intercept: Double): M @@ -237,7 +231,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel] * Run the algorithm with the configured parameters on an input RDD * of LabeledPoint entries starting from the initial weights provided. 
* - * @since 0.8.0 + * @since 1.0.0 */ def run(input: RDD[LabeledPoint], initialWeights: Vector): M = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 1458b1ec3e06..a5f8755b17b3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -167,11 +167,9 @@ class IsotonicRegressionModel ( IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic) } - /* - * @since 1.4.0 - */ override protected def formatVersion: String = "1.0" } + /* * @since 1.4.0 */ @@ -260,8 +258,6 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { * Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]] * * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]] - * - * @since 1.3.0 */ @Experimental class IsotonicRegression private (private var isotonic: Boolean) extends Serializable { @@ -270,8 +266,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * Constructs IsotonicRegression instance with default parameter isotonic = true. * * @return New instance of IsotonicRegression. - * - * @since 1.3.0 */ def this() = this(true) @@ -280,8 +274,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence. * @return This instance of IsotonicRegression. - * - * @since 1.3.0 */ def setIsotonic(isotonic: Boolean): this.type = { this.isotonic = isotonic @@ -297,8 +289,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * If multiple labels share the same feature value then they are ordered before * the algorithm is executed. * @return Isotonic regression model. 
- * - * @since 1.3.0 */ def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = { val preprocessedInput = if (isotonic) { @@ -324,8 +314,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali * If multiple labels share the same feature value then they are ordered before * the algorithm is executed. * @return Isotonic regression model. - * - * @since 1.3.0 */ def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = { run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]]) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala index 20967cc5ee8a..8b51011eeb29 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala @@ -33,9 +33,6 @@ import org.apache.spark.SparkException */ @BeanInfo case class LabeledPoint(label: Double, features: Vector) { - /* - * @since 0.9.0 - */ override def toString: String = { s"($label,$features)" } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index 03eaf89f2f9d..b9cbe693ec25 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -88,8 +88,6 @@ object LassoModel extends Loader[LassoModel] { * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. - * - * @since 0.8.0 */ class LassoWithSGD private ( private var stepSize: Double, @@ -109,8 +107,6 @@ class LassoWithSGD private ( /** * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100, * regParam: 0.01, miniBatchFraction: 1.0}. 
- * - * @since 0.8.0 */ def this() = this(1.0, 100, 0.01, 1.0) @@ -141,7 +137,7 @@ object LassoWithSGD { * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. * - * @since 0.8.0 + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index a1537f51803c..fb5c220daaed 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -89,8 +89,6 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] { * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. - * - * @since 0.8.0 */ class LinearRegressionWithSGD private[mllib] ( private var stepSize: Double, @@ -108,8 +106,6 @@ class LinearRegressionWithSGD private[mllib] ( /** * Construct a LinearRegression object with default parameters: {stepSize: 1.0, * numIterations: 100, miniBatchFraction: 1.0}. - * - * @since 0.8.0 */ def this() = this(1.0, 100, 1.0) @@ -139,7 +135,7 @@ object LinearRegressionWithSGD { * @param initialWeights Initial set of weights to be used. Array should be equal in size to * the number of features in the data. 
* - * @since 0.8.0 + * @since 1.0.0 */ def train( input: RDD[LabeledPoint], diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala index f4d27f52ff68..69aac4ab1ba3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala @@ -35,7 +35,7 @@ trait RegressionModel extends Serializable { * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction * - * @since 0.8.0 + * @since 1.0.0 */ def predict(testData: RDD[Vector]): RDD[Double] @@ -45,7 +45,7 @@ trait RegressionModel extends Serializable { * @param testData array representing a single data point * @return Double prediction from the trained model * - * @since 0.8.0 + * @since 1.0.0 */ def predict(testData: Vector): Double diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 28f1d1b30fc4..c6d2e71cde44 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -40,9 +40,6 @@ class RidgeRegressionModel ( extends GeneralizedLinearModel(weights, intercept) with RegressionModel with Serializable with Saveable with PMMLExportable { - /* - * @since 0.8.0 - */ override protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector, @@ -57,9 +54,6 @@ class RidgeRegressionModel ( GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } - /* - * @since 1.3.0 - */ override protected def formatVersion: String = "1.0" } @@ -95,8 +89,6 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] { * Here the data matrix has n rows, and the input 
RDD holds the set of rows of A, each with * its corresponding right hand side label y. * See also the documentation for the precise formulation. - * - * @since 0.8.0 */ class RidgeRegressionWithSGD private ( private var stepSize: Double, @@ -117,14 +109,9 @@ class RidgeRegressionWithSGD private ( /** * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100, * regParam: 0.01, miniBatchFraction: 1.0}. - * - * @since 0.8.0 */ def this() = this(1.0, 100, 0.01, 1.0) - /* - * @since 0.8.0 - */ override protected def createModel(weights: Vector, intercept: Double) = { new RidgeRegressionModel(weights, intercept) } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala index 0609fe10f665..a2ab95c47476 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala @@ -84,7 +84,7 @@ abstract class StreamingLinearAlgorithm[ * * @param data DStream containing labeled data * - * @since 1.1.0 + * @since 1.3.0 */ def trainOn(data: DStream[LabeledPoint]): Unit = { if (model.isEmpty) { @@ -106,7 +106,7 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `trainOn`. * - * @since 1.1.0 + * @since 1.3.0 */ def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream) @@ -153,7 +153,7 @@ abstract class StreamingLinearAlgorithm[ /** * Java-friendly version of `predictOnValues`. 
* - * @since 1.1.0 + * @since 1.3.0 */ def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = { implicit val tag = fakeClassTag[K] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala index 45741bc4beec..537a05274eec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala @@ -39,8 +39,6 @@ import org.apache.spark.mllib.linalg.Vector * .setNumIterations(10) * .setInitialWeights(Vectors.dense(...)) * .trainOn(DStream) - * - * @since 1.1.0 */ @Experimental class StreamingLinearRegressionWithSGD private[mllib] ( @@ -55,8 +53,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( * {stepSize: 0.1, numIterations: 50, miniBatchFraction: 1.0}. * Initial weights must be set before using trainOn or predictOn * (see `StreamingLinearAlgorithm`) - * - * @since 1.1.0 */ def this() = this(0.1, 50, 1.0) @@ -66,8 +62,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( /** * Set the step size for gradient descent. Default: 0.1. - * - * @since 1.1.0 */ def setStepSize(stepSize: Double): this.type = { this.algorithm.optimizer.setStepSize(stepSize) @@ -76,8 +70,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( /** * Set the number of iterations of gradient descent to run per update. Default: 50. - * - * @since 1.1.0 */ def setNumIterations(numIterations: Int): this.type = { this.algorithm.optimizer.setNumIterations(numIterations) @@ -86,8 +78,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( /** * Set the fraction of each batch to use for updates. Default: 1.0. 
- * - * @since 1.1.0 */ def setMiniBatchFraction(miniBatchFraction: Double): this.type = { this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction) @@ -96,8 +86,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( /** * Set the initial weights. - * - * @since 1.1.0 */ def setInitialWeights(initialWeights: Vector): this.type = { this.model = Some(algorithm.createModel(initialWeights, 0.0)) @@ -106,8 +94,6 @@ class StreamingLinearRegressionWithSGD private[mllib] ( /** * Set the convergence tolerance. - * - * @since 1.5.0 */ def setConvergenceTol(tolerance: Double): this.type = { this.algorithm.optimizer.setConvergenceTol(tolerance) From fa4dda2b01c04c91c65c5a1059020a180778c5cc Mon Sep 17 00:00:00 2001 From: Prayag Chandran Date: Fri, 14 Aug 2015 15:06:25 -0400 Subject: [PATCH 4/4] Re-formatting --- .../apache/spark/mllib/regression/IsotonicRegression.scala | 6 +++--- .../scala/org/apache/spark/mllib/regression/Lasso.scala | 6 +++--- .../org/apache/spark/mllib/regression/RegressionModel.scala | 2 +- .../org/apache/spark/mllib/regression/RidgeRegression.scala | 6 +++--- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index a5f8755b17b3..8995591d9e8c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -160,7 +160,7 @@ class IsotonicRegressionModel ( /** A convenient method for boundaries called by the Python API. 
*/ private[mllib] def predictionVector: Vector = Vectors.dense(predictions) - /* + /** * @since 1.4.0 */ override def save(sc: SparkContext, path: String): Unit = { @@ -170,7 +170,7 @@ class IsotonicRegressionModel ( override protected def formatVersion: String = "1.0" } -/* +/** * @since 1.4.0 */ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { @@ -218,7 +218,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] { } } - /* + /** * @since 1.4.0 */ override def load(sc: SparkContext, path: String): IsotonicRegressionModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index b9cbe693ec25..03eb589b05a0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -46,7 +46,7 @@ class LassoModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } - /* + /** * @since 1.3.0 */ override def save(sc: SparkContext, path: String): Unit = { @@ -56,12 +56,12 @@ class LassoModel ( override protected def formatVersion: String = "1.0" } -/* +/** * @since 1.3.0 */ object LassoModel extends Loader[LassoModel] { - /* + /** * @since 1.3.0 */ override def load(sc: SparkContext, path: String): LassoModel = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala index 69aac4ab1ba3..b097fd38fdd8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala @@ -24,7 +24,7 @@ import org.apache.spark.api.java.JavaRDD import org.apache.spark.mllib.linalg.Vector import org.apache.spark.rdd.RDD -/* +/** * @since 0.8.0 */ @Experimental diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index c6d2e71cde44..5bced6b4b7b5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -47,7 +47,7 @@ class RidgeRegressionModel ( weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept } - /* + /** * @since 1.3.0 */ override def save(sc: SparkContext, path: String): Unit = { @@ -57,12 +57,12 @@ class RidgeRegressionModel ( override protected def formatVersion: String = "1.0" } -/* +/** * @since 1.3.0 */ object RidgeRegressionModel extends Loader[RidgeRegressionModel] { - /* + /** * @since 1.3.0 */ override def load(sc: SparkContext, path: String): RidgeRegressionModel = {