From 89fdb66fdc91dcd84cb98c3d1fe39d6240239e1c Mon Sep 17 00:00:00 2001
From: Prayag Chandran <prayagchandran@gmail.com>
Date: Sun, 19 Jul 2015 16:53:28 -0400
Subject: [PATCH 1/4] SPARK-8916 [Documentation, MLlib] Add @since tags to
 mllib.regression

---
 .../GeneralizedLinearAlgorithm.scala          | 31 ++++++++++++++--
 .../mllib/regression/IsotonicRegression.scala | 37 +++++++++++++++++--
 .../spark/mllib/regression/LabeledPoint.scala |  9 +++++
 .../apache/spark/mllib/regression/Lasso.scala | 25 +++++++++++++
 .../mllib/regression/LinearRegression.scala   | 24 ++++++++++++
 .../mllib/regression/RegressionModel.scala    |  9 +++++
 .../mllib/regression/RidgeRegression.scala    | 34 +++++++++++++++++
 .../regression/StreamingLinearAlgorithm.scala | 28 ++++++++++++--
 .../StreamingLinearRegressionWithSGD.scala    | 28 +++++++++++---
 9 files changed, 209 insertions(+), 16 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 6709bd79bc82..7927c2a22a74 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -34,6 +34,8 @@ import org.apache.spark.storage.StorageLevel
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
  */
 @DeveloperApi
 abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
@@ -53,6 +55,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    *
    * @param testData RDD representing data points to be predicted
    * @return RDD[Double] where each entry contains the corresponding prediction
+   *
+   * @since 0.8.0
    */
   def predict(testData: RDD[Vector]): RDD[Double] = {
     // A small optimization to avoid serializing the entire model. Only the weightsMatrix
@@ -71,6 +75,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    *
    * @param testData array representing a single data point
    * @return Double prediction from the trained model
+   *
+   * @since 0.8.0
    */
   def predict(testData: Vector): Double = {
     predictPoint(testData, weights, intercept)
@@ -78,6 +84,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
 
   /**
    * Print a summary of the model.
+   *
+   * @since 1.2.0
    */
   override def toString: String = {
     s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}"
@@ -88,6 +96,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
  * :: DeveloperApi ::
  * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
  * This class should be extended with an Optimizer to create a new GLM.
+ *
+ * @since 0.8.0
  */
 @DeveloperApi
 abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
@@ -95,7 +105,10 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()
 
-  /** The optimizer to solve the problem. */
+  /** The optimizer to solve the problem.
+   *
+   * @since 1.0.0
+   */
   def optimizer: Optimizer
 
   /** Whether to add intercept (default: false). */
@@ -130,6 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   /**
    * The dimension of training features.
+   *
+   * @since 1.4.0
    */
   def getNumFeatures: Int = this.numFeatures
 
@@ -146,19 +161,21 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
     this
   }
 
-  /**
-   * Create a model given the weights and intercept
-   */
+  /* Create a model given the weights and intercept */
   protected def createModel(weights: Vector, intercept: Double): M
 
   /**
    * Get if the algorithm uses addIntercept
+   *
+   * @since 1.4.0
    */
   def isAddIntercept: Boolean = this.addIntercept
 
   /**
    * Set if the algorithm should add an intercept. Default false.
    * We set the default to false because adding the intercept will cause memory allocation.
+   *
+   * @since 0.8.0
    */
   def setIntercept(addIntercept: Boolean): this.type = {
     this.addIntercept = addIntercept
@@ -167,6 +184,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   /**
    * Set if the algorithm should validate data before training. Default true.
+   *
+   * @since 0.8.0
    */
   def setValidateData(validateData: Boolean): this.type = {
     this.validateData = validateData
@@ -176,6 +195,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
   /**
    * Run the algorithm with the configured parameters on an input
    * RDD of LabeledPoint entries.
+   *
+   * @since 0.8.0
    */
   def run(input: RDD[LabeledPoint]): M = {
     if (numFeatures < 0) {
@@ -208,6 +229,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
   /**
    * Run the algorithm with the configured parameters on an input RDD
    * of LabeledPoint entries starting from the initial weights provided.
+   *
+   * @since 0.8.0
    */
   def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index f3b46c75c05f..242f21803d90 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -46,6 +46,8 @@ import org.apache.spark.sql.SQLContext
  * @param predictions Array of predictions associated to the boundaries at the same index.
  *                    Results of isotonic regression and therefore monotone.
  * @param isotonic indicates whether this is isotonic or antitonic.
+ *
+ * @since 1.3.0
  */
 @Experimental
 class IsotonicRegressionModel (
@@ -59,7 +61,10 @@ class IsotonicRegressionModel (
   assertOrdered(boundaries)
   assertOrdered(predictions)(predictionOrd)
 
-  /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */
+  /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
+   *
+   * @since 1.4.0
+   */
   def this(boundaries: java.lang.Iterable[Double],
       predictions: java.lang.Iterable[Double],
       isotonic: java.lang.Boolean) = {
@@ -83,6 +88,8 @@ class IsotonicRegressionModel (
    *
    * @param testData Features to be labeled.
    * @return Predicted labels.
+   *
+   * @since 1.3.0
    */
   def predict(testData: RDD[Double]): RDD[Double] = {
     testData.map(predict)
@@ -94,6 +101,8 @@ class IsotonicRegressionModel (
    *
    * @param testData Features to be labeled.
    * @return Predicted labels.
+   *
+   * @since 1.3.0
    */
   def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
     JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
@@ -114,6 +123,8 @@ class IsotonicRegressionModel (
    *         3) If testData falls between two values in boundary array then prediction is treated
    *           as piecewise linear function and interpolated value is returned. In case there are
    *           multiple values with the same boundary then the same rules as in 2) are used.
+   *
+   * @since 1.3.0
    */
   def predict(testData: Double): Double = {
 
@@ -147,14 +158,21 @@ class IsotonicRegressionModel (
 
   /** A convenient method for boundaries called by the Python API. */
   private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
-
+  /*
+   * @since 1.4.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
   }
 
+  /*
+   * @since 1.4.0
+   */
   override protected def formatVersion: String = "1.0"
 }
-
+/*
+ * @since 1.4.0
+ */
 object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
 
   import org.apache.spark.mllib.util.Loader._
@@ -200,6 +218,9 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
     }
   }
 
+  /*
+   * @since 1.4.0
+   */
   override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
     implicit val formats = DefaultFormats
     val (loadedClassName, version, metadata) = loadMetadata(sc, path)
@@ -237,6 +258,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
  *   Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]]
  *
  * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
+ *
+ * @since 1.3.0
  */
 @Experimental
 class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
@@ -245,6 +268,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    * Constructs IsotonicRegression instance with default parameter isotonic = true.
    *
    * @return New instance of IsotonicRegression.
+   *
+   * @since 1.3.0
    */
   def this() = this(true)
 
@@ -253,6 +278,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *
    * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
    * @return This instance of IsotonicRegression.
+   *
+   * @since 1.3.0
    */
   def setIsotonic(isotonic: Boolean): this.type = {
     this.isotonic = isotonic
@@ -268,6 +295,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
+   *
+   * @since 1.3.0
    */
   def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
     val preprocessedInput = if (isotonic) {
@@ -293,6 +322,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
+   *
+   * @since 1.3.0
    */
   def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
     run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
index d5fea822ad77..20967cc5ee8a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -28,9 +28,14 @@ import org.apache.spark.SparkException
  *
  * @param label Label for this data point.
  * @param features List of features for this data point.
+ *
+ * @since 0.8.0
  */
 @BeanInfo
 case class LabeledPoint(label: Double, features: Vector) {
+  /*
+   * @since 0.9.0
+   */
   override def toString: String = {
     s"($label,$features)"
   }
@@ -38,11 +43,15 @@ case class LabeledPoint(label: Double, features: Vector) {
 
 /**
  * Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].
+ *
+ * @since 1.1.0
  */
 object LabeledPoint {
   /**
    * Parses a string resulted from `LabeledPoint#toString` into
    * an [[org.apache.spark.mllib.regression.LabeledPoint]].
+   *
+   * @since 1.1.0
    */
   def parse(s: String): LabeledPoint = {
     if (s.startsWith("(")) {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 4f482384f0f3..03eaf89f2f9d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
  */
 class LassoModel (
     override val weights: Vector,
@@ -44,6 +46,9 @@ class LassoModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
+  /*
+   * @since 1.3.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
@@ -51,8 +56,14 @@ class LassoModel (
   override protected def formatVersion: String = "1.0"
 }
 
+/*
+ * @since 1.3.0
+ */
 object LassoModel extends Loader[LassoModel] {
 
+  /*
+   * @since 1.3.0
+   */
   override def load(sc: SparkContext, path: String): LassoModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -77,6 +88,8 @@ object LassoModel extends Loader[LassoModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
+ *
+ * @since 0.8.0
  */
 class LassoWithSGD private (
     private var stepSize: Double,
@@ -96,6 +109,8 @@ class LassoWithSGD private (
   /**
    * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParam: 0.01, miniBatchFraction: 1.0}.
+   *
+   * @since 0.8.0
    */
   def this() = this(1.0, 100, 0.01, 1.0)
 
@@ -106,6 +121,8 @@ class LassoWithSGD private (
 
 /**
  * Top-level methods for calling Lasso.
+ *
+ * @since 0.8.0
  */
 object LassoWithSGD {
 
@@ -123,6 +140,8 @@ object LassoWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -146,6 +165,8 @@ object LassoWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -167,6 +188,8 @@ object LassoWithSGD {
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LassoModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -185,6 +208,8 @@ object LassoWithSGD {
    *              matrix A as well as the corresponding right hand side label y
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LassoModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 9453c4f66c21..17719b17c80f 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ * @since 0.8.0
  */
 class LinearRegressionModel (
     override val weights: Vector,
@@ -44,6 +45,9 @@ class LinearRegressionModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
+  /**
+   * @since 1.3.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
@@ -51,8 +55,14 @@ class LinearRegressionModel (
   override protected def formatVersion: String = "1.0"
 }
 
+/**
+ * @since 1.3.0
+ */
 object LinearRegressionModel extends Loader[LinearRegressionModel] {
 
+  /**
+   * @since 1.3.0
+   */
   override def load(sc: SparkContext, path: String): LinearRegressionModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -78,6 +88,8 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
+ *
+ * @since 0.8.0
  */
 class LinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
@@ -95,6 +107,8 @@ class LinearRegressionWithSGD private[mllib] (
   /**
    * Construct a LinearRegression object with default parameters: {stepSize: 1.0,
    * numIterations: 100, miniBatchFraction: 1.0}.
+   *
+   * @since 0.8.0
    */
   def this() = this(1.0, 100, 1.0)
 
@@ -105,6 +119,8 @@ class LinearRegressionWithSGD private[mllib] (
 
 /**
  * Top-level methods for calling LinearRegression.
+ *
+ * @since 0.8.0
  */
 object LinearRegressionWithSGD {
 
@@ -121,6 +137,8 @@ object LinearRegressionWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -142,6 +160,8 @@ object LinearRegressionWithSGD {
    * @param numIterations Number of iterations of gradient descent to run.
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param miniBatchFraction Fraction of data to be used per iteration.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -161,6 +181,8 @@ object LinearRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of Gradient Descent.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LinearRegressionModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -178,6 +200,8 @@ object LinearRegressionWithSGD {
    *              matrix A as well as the corresponding right hand side label y
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LinearRegressionModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 214ac4d0ed7d..f4d27f52ff68 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -24,6 +24,9 @@ import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.rdd.RDD
 
+/*
+ * @since 0.8.0
+ */
 @Experimental
 trait RegressionModel extends Serializable {
   /**
@@ -31,6 +34,8 @@ trait RegressionModel extends Serializable {
    *
    * @param testData RDD representing data points to be predicted
    * @return RDD[Double] where each entry contains the corresponding prediction
+   *
+   * @since 0.8.0
    */
   def predict(testData: RDD[Vector]): RDD[Double]
 
@@ -39,6 +44,8 @@ trait RegressionModel extends Serializable {
    *
    * @param testData array representing a single data point
    * @return Double prediction from the trained model
+   *
+   * @since 0.8.0
    */
   def predict(testData: Vector): Double
 
@@ -46,6 +53,8 @@ trait RegressionModel extends Serializable {
    * Predict values for examples stored in a JavaRDD.
    * @param testData JavaRDD representing data points to be predicted
    * @return a JavaRDD[java.lang.Double] where each entry contains the corresponding prediction
+   *
+   * @since 1.0.0
    */
   def predict(testData: JavaRDD[Vector]): JavaRDD[java.lang.Double] =
     predict(testData.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 7d28ffad45c9..28f1d1b30fc4 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -31,6 +31,8 @@ import org.apache.spark.rdd.RDD
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
  */
 class RidgeRegressionModel (
     override val weights: Vector,
@@ -38,6 +40,9 @@ class RidgeRegressionModel (
   extends GeneralizedLinearModel(weights, intercept)
   with RegressionModel with Serializable with Saveable with PMMLExportable {
 
+  /*
+   * @since 0.8.0
+   */
   override protected def predictPoint(
       dataMatrix: Vector,
       weightMatrix: Vector,
@@ -45,15 +50,27 @@ class RidgeRegressionModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
+  /*
+   * @since 1.3.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
 
+  /*
+   * @since 1.3.0
+   */
   override protected def formatVersion: String = "1.0"
 }
 
+/*
+ * @since 1.3.0
+ */
 object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
 
+  /*
+   * @since 1.3.0
+   */
   override def load(sc: SparkContext, path: String): RidgeRegressionModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -78,6 +95,8 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
+ *
+ * @since 0.8.0
  */
 class RidgeRegressionWithSGD private (
     private var stepSize: Double,
@@ -98,9 +117,14 @@ class RidgeRegressionWithSGD private (
   /**
    * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParam: 0.01, miniBatchFraction: 1.0}.
+   *
+   * @since 0.8.0
    */
   def this() = this(1.0, 100, 0.01, 1.0)
 
+  /*
+  * @since 0.8.0
+  */
   override protected def createModel(weights: Vector, intercept: Double) = {
     new RidgeRegressionModel(weights, intercept)
   }
@@ -108,6 +132,8 @@ class RidgeRegressionWithSGD private (
 
 /**
  * Top-level methods for calling RidgeRegression.
+ *
+ * @since 0.8.0
  */
 object RidgeRegressionWithSGD {
 
@@ -124,6 +150,8 @@ object RidgeRegressionWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -146,6 +174,8 @@ object RidgeRegressionWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -166,6 +196,8 @@ object RidgeRegressionWithSGD {
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a RidgeRegressionModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -183,6 +215,8 @@ object RidgeRegressionWithSGD {
    * @param input RDD of (label, array of features) pairs.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a RidgeRegressionModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index 141052ba813e..0bd46a0337db 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -53,6 +53,8 @@ import org.apache.spark.streaming.dstream.DStream
  * It is also ok to call trainOn on different streams; this will update
  * the model using each of the different sources, in sequence.
  *
+ *
+ * @since 1.1.0
  */
 @DeveloperApi
 abstract class StreamingLinearAlgorithm[
@@ -65,7 +67,10 @@ abstract class StreamingLinearAlgorithm[
   /** The algorithm to use for updating. */
   protected val algorithm: A
 
-  /** Return the latest model. */
+  /** Return the latest model.
+  *
+  * @since 1.1.0
+  */
   def latestModel(): M = {
     model.get
   }
@@ -77,6 +82,8 @@ abstract class StreamingLinearAlgorithm[
    * batch of data from the stream.
    *
    * @param data DStream containing labeled data
+   *
+   * @since 1.1.0
    */
   def trainOn(data: DStream[LabeledPoint]): Unit = {
     if (model.isEmpty) {
@@ -95,7 +102,10 @@ abstract class StreamingLinearAlgorithm[
     }
   }
 
-  /** Java-friendly version of `trainOn`. */
+  /** Java-friendly version of `trainOn`.
+  *
+  * @since 1.1.0
+  */
   def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
 
   /**
@@ -103,6 +113,8 @@ abstract class StreamingLinearAlgorithm[
    *
    * @param data DStream containing feature vectors
    * @return DStream containing predictions
+   *
+   * @since 1.1.0
    */
   def predictOn(data: DStream[Vector]): DStream[Double] = {
     if (model.isEmpty) {
@@ -111,7 +123,10 @@ abstract class StreamingLinearAlgorithm[
     data.map{x => model.get.predict(x)}
   }
 
-  /** Java-friendly version of `predictOn`. */
+  /** Java-friendly version of `predictOn`.
+  *
+  * @since 1.1.0
+  */
   def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = {
     JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]])
   }
@@ -121,6 +136,8 @@ abstract class StreamingLinearAlgorithm[
    * @param data DStream containing feature vectors
    * @tparam K key type
    * @return DStream containing the input keys and the predictions as values
+   *
+   * @since 1.1.0
    */
   def predictOnValues[K: ClassTag](data: DStream[(K, Vector)]): DStream[(K, Double)] = {
     if (model.isEmpty) {
@@ -130,7 +147,10 @@ abstract class StreamingLinearAlgorithm[
   }
 
 
-  /** Java-friendly version of `predictOnValues`. */
+  /** Java-friendly version of `predictOnValues`.
+  *
+  * @since 1.1.0
+  */
   def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = {
     implicit val tag = fakeClassTag[K]
     JavaPairDStream.fromPairDStream(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index c6d04464a12b..ecbda922f3a7 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -40,6 +40,7 @@ import org.apache.spark.mllib.linalg.Vector
  *    .setInitialWeights(Vectors.dense(...))
  *    .trainOn(DStream)
  *
+ * @since 1.1.0
  */
 @Experimental
 class StreamingLinearRegressionWithSGD private[mllib] (
@@ -54,6 +55,8 @@ class StreamingLinearRegressionWithSGD private[mllib] (
    * {stepSize: 0.1, numIterations: 50, miniBatchFraction: 1.0}.
    * Initial weights must be set before using trainOn or predictOn
    * (see `StreamingLinearAlgorithm`)
+   *
+   * @since 1.1.0
    */
   def this() = this(0.1, 50, 1.0)
 
@@ -61,31 +64,46 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   protected var model: Option[LinearRegressionModel] = None
 
-  /** Set the step size for gradient descent. Default: 0.1. */
+  /** Set the step size for gradient descent. Default: 0.1.
+  *
+  * @since 1.1.0
+  */
   def setStepSize(stepSize: Double): this.type = {
     this.algorithm.optimizer.setStepSize(stepSize)
     this
   }
 
-  /** Set the number of iterations of gradient descent to run per update. Default: 50. */
+  /** Set the number of iterations of gradient descent to run per update. Default: 50.
+  *
+  * @since 1.1.0
+  */
   def setNumIterations(numIterations: Int): this.type = {
     this.algorithm.optimizer.setNumIterations(numIterations)
     this
   }
 
-  /** Set the fraction of each batch to use for updates. Default: 1.0. */
+  /** Set the fraction of each batch to use for updates. Default: 1.0.
+  *
+  * @since 1.1.0
+  */
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
     this
   }
 
-  /** Set the initial weights. */
+  /** Set the initial weights.
+  *
+  * @since 1.1.0
+  */
   def setInitialWeights(initialWeights: Vector): this.type = {
     this.model = Some(algorithm.createModel(initialWeights, 0.0))
     this
   }
 
-  /** Set the convergence tolerance. */
+  /** Set the convergence tolerance.
+  *
+  * @since 1.5.0
+  */
   def setConvergenceTol(tolerance: Double): this.type = {
     this.algorithm.optimizer.setConvergenceTol(tolerance)
     this

From 1a0365fca17c7e1d6dfd754e58d6f0fe6992e793 Mon Sep 17 00:00:00 2001
From: Prayag Chandran <prayagchandran@gmail.com>
Date: Sun, 19 Jul 2015 18:11:37 -0400
Subject: [PATCH 2/4] Reformatting and adding a few more tags

---
 .../GeneralizedLinearAlgorithm.scala          | 11 ++++-
 .../mllib/regression/IsotonicRegression.scala |  4 +-
 .../mllib/regression/LinearRegression.scala   |  1 +
 .../regression/StreamingLinearAlgorithm.scala | 36 ++++++++-------
 .../StreamingLinearRegressionWithSGD.scala    | 45 ++++++++++---------
 5 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 7927c2a22a74..99247bd0867a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -47,6 +47,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    * @param dataMatrix Row vector containing the features for this data point
    * @param weightMatrix Column vector containing the weights of the model
    * @param intercept Intercept of the model.
+   *
+   * @since 0.8.0
    */
   protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double
 
@@ -105,7 +107,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()
 
-  /** The optimizer to solve the problem.
+  /**
+   * The optimizer to solve the problem.
    *
    * @since 1.0.0
    */
@@ -161,7 +164,11 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
     this
   }
 
-  /* Create a model given the weights and intercept */
+  /**
+   * Create a model given the weights and intercept
+   *
+   * @since 0.8.0
+   */
   protected def createModel(weights: Vector, intercept: Double): M
 
   /**
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 242f21803d90..1458b1ec3e06 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -61,7 +61,8 @@ class IsotonicRegressionModel (
   assertOrdered(boundaries)
   assertOrdered(predictions)(predictionOrd)
 
-  /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
+  /**
+   * A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
    *
    * @since 1.4.0
    */
@@ -158,6 +159,7 @@ class IsotonicRegressionModel (
 
   /** A convenient method for boundaries called by the Python API. */
   private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
+
   /*
    * @since 1.4.0
    */
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index 17719b17c80f..a1537f51803c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -30,6 +30,7 @@ import org.apache.spark.rdd.RDD
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
  * @since 0.8.0
  */
 class LinearRegressionModel (
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index 0bd46a0337db..0609fe10f665 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -67,10 +67,11 @@ abstract class StreamingLinearAlgorithm[
   /** The algorithm to use for updating. */
   protected val algorithm: A
 
-  /** Return the latest model.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Return the latest model.
+   *
+   * @since 1.1.0
+   */
   def latestModel(): M = {
     model.get
   }
@@ -102,10 +103,11 @@ abstract class StreamingLinearAlgorithm[
     }
   }
 
-  /** Java-friendly version of `trainOn`.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Java-friendly version of `trainOn`.
+   *
+   * @since 1.1.0
+   */
   def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
 
   /**
@@ -123,10 +125,11 @@ abstract class StreamingLinearAlgorithm[
     data.map{x => model.get.predict(x)}
   }
 
-  /** Java-friendly version of `predictOn`.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Java-friendly version of `predictOn`.
+   *
+   * @since 1.1.0
+   */
   def predictOn(data: JavaDStream[Vector]): JavaDStream[java.lang.Double] = {
     JavaDStream.fromDStream(predictOn(data.dstream).asInstanceOf[DStream[java.lang.Double]])
   }
@@ -147,10 +150,11 @@ abstract class StreamingLinearAlgorithm[
   }
 
 
-  /** Java-friendly version of `predictOnValues`.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Java-friendly version of `predictOnValues`.
+   *
+   * @since 1.1.0
+   */
   def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = {
     implicit val tag = fakeClassTag[K]
     JavaPairDStream.fromPairDStream(
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index ecbda922f3a7..45741bc4beec 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -64,46 +64,51 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   protected var model: Option[LinearRegressionModel] = None
 
-  /** Set the step size for gradient descent. Default: 0.1.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Set the step size for gradient descent. Default: 0.1.
+   *
+   * @since 1.1.0
+   */
   def setStepSize(stepSize: Double): this.type = {
     this.algorithm.optimizer.setStepSize(stepSize)
     this
   }
 
-  /** Set the number of iterations of gradient descent to run per update. Default: 50.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Set the number of iterations of gradient descent to run per update. Default: 50.
+   *
+   * @since 1.1.0
+   */
   def setNumIterations(numIterations: Int): this.type = {
     this.algorithm.optimizer.setNumIterations(numIterations)
     this
   }
 
-  /** Set the fraction of each batch to use for updates. Default: 1.0.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Set the fraction of each batch to use for updates. Default: 1.0.
+   *
+   * @since 1.1.0
+   */
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
     this
   }
 
-  /** Set the initial weights.
-  *
-  * @since 1.1.0
-  */
+  /**
+   * Set the initial weights.
+   *
+   * @since 1.1.0
+   */
   def setInitialWeights(initialWeights: Vector): this.type = {
     this.model = Some(algorithm.createModel(initialWeights, 0.0))
     this
   }
 
-  /** Set the convergence tolerance.
-  *
-  * @since 1.5.0
-  */
+  /**
+   * Set the convergence tolerance.
+   *
+   * @since 1.5.0
+   */
   def setConvergenceTol(tolerance: Double): this.type = {
     this.algorithm.optimizer.setConvergenceTol(tolerance)
     this

From 6c6d58477429a433871bbda32957f0a6802c03fa Mon Sep 17 00:00:00 2001
From: Prayag Chandran <prayagchandran@gmail.com>
Date: Fri, 14 Aug 2015 14:26:41 -0400
Subject: [PATCH 3/4] Corrected a few tags. Removed a few unnecessary tags

---
 .../regression/GeneralizedLinearAlgorithm.scala    | 12 +++---------
 .../mllib/regression/IsotonicRegression.scala      | 14 +-------------
 .../spark/mllib/regression/LabeledPoint.scala      |  3 ---
 .../org/apache/spark/mllib/regression/Lasso.scala  |  6 +-----
 .../spark/mllib/regression/LinearRegression.scala  |  6 +-----
 .../spark/mllib/regression/RegressionModel.scala   |  4 ++--
 .../spark/mllib/regression/RidgeRegression.scala   | 13 -------------
 .../regression/StreamingLinearAlgorithm.scala      |  6 +++---
 .../StreamingLinearRegressionWithSGD.scala         | 14 --------------
 9 files changed, 11 insertions(+), 67 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
index 99247bd0867a..2980b94de35b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -47,8 +47,6 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    * @param dataMatrix Row vector containing the features for this data point
    * @param weightMatrix Column vector containing the weights of the model
    * @param intercept Intercept of the model.
-   *
-   * @since 0.8.0
    */
   protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector, intercept: Double): Double
 
@@ -58,7 +56,7 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    * @param testData RDD representing data points to be predicted
    * @return RDD[Double] where each entry contains the corresponding prediction
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def predict(testData: RDD[Vector]): RDD[Double] = {
     // A small optimization to avoid serializing the entire model. Only the weightsMatrix
@@ -78,7 +76,7 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    * @param testData array representing a single data point
    * @return Double prediction from the trained model
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def predict(testData: Vector): Double = {
     predictPoint(testData, weights, intercept)
@@ -86,8 +84,6 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
 
   /**
    * Print a summary of the model.
-   *
-   * @since 1.2.0
    */
   override def toString: String = {
     s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}"
@@ -166,8 +162,6 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   /**
    * Create a model given the weights and intercept
-   *
-   * @since 0.8.0
    */
   protected def createModel(weights: Vector, intercept: Double): M
 
@@ -237,7 +231,7 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
    * Run the algorithm with the configured parameters on an input RDD
    * of LabeledPoint entries starting from the initial weights provided.
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index 1458b1ec3e06..a5f8755b17b3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -167,11 +167,9 @@ class IsotonicRegressionModel (
     IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
   }
 
-  /*
-   * @since 1.4.0
-   */
   override protected def formatVersion: String = "1.0"
 }
+
 /*
  * @since 1.4.0
  */
@@ -260,8 +258,6 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
  *   Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]]
  *
  * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
- *
- * @since 1.3.0
  */
 @Experimental
 class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
@@ -270,8 +266,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    * Constructs IsotonicRegression instance with default parameter isotonic = true.
    *
    * @return New instance of IsotonicRegression.
-   *
-   * @since 1.3.0
    */
   def this() = this(true)
 
@@ -280,8 +274,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *
    * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
    * @return This instance of IsotonicRegression.
-   *
-   * @since 1.3.0
    */
   def setIsotonic(isotonic: Boolean): this.type = {
     this.isotonic = isotonic
@@ -297,8 +289,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
-   *
-   * @since 1.3.0
    */
   def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
     val preprocessedInput = if (isotonic) {
@@ -324,8 +314,6 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
-   *
-   * @since 1.3.0
    */
   def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
     run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
index 20967cc5ee8a..8b51011eeb29 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -33,9 +33,6 @@ import org.apache.spark.SparkException
  */
 @BeanInfo
 case class LabeledPoint(label: Double, features: Vector) {
-  /*
-   * @since 0.9.0
-   */
   override def toString: String = {
     s"($label,$features)"
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index 03eaf89f2f9d..b9cbe693ec25 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -88,8 +88,6 @@ object LassoModel extends Loader[LassoModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
- *
- * @since 0.8.0
  */
 class LassoWithSGD private (
     private var stepSize: Double,
@@ -109,8 +107,6 @@ class LassoWithSGD private (
   /**
    * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParam: 0.01, miniBatchFraction: 1.0}.
-   *
-   * @since 0.8.0
    */
   def this() = this(1.0, 100, 0.01, 1.0)
 
@@ -141,7 +137,7 @@ object LassoWithSGD {
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def train(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
index a1537f51803c..fb5c220daaed 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala
@@ -89,8 +89,6 @@ object LinearRegressionModel extends Loader[LinearRegressionModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
- *
- * @since 0.8.0
  */
 class LinearRegressionWithSGD private[mllib] (
     private var stepSize: Double,
@@ -108,8 +106,6 @@ class LinearRegressionWithSGD private[mllib] (
   /**
    * Construct a LinearRegression object with default parameters: {stepSize: 1.0,
    * numIterations: 100, miniBatchFraction: 1.0}.
-   *
-   * @since 0.8.0
    */
   def this() = this(1.0, 100, 1.0)
 
@@ -139,7 +135,7 @@ object LinearRegressionWithSGD {
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def train(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index f4d27f52ff68..69aac4ab1ba3 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -35,7 +35,7 @@ trait RegressionModel extends Serializable {
    * @param testData RDD representing data points to be predicted
    * @return RDD[Double] where each entry contains the corresponding prediction
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def predict(testData: RDD[Vector]): RDD[Double]
 
@@ -45,7 +45,7 @@ trait RegressionModel extends Serializable {
    * @param testData array representing a single data point
    * @return Double prediction from the trained model
    *
-   * @since 0.8.0
+   * @since 1.0.0
    */
   def predict(testData: Vector): Double
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index 28f1d1b30fc4..c6d2e71cde44 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -40,9 +40,6 @@ class RidgeRegressionModel (
   extends GeneralizedLinearModel(weights, intercept)
   with RegressionModel with Serializable with Saveable with PMMLExportable {
 
-  /*
-   * @since 0.8.0
-   */
   override protected def predictPoint(
       dataMatrix: Vector,
       weightMatrix: Vector,
@@ -57,9 +54,6 @@ class RidgeRegressionModel (
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
 
-  /*
-   * @since 1.3.0
-   */
   override protected def formatVersion: String = "1.0"
 }
 
@@ -95,8 +89,6 @@ object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
- *
- * @since 0.8.0
  */
 class RidgeRegressionWithSGD private (
     private var stepSize: Double,
@@ -117,14 +109,9 @@ class RidgeRegressionWithSGD private (
   /**
    * Construct a RidgeRegression object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParam: 0.01, miniBatchFraction: 1.0}.
-   *
-   * @since 0.8.0
    */
   def this() = this(1.0, 100, 0.01, 1.0)
 
-  /*
-  * @since 0.8.0
-  */
   override protected def createModel(weights: Vector, intercept: Double) = {
     new RidgeRegressionModel(weights, intercept)
   }
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
index 0609fe10f665..a2ab95c47476 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearAlgorithm.scala
@@ -84,7 +84,7 @@ abstract class StreamingLinearAlgorithm[
    *
    * @param data DStream containing labeled data
    *
-   * @since 1.1.0
+   * @since 1.3.0
    */
   def trainOn(data: DStream[LabeledPoint]): Unit = {
     if (model.isEmpty) {
@@ -106,7 +106,7 @@ abstract class StreamingLinearAlgorithm[
   /**
    * Java-friendly version of `trainOn`.
    *
-   * @since 1.1.0
+   * @since 1.3.0
    */
   def trainOn(data: JavaDStream[LabeledPoint]): Unit = trainOn(data.dstream)
 
@@ -153,7 +153,7 @@ abstract class StreamingLinearAlgorithm[
   /**
    * Java-friendly version of `predictOnValues`.
    *
-   * @since 1.1.0
+   * @since 1.3.0
    */
   def predictOnValues[K](data: JavaPairDStream[K, Vector]): JavaPairDStream[K, java.lang.Double] = {
     implicit val tag = fakeClassTag[K]
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
index 45741bc4beec..537a05274eec 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/StreamingLinearRegressionWithSGD.scala
@@ -39,8 +39,6 @@ import org.apache.spark.mllib.linalg.Vector
  *    .setNumIterations(10)
  *    .setInitialWeights(Vectors.dense(...))
  *    .trainOn(DStream)
- *
- * @since 1.1.0
  */
 @Experimental
 class StreamingLinearRegressionWithSGD private[mllib] (
@@ -55,8 +53,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
    * {stepSize: 0.1, numIterations: 50, miniBatchFraction: 1.0}.
    * Initial weights must be set before using trainOn or predictOn
    * (see `StreamingLinearAlgorithm`)
-   *
-   * @since 1.1.0
    */
   def this() = this(0.1, 50, 1.0)
 
@@ -66,8 +62,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   /**
    * Set the step size for gradient descent. Default: 0.1.
-   *
-   * @since 1.1.0
    */
   def setStepSize(stepSize: Double): this.type = {
     this.algorithm.optimizer.setStepSize(stepSize)
@@ -76,8 +70,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   /**
    * Set the number of iterations of gradient descent to run per update. Default: 50.
-   *
-   * @since 1.1.0
    */
   def setNumIterations(numIterations: Int): this.type = {
     this.algorithm.optimizer.setNumIterations(numIterations)
@@ -86,8 +78,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   /**
    * Set the fraction of each batch to use for updates. Default: 1.0.
-   *
-   * @since 1.1.0
    */
   def setMiniBatchFraction(miniBatchFraction: Double): this.type = {
     this.algorithm.optimizer.setMiniBatchFraction(miniBatchFraction)
@@ -96,8 +86,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   /**
    * Set the initial weights.
-   *
-   * @since 1.1.0
    */
   def setInitialWeights(initialWeights: Vector): this.type = {
     this.model = Some(algorithm.createModel(initialWeights, 0.0))
@@ -106,8 +94,6 @@ class StreamingLinearRegressionWithSGD private[mllib] (
 
   /**
    * Set the convergence tolerance.
-   *
-   * @since 1.5.0
    */
   def setConvergenceTol(tolerance: Double): this.type = {
     this.algorithm.optimizer.setConvergenceTol(tolerance)

From fa4dda2b01c04c91c65c5a1059020a180778c5cc Mon Sep 17 00:00:00 2001
From: Prayag Chandran <prayagchandran@gmail.com>
Date: Fri, 14 Aug 2015 15:06:25 -0400
Subject: [PATCH 4/4] Re-formatting

---
 .../apache/spark/mllib/regression/IsotonicRegression.scala  | 6 +++---
 .../scala/org/apache/spark/mllib/regression/Lasso.scala     | 6 +++---
 .../org/apache/spark/mllib/regression/RegressionModel.scala | 2 +-
 .../org/apache/spark/mllib/regression/RidgeRegression.scala | 6 +++---
 4 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
index a5f8755b17b3..8995591d9e8c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -160,7 +160,7 @@ class IsotonicRegressionModel (
   /** A convenient method for boundaries called by the Python API. */
   private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
 
-  /*
+  /**
    * @since 1.4.0
    */
   override def save(sc: SparkContext, path: String): Unit = {
@@ -170,7 +170,7 @@ class IsotonicRegressionModel (
   override protected def formatVersion: String = "1.0"
 }
 
-/*
+/**
  * @since 1.4.0
  */
 object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
@@ -218,7 +218,7 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
     }
   }
 
-  /*
+  /**
    * @since 1.4.0
    */
   override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
index b9cbe693ec25..03eb589b05a0 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -46,7 +46,7 @@ class LassoModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
-  /*
+  /**
    * @since 1.3.0
    */
   override def save(sc: SparkContext, path: String): Unit = {
@@ -56,12 +56,12 @@ class LassoModel (
   override protected def formatVersion: String = "1.0"
 }
 
-/*
+/**
  * @since 1.3.0
  */
 object LassoModel extends Loader[LassoModel] {
 
-  /*
+  /**
    * @since 1.3.0
    */
   override def load(sc: SparkContext, path: String): LassoModel = {
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
index 69aac4ab1ba3..b097fd38fdd8 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RegressionModel.scala
@@ -24,7 +24,7 @@ import org.apache.spark.api.java.JavaRDD
 import org.apache.spark.mllib.linalg.Vector
 import org.apache.spark.rdd.RDD
 
-/*
+/**
  * @since 0.8.0
  */
 @Experimental
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
index c6d2e71cde44..5bced6b4b7b5 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala
@@ -47,7 +47,7 @@ class RidgeRegressionModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
-  /*
+  /**
    * @since 1.3.0
    */
   override def save(sc: SparkContext, path: String): Unit = {
@@ -57,12 +57,12 @@ class RidgeRegressionModel (
   override protected def formatVersion: String = "1.0"
 }
 
-/*
+/**
  * @since 1.3.0
  */
 object RidgeRegressionModel extends Loader[RidgeRegressionModel] {
 
-  /*
+  /**
    * @since 1.3.0
    */
   override def load(sc: SparkContext, path: String): RidgeRegressionModel = {