apache · prayagchandran · Jul 19, 2015 · Jul 19, 2015 · Aug 14, 2015 · Aug 14, 2015
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/GeneralizedLinearAlgorithm.scala
@@ -34,6 +34,8 @@ import org.apache.spark.storage.StorageLevel
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
  */
 @DeveloperApi
 abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double)
@@ -53,6 +55,8 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    *
    * @param testData RDD representing data points to be predicted
    * @return RDD[Double] where each entry contains the corresponding prediction
+   *
+   * @since 0.8.0
    */
   def predict(testData: RDD[Vector]): RDD[Double] = {
     // A small optimization to avoid serializing the entire model. Only the weightsMatrix
@@ -71,13 +75,17 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
    *
    * @param testData array representing a single data point
    * @return Double prediction from the trained model
+   *
+   * @since 0.8.0
    */
   def predict(testData: Vector): Double = {
     predictPoint(testData, weights, intercept)
   }
 
   /**
    * Print a summary of the model.
+   *
+   * @since 1.2.0
    */
   override def toString: String = {
     s"${this.getClass.getName}: intercept = ${intercept}, numFeatures = ${weights.size}"
@@ -88,14 +96,19 @@ abstract class GeneralizedLinearModel(val weights: Vector, val intercept: Double
  * :: DeveloperApi ::
  * GeneralizedLinearAlgorithm implements methods to train a Generalized Linear Model (GLM).
  * This class should be extended with an Optimizer to create a new GLM.
+ *
+ * @since 0.8.0
  */
 @DeveloperApi
 abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
   extends Logging with Serializable {
 
   protected val validators: Seq[RDD[LabeledPoint] => Boolean] = List()
 
-  /** The optimizer to solve the problem. */
+  /** The optimizer to solve the problem.
+   *
+   * @since 1.0.0
+   */
   def optimizer: Optimizer
 
   /** Whether to add intercept (default: false). */
@@ -130,6 +143,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   /**
    * The dimension of training features.
+   *
+   * @since 1.4.0
    */
   def getNumFeatures: Int = this.numFeatures
 
@@ -146,19 +161,21 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
     this
   }
 
-  /**
-   * Create a model given the weights and intercept
-   */
+  /* Create a model given the weights and intercept */
   protected def createModel(weights: Vector, intercept: Double): M
 
   /**
    * Get if the algorithm uses addIntercept
+   *
+   * @since 1.4.0
    */
   def isAddIntercept: Boolean = this.addIntercept
 
   /**
    * Set if the algorithm should add an intercept. Default false.
    * We set the default to false because adding the intercept will cause memory allocation.
+   *
+   * @since 0.8.0
    */
   def setIntercept(addIntercept: Boolean): this.type = {
     this.addIntercept = addIntercept
@@ -167,6 +184,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
 
   /**
    * Set if the algorithm should validate data before training. Default true.
+   *
+   * @since 0.8.0
    */
   def setValidateData(validateData: Boolean): this.type = {
     this.validateData = validateData
@@ -176,6 +195,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
   /**
    * Run the algorithm with the configured parameters on an input
    * RDD of LabeledPoint entries.
+   *
+   * @since 0.8.0
    */
   def run(input: RDD[LabeledPoint]): M = {
     if (numFeatures < 0) {
@@ -208,6 +229,8 @@ abstract class GeneralizedLinearAlgorithm[M <: GeneralizedLinearModel]
   /**
    * Run the algorithm with the configured parameters on an input RDD
    * of LabeledPoint entries starting from the initial weights provided.
+   *
+   * @since 0.8.0
    */
   def run(input: RDD[LabeledPoint], initialWeights: Vector): M = {
 

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala
@@ -46,6 +46,8 @@ import org.apache.spark.sql.SQLContext
  * @param predictions Array of predictions associated to the boundaries at the same index.
  *                    Results of isotonic regression and therefore monotone.
  * @param isotonic indicates whether this is isotonic or antitonic.
+ *
+ * @since 1.3.0
  */
 @Experimental
 class IsotonicRegressionModel (
@@ -59,7 +61,10 @@ class IsotonicRegressionModel (
   assertOrdered(boundaries)
   assertOrdered(predictions)(predictionOrd)
 
-  /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter. */
+  /** A Java-friendly constructor that takes two Iterable parameters and one Boolean parameter.
+   *
+   * @since 1.4.0
+   */
   def this(boundaries: java.lang.Iterable[Double],
       predictions: java.lang.Iterable[Double],
       isotonic: java.lang.Boolean) = {
@@ -83,6 +88,8 @@ class IsotonicRegressionModel (
    *
    * @param testData Features to be labeled.
    * @return Predicted labels.
+   *
+   * @since 1.3.0
    */
   def predict(testData: RDD[Double]): RDD[Double] = {
     testData.map(predict)
@@ -94,6 +101,8 @@ class IsotonicRegressionModel (
    *
    * @param testData Features to be labeled.
    * @return Predicted labels.
+   *
+   * @since 1.3.0
    */
   def predict(testData: JavaDoubleRDD): JavaDoubleRDD = {
     JavaDoubleRDD.fromRDD(predict(testData.rdd.retag.asInstanceOf[RDD[Double]]))
@@ -114,6 +123,8 @@ class IsotonicRegressionModel (
    *         3) If testData falls between two values in boundary array then prediction is treated
    *           as piecewise linear function and interpolated value is returned. In case there are
    *           multiple values with the same boundary then the same rules as in 2) are used.
+   *
+   * @since 1.3.0
    */
   def predict(testData: Double): Double = {
 
@@ -147,14 +158,21 @@ class IsotonicRegressionModel (
 
   /** A convenient method for boundaries called by the Python API. */
   private[mllib] def predictionVector: Vector = Vectors.dense(predictions)
-
+  /*
+   * @since 1.4.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic)
   }
 
+  /*
+   * @since 1.4.0
+   */
   override protected def formatVersion: String = "1.0"
 }
-
+/*
+ * @since 1.4.0
+ */
 object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
 
   import org.apache.spark.mllib.util.Loader._
@@ -200,6 +218,9 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
     }
   }
 
+  /*
+   * @since 1.4.0
+   */
   override def load(sc: SparkContext, path: String): IsotonicRegressionModel = {
     implicit val formats = DefaultFormats
     val (loadedClassName, version, metadata) = loadMetadata(sc, path)
@@ -237,6 +258,8 @@ object IsotonicRegressionModel extends Loader[IsotonicRegressionModel] {
  *   Available from [[http://softlib.rice.edu/pub/CRPC-TRs/reports/CRPC-TR96640.pdf]]
  *
  * @see [[http://en.wikipedia.org/wiki/Isotonic_regression Isotonic regression (Wikipedia)]]
+ *
+ * @since 1.3.0
  */
 @Experimental
 class IsotonicRegression private (private var isotonic: Boolean) extends Serializable {
@@ -245,6 +268,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    * Constructs IsotonicRegression instance with default parameter isotonic = true.
    *
    * @return New instance of IsotonicRegression.
+   *
+   * @since 1.3.0
    */
   def this() = this(true)
 
@@ -253,6 +278,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *
    * @param isotonic Isotonic (increasing) or antitonic (decreasing) sequence.
    * @return This instance of IsotonicRegression.
+   *
+   * @since 1.3.0
    */
   def setIsotonic(isotonic: Boolean): this.type = {
     this.isotonic = isotonic
@@ -268,6 +295,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
+   *
+   * @since 1.3.0
    */
   def run(input: RDD[(Double, Double, Double)]): IsotonicRegressionModel = {
     val preprocessedInput = if (isotonic) {
@@ -293,6 +322,8 @@ class IsotonicRegression private (private var isotonic: Boolean) extends Seriali
    *              If multiple labels share the same feature value then they are ordered before
    *              the algorithm is executed.
    * @return Isotonic regression model.
+   *
+   * @since 1.3.0
    */
   def run(input: JavaRDD[(JDouble, JDouble, JDouble)]): IsotonicRegressionModel = {
     run(input.rdd.retag.asInstanceOf[RDD[(Double, Double, Double)]])

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/LabeledPoint.scala
@@ -28,21 +28,30 @@ import org.apache.spark.SparkException
  *
  * @param label Label for this data point.
  * @param features List of features for this data point.
+ *
+ * @since 0.8.0
  */
 @BeanInfo
 case class LabeledPoint(label: Double, features: Vector) {
+  /*
+   * @since 0.9.0
+   */
   override def toString: String = {
     s"($label,$features)"
   }
 }
 
 /**
  * Parser for [[org.apache.spark.mllib.regression.LabeledPoint]].
+ *
+ * @since 1.1.0
  */
 object LabeledPoint {
   /**
    * Parses a string resulted from `LabeledPoint#toString` into
    * an [[org.apache.spark.mllib.regression.LabeledPoint]].
+   *
+   * @since 1.1.0
    */
   def parse(s: String): LabeledPoint = {
     if (s.startsWith("(")) {

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala
@@ -30,6 +30,8 @@ import org.apache.spark.rdd.RDD
  *
  * @param weights Weights computed for every feature.
  * @param intercept Intercept computed for this model.
+ *
+ * @since 0.8.0
  */
 class LassoModel (
     override val weights: Vector,
@@ -44,15 +46,24 @@ class LassoModel (
     weightMatrix.toBreeze.dot(dataMatrix.toBreeze) + intercept
   }
 
+  /*
+   * @since 1.3.0
+   */
   override def save(sc: SparkContext, path: String): Unit = {
     GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept)
   }
 
   override protected def formatVersion: String = "1.0"
 }
 
+/*
+ * @since 1.3.0
+ */
 object LassoModel extends Loader[LassoModel] {
 
+  /*
+   * @since 1.3.0
+   */
   override def load(sc: SparkContext, path: String): LassoModel = {
     val (loadedClassName, version, metadata) = Loader.loadMetadata(sc, path)
     // Hard-code class name string in case it changes in the future
@@ -77,6 +88,8 @@ object LassoModel extends Loader[LassoModel] {
  * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with
  * its corresponding right hand side label y.
  * See also the documentation for the precise formulation.
+ *
+ * @since 0.8.0
  */
 class LassoWithSGD private (
     private var stepSize: Double,
@@ -96,6 +109,8 @@ class LassoWithSGD private (
   /**
    * Construct a Lasso object with default parameters: {stepSize: 1.0, numIterations: 100,
    * regParam: 0.01, miniBatchFraction: 1.0}.
+   *
+   * @since 0.8.0
    */
   def this() = this(1.0, 100, 0.01, 1.0)
 
@@ -106,6 +121,8 @@ class LassoWithSGD private (
 
 /**
  * Top-level methods for calling Lasso.
+ *
+ * @since 0.8.0
  */
 object LassoWithSGD {
 
@@ -123,6 +140,8 @@ object LassoWithSGD {
    * @param miniBatchFraction Fraction of data to be used per iteration.
    * @param initialWeights Initial set of weights to be used. Array should be equal in size to
    *        the number of features in the data.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -146,6 +165,8 @@ object LassoWithSGD {
    * @param stepSize Step size to be used for each iteration of gradient descent.
    * @param regParam Regularization parameter.
    * @param miniBatchFraction Fraction of data to be used per iteration.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -167,6 +188,8 @@ object LassoWithSGD {
    * @param regParam Regularization parameter.
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LassoModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],
@@ -185,6 +208,8 @@ object LassoWithSGD {
    *              matrix A as well as the corresponding right hand side label y
    * @param numIterations Number of iterations of gradient descent to run.
    * @return a LassoModel which has the weights and offset from training.
+   *
+   * @since 0.8.0
    */
   def train(
       input: RDD[LabeledPoint],