
Commit d703372

[SPARK-10234] [MLLIB] update since version in mllib.clustering
Same as #8421 but for `mllib.clustering`. cc feynmanliang yu-iskw

Author: Xiangrui Meng <[email protected]>
Closes #8435 from mengxr/SPARK-10234.
1 parent c3a5484 commit d703372
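
For context, `@Since` is Spark's annotation (defined in `org.apache.spark.annotation`) marking the release in which a public API element first appeared. The pattern this commit applies across `mllib.clustering` is sketched below with a hypothetical `Foo` class, not part of the patch; note that annotating the primary constructor means placing `@Since` between the class name and the parameter list, and that each public field can carry its own version:

import org.apache.spark.annotation.Since

@Since("1.3.0")                                  // the class itself
class Foo @Since("1.3.0") (                      // its primary constructor
    @Since("1.3.0") val weights: Array[Double],  // field public since 1.3.0
    @Since("1.5.0") val threshold: Double)       // field added later, in 1.5.0
  extends Serializable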

File tree: 7 files changed, +44 -23 lines

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixture.scala

Lines changed: 1 addition & 0 deletions
@@ -53,6 +53,7 @@ import org.apache.spark.util.Utils
  * @param maxIterations The maximum number of iterations to perform
  */
 @Experimental
+@Since("1.3.0")
 class GaussianMixture private (
     private var k: Int,
     private var convergenceTol: Double,

mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala

Lines changed: 4 additions & 4 deletions
@@ -46,9 +46,9 @@ import org.apache.spark.sql.{SQLContext, Row}
  */
 @Since("1.3.0")
 @Experimental
-class GaussianMixtureModel(
-    val weights: Array[Double],
-    val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {
+class GaussianMixtureModel @Since("1.3.0") (
+    @Since("1.3.0") val weights: Array[Double],
+    @Since("1.3.0") val gaussians: Array[MultivariateGaussian]) extends Serializable with Saveable {
 
   require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match")
 
@@ -178,7 +178,7 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
         (weight, new MultivariateGaussian(mu, sigma))
     }.unzip
 
-    return new GaussianMixtureModel(weights.toArray, gaussians.toArray)
+    new GaussianMixtureModel(weights.toArray, gaussians.toArray)
   }
 }
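
Besides adding annotations, the second hunk drops an explicit `return`: in Scala the last expression in a method body is its result, so the keyword is redundant and conventionally avoided. A minimal illustration:

def double(x: Int): Int = {
  val y = x * 2
  y  // the method's result; equivalent to `return y` but idiomatic
}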

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeans.scala

Lines changed: 1 addition & 0 deletions
@@ -37,6 +37,7 @@ import org.apache.spark.util.random.XORShiftRandom
  * This is an iterative algorithm that will make multiple passes over the data, so any RDDs given
  * to it should be cached by the user.
  */
+@Since("0.8.0")
 class KMeans private (
     private var k: Int,
     private var maxIterations: Int,

mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala

Lines changed: 2 additions & 2 deletions
@@ -37,8 +37,8 @@ import org.apache.spark.sql.Row
  * A clustering model for K-means. Each point belongs to the cluster with the closest center.
  */
 @Since("0.8.0")
-class KMeansModel (
-    val clusterCenters: Array[Vector]) extends Saveable with Serializable with PMMLExportable {
+class KMeansModel @Since("1.1.0") (@Since("1.0.0") val clusterCenters: Array[Vector])
+  extends Saveable with Serializable with PMMLExportable {
 
   /**
    * A Java-friendly constructor that takes an Iterable of Vectors.
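
Note that the constructor and its parameter carry different versions here: `clusterCenters` is tagged 1.0.0 while the constructor itself is tagged 1.1.0, so `@Since` evidently tracks when each individual API element became public rather than assigning one version to the whole class.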

mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala

Lines changed: 21 additions & 7 deletions
@@ -43,12 +43,15 @@ import org.apache.spark.util.BoundedPriorityQueue
  * including local and distributed data structures.
  */
 @Experimental
+@Since("1.3.0")
 abstract class LDAModel private[clustering] extends Saveable {
 
   /** Number of topics */
+  @Since("1.3.0")
   def k: Int
 
   /** Vocabulary size (number of terms or terms in the vocabulary) */
+  @Since("1.3.0")
   def vocabSize: Int
 
   /**
@@ -57,6 +60,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    *
    * This is the parameter to a Dirichlet distribution.
    */
+  @Since("1.5.0")
   def docConcentration: Vector
 
   /**
@@ -68,6 +72,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * Note: The topics' distributions over terms are called "beta" in the original LDA paper
    * by Blei et al., but are called "phi" in many later papers such as Asuncion et al., 2009.
    */
+  @Since("1.5.0")
   def topicConcentration: Double
 
   /**
@@ -81,6 +86,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * This is a matrix of size vocabSize x k, where each column is a topic.
    * No guarantees are given about the ordering of the topics.
    */
+  @Since("1.3.0")
   def topicsMatrix: Matrix
 
   /**
@@ -91,6 +97,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * (term indices, term weights in topic).
    * Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(maxTermsPerTopic: Int): Array[(Array[Int], Array[Double])]
 
   /**
@@ -102,6 +109,7 @@ abstract class LDAModel private[clustering] extends Saveable {
    * (term indices, term weights in topic).
    * Each topic's terms are sorted in order of decreasing weight.
    */
+  @Since("1.3.0")
   def describeTopics(): Array[(Array[Int], Array[Double])] = describeTopics(vocabSize)
 
   /* TODO (once LDA can be trained with Strings or given a dictionary)
@@ -185,10 +193,11 @@ abstract class LDAModel private[clustering] extends Saveable {
  * @param topics Inferred topics (vocabSize x k matrix).
  */
 @Experimental
+@Since("1.3.0")
 class LocalLDAModel private[clustering] (
-    val topics: Matrix,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val topics: Matrix,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel with Serializable {
 
@@ -376,6 +385,7 @@ class LocalLDAModel private[clustering] (
 }
 
 @Experimental
+@Since("1.5.0")
 object LocalLDAModel extends Loader[LocalLDAModel] {
 
   private object SaveLoadV1_0 {
@@ -479,13 +489,14 @@ object LocalLDAModel extends Loader[LocalLDAModel] {
  * than the [[LocalLDAModel]].
  */
 @Experimental
+@Since("1.3.0")
 class DistributedLDAModel private[clustering] (
     private[clustering] val graph: Graph[LDA.TopicCounts, LDA.TokenCount],
     private[clustering] val globalTopicTotals: LDA.TopicCounts,
-    val k: Int,
-    val vocabSize: Int,
-    override val docConcentration: Vector,
-    override val topicConcentration: Double,
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val vocabSize: Int,
+    @Since("1.5.0") override val docConcentration: Vector,
+    @Since("1.5.0") override val topicConcentration: Double,
     private[spark] val iterationTimes: Array[Double],
     override protected[clustering] val gammaShape: Double = 100)
   extends LDAModel {
@@ -603,6 +614,7 @@ class DistributedLDAModel private[clustering] (
    * (term indices, topic indices). Note that terms will be omitted if not present in
    * the document.
    */
+  @Since("1.5.0")
   lazy val topicAssignments: RDD[(Long, Array[Int], Array[Int])] = {
     // For reference, compare the below code with the core part of EMLDAOptimizer.next().
     val eta = topicConcentration
@@ -634,6 +646,7 @@ class DistributedLDAModel private[clustering] (
   }
 
   /** Java-friendly version of [[topicAssignments]] */
+  @Since("1.5.0")
   lazy val javaTopicAssignments: JavaRDD[(java.lang.Long, Array[Int], Array[Int])] = {
     topicAssignments.asInstanceOf[RDD[(java.lang.Long, Array[Int], Array[Int])]].toJavaRDD()
   }
@@ -770,6 +783,7 @@ class DistributedLDAModel private[clustering] (
 
 
 @Experimental
+@Since("1.5.0")
 object DistributedLDAModel extends Loader[DistributedLDAModel] {
 
   private object SaveLoadV1_0 {
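
Note that the abstract members on `LDAModel` (`k`, `vocabSize`, `docConcentration`, `topicConcentration`) and their concrete overrides in `LocalLDAModel` and `DistributedLDAModel` are each annotated separately: the version travels with every public declaration, not just the base definition.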

mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala

Lines changed: 7 additions & 3 deletions
@@ -42,9 +42,10 @@ import org.apache.spark.{Logging, SparkContext, SparkException}
  */
 @Since("1.3.0")
 @Experimental
-class PowerIterationClusteringModel(
-    val k: Int,
-    val assignments: RDD[PowerIterationClustering.Assignment]) extends Saveable with Serializable {
+class PowerIterationClusteringModel @Since("1.3.0") (
+    @Since("1.3.0") val k: Int,
+    @Since("1.3.0") val assignments: RDD[PowerIterationClustering.Assignment])
+  extends Saveable with Serializable {
 
   @Since("1.4.0")
   override def save(sc: SparkContext, path: String): Unit = {
@@ -56,6 +57,8 @@ class PowerIterationClusteringModel(
 
 @Since("1.4.0")
 object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
+
+  @Since("1.4.0")
   override def load(sc: SparkContext, path: String): PowerIterationClusteringModel = {
     PowerIterationClusteringModel.SaveLoadV1_0.load(sc, path)
   }
@@ -120,6 +123,7 @@ object PowerIterationClusteringModel extends Loader[PowerIterationClusteringModel] {
  * @see [[http://en.wikipedia.org/wiki/Spectral_clustering Spectral clustering (Wikipedia)]]
  */
 @Experimental
+@Since("1.3.0")
 class PowerIterationClustering private[clustering] (
     private var k: Int,
     private var maxIterations: Int,

mllib/src/main/scala/org/apache/spark/mllib/clustering/StreamingKMeans.scala

Lines changed: 8 additions & 7 deletions
@@ -66,9 +66,10 @@ import org.apache.spark.util.random.XORShiftRandom
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeansModel(
-    override val clusterCenters: Array[Vector],
-    val clusterWeights: Array[Double]) extends KMeansModel(clusterCenters) with Logging {
+class StreamingKMeansModel @Since("1.2.0") (
+    @Since("1.2.0") override val clusterCenters: Array[Vector],
+    @Since("1.2.0") val clusterWeights: Array[Double])
+  extends KMeansModel(clusterCenters) with Logging {
 
   /**
    * Perform a k-means update on a batch of data.
@@ -168,10 +169,10 @@ class StreamingKMeansModel(
  */
 @Since("1.2.0")
 @Experimental
-class StreamingKMeans(
-    var k: Int,
-    var decayFactor: Double,
-    var timeUnit: String) extends Logging with Serializable {
+class StreamingKMeans @Since("1.2.0") (
+    @Since("1.2.0") var k: Int,
+    @Since("1.2.0") var decayFactor: Double,
+    @Since("1.2.0") var timeUnit: String) extends Logging with Serializable {
 
   @Since("1.2.0")
   def this() = this(2, 1.0, StreamingKMeans.BATCHES)
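
For reference, a minimal usage sketch of the two constructors touched above (assuming the builder-style setters `setK` and `setDecayFactor` that `StreamingKMeans` also exposes; the "batches" literal mirrors `StreamingKMeans.BATCHES`):

import org.apache.spark.mllib.clustering.StreamingKMeans

// Explicit three-argument constructor: k, decayFactor, timeUnit.
val modelA = new StreamingKMeans(2, 1.0, "batches")

// Zero-argument constructor, equivalent to this(2, 1.0, StreamingKMeans.BATCHES),
// refined with builder-style setters.
val modelB = new StreamingKMeans().setK(2).setDecayFactor(1.0)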
