Changes from all commits
@@ -119,7 +119,7 @@ object GraphGenerators extends Logging {
* A random graph generator using the R-MAT model, proposed in
* "R-MAT: A Recursive Model for Graph Mining" by Chakrabarti et al.
*
- * See [[http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf]].
+ * See http://www.cs.cmu.edu/~christos/PUBLICATIONS/siam04.pdf.
Member Author: This was valid scaladoc, but it doesn't convert to valid javadoc; it's a genjavadoc limitation.

*/
def rmatGraph(sc: SparkContext, requestedNumVertices: Int, numEdges: Int): Graph[Int, Int] = {
// let N = requestedNumVertices
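For context, a minimal illustrative sketch (not part of the patch; the object name and URL are made up) of the doc-comment forms this PR trades between:

```scala
/**
 * A scaladoc bare-URL link like [[http://example.org/paper.pdf]] renders in
 * scaladoc, but genjavadoc passes the [[...]] syntax through and javadoc 8
 * rejects it. A plain URL (http://example.org/paper.pdf) survives both
 * toolchains, as does a monospace reference like `java.util.Arrays.hashCode`.
 */
object DocCommentSketch
```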
@@ -66,7 +66,7 @@ sealed trait Vector extends Serializable {

/**
* Returns a hash code value for the vector. The hash code is based on its size and its first 128
- * nonzero entries, using a hash algorithm similar to [[java.util.Arrays.hashCode]].
+ * nonzero entries, using a hash algorithm similar to `java.util.Arrays.hashCode`.
*/
override def hashCode(): Int = {
// This is a reference implementation. It calls return in foreachActive, which is slow.
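The documented contract suggests an implementation along these lines. This is a standalone sketch of the idea (mix in the size, then the first 128 nonzero values, with the per-element step borrowed from `java.util.Arrays.hashCode`), not Spark's actual code, which iterates only active entries:

```scala
// A minimal sketch, assuming a dense array of values.
def vectorHashCode(values: Array[Double]): Int = {
  var result = 31 + values.length // fold the size in first
  var count = 0
  var i = 0
  while (i < values.length && count < 128) {
    if (values(i) != 0.0) {
      // Same per-element mixing as java.util.Arrays.hashCode(long[])
      val bits = java.lang.Double.doubleToLongBits(values(i))
      result = 31 * result + (bits ^ (bits >>> 32)).toInt
      count += 1
    }
    i += 1
  }
  result
}
```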
mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala (2 changes: 1 addition & 1 deletion)
@@ -212,7 +212,7 @@ object Pipeline extends MLReadable[Pipeline] {
}
}

- /** Methods for [[MLReader]] and [[MLWriter]] shared between [[Pipeline]] and [[PipelineModel]] */
+ /** Methods for `MLReader` and `MLWriter` shared between [[Pipeline]] and [[PipelineModel]] */
Member Author: Several of these instances fail because the visibility in the generated javadoc comes out wrong, so the link target cannot be resolved. It seemed more worthwhile to eliminate the error than to keep these links.

private[ml] object SharedReadWrite {

import org.json4s.JsonDSL._
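`SharedReadWrite` backs the public persistence API. A hedged usage sketch, assuming `pipeline` is a `Pipeline`, `model` is a fitted `PipelineModel`, and the paths are writable:

```scala
import org.apache.spark.ml.{Pipeline, PipelineModel}

// Save and reload an unfitted pipeline via MLWriter/MLReader.
pipeline.write.overwrite().save("/tmp/unfit-pipeline")
val samePipeline = Pipeline.load("/tmp/unfit-pipeline")

// A fitted model persists the same way.
model.write.overwrite().save("/tmp/fitted-model")
val sameModel = PipelineModel.load("/tmp/fitted-model")
```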
mllib/src/main/scala/org/apache/spark/ml/Predictor.scala (2 changes: 1 addition & 1 deletion)
@@ -165,7 +165,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType,
}

/**
- * Transforms dataset by reading from [[featuresCol]], calling [[predict()]], and storing
+ * Transforms dataset by reading from [[featuresCol]], calling `predict`, and storing
* the predictions as a new column [[predictionCol]].
*
* @param dataset input dataset
@@ -50,7 +50,7 @@ private[spark] trait ClassifierParams
* Single-label binary or multiclass classification.
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
- * @tparam FeaturesType Type of input features. E.g., [[Vector]]
+ * @tparam FeaturesType Type of input features. E.g., `Vector`
* @tparam E Concrete Estimator type
* @tparam M Concrete Model type
*/
@@ -134,7 +134,7 @@ abstract class Classifier[
* Model produced by a [[Classifier]].
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
- * @tparam FeaturesType Type of input features. E.g., [[Vector]]
+ * @tparam FeaturesType Type of input features. E.g., `Vector`
* @tparam M Concrete Model type
*/
@DeveloperApi
@@ -151,7 +151,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur
* Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
* parameters:
* - predicted labels as [[predictionCol]] of type [[Double]]
- * - raw predictions (confidences) as [[rawPredictionCol]] of type [[Vector]].
+ * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`.
*
* @param dataset input dataset
* @return transformed dataset
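A usage sketch of the transform contract described above, assuming `model` is a fitted `ClassificationModel` and the column names are left at their defaults:

```scala
// transform() appends the documented output columns to the input dataset.
val predictions = model.transform(dataset)
// "prediction" holds the Double label index; "rawPrediction" the confidence Vector.
predictions.select("prediction", "rawPrediction").show(5)
```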
@@ -36,7 +36,7 @@ import org.apache.spark.sql.Dataset


/**
- * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] learning algorithm
+ * Decision tree learning algorithm (http://en.wikipedia.org/wiki/Decision_tree_learning)
* for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
@@ -135,7 +135,7 @@ object DecisionTreeClassifier extends DefaultParamsReadable[DecisionTreeClassifi
}

/**
- * [[http://en.wikipedia.org/wiki/Decision_tree_learning Decision tree]] model for classification.
+ * Decision tree model (http://en.wikipedia.org/wiki/Decision_tree_learning) for classification.
* It supports both binary and multiclass labels, as well as both continuous and categorical
* features.
*/
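A hedged usage sketch of the classifier, assuming `train` is a DataFrame with the default "label" and "features" columns (categorical features already indexed):

```scala
import org.apache.spark.ml.classification.DecisionTreeClassifier

val dt = new DecisionTreeClassifier()
  .setLabelCol("label")
  .setFeaturesCol("features")
  .setMaxDepth(5) // illustrative setting
val dtModel = dt.fit(train)
```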
@@ -40,7 +40,7 @@ import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.DoubleType

/**
- * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
+ * Gradient-Boosted Trees (GBTs) (http://en.wikipedia.org/wiki/Gradient_boosting)
* learning algorithm for classification.
* It supports binary labels, as well as both continuous and categorical features.
* Note: Multiclass labels are not currently supported.
@@ -158,7 +158,7 @@ object GBTClassifier extends DefaultParamsReadable[GBTClassifier] {
}

/**
- * [[http://en.wikipedia.org/wiki/Gradient_boosting Gradient-Boosted Trees (GBTs)]]
+ * Gradient-Boosted Trees (GBTs) (http://en.wikipedia.org/wiki/Gradient_boosting)
* model for classification.
* It supports binary labels, as well as both continuous and categorical features.
* Note: Multiclass labels are not currently supported.
@@ -233,8 +233,8 @@ class GBTClassificationModel private[ml](
* The importance vector is normalized to sum to 1. This method is suggested by Hastie et al.
* (Hastie, Tibshirani, Friedman. "The Elements of Statistical Learning, 2nd Edition." 2001.)
* and follows the implementation from scikit-learn.
- *
- * @see [[DecisionTreeClassificationModel.featureImportances]]
+
+ * See `DecisionTreeClassificationModel.featureImportances`
*/
@Since("2.0.0")
lazy val featureImportances: Vector = TreeEnsembleModel.featureImportances(trees, numFeatures)
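A short usage sketch, assuming `gbtModel` is a fitted `GBTClassificationModel`:

```scala
// The importance vector is normalized to sum to 1 across all features.
val importances = gbtModel.featureImportances
println(s"Feature importances: $importances")
```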
@@ -858,18 +858,18 @@ class BinaryLogisticRegressionSummary private[classification] (
* Returns the receiver operating characteristic (ROC) curve,
* which is a Dataframe having two fields (FPR, TPR)
* with (0.0, 0.0) prepended and (1.0, 1.0) appended to it.
+ * See http://en.wikipedia.org/wiki/Receiver_operating_characteristic
  *
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
  * This will change in later Spark versions.
- * @see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
*/
@Since("1.5.0")
@transient lazy val roc: DataFrame = binaryMetrics.roc().toDF("FPR", "TPR")

/**
* Computes the area under the receiver operating characteristic (ROC) curve.
*
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
* This will change in later Spark versions.
*/
@Since("1.5.0")
@@ -879,7 +879,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Returns the precision-recall curve, which is a Dataframe containing
* two fields recall, precision with (0.0, 1.0) prepended to it.
*
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
* This will change in later Spark versions.
*/
@Since("1.5.0")
@@ -888,7 +888,7 @@ class BinaryLogisticRegressionSummary private[classification] (
/**
* Returns a dataframe with two fields (threshold, F-Measure) curve with beta = 1.0.
*
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
* This will change in later Spark versions.
*/
@Since("1.5.0")
@@ -901,7 +901,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Every possible probability obtained in transforming the dataset are used
* as thresholds used in calculating the precision.
*
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
* This will change in later Spark versions.
*/
@Since("1.5.0")
@@ -914,7 +914,7 @@ class BinaryLogisticRegressionSummary private[classification] (
* Every possible probability obtained in transforming the dataset are used
* as thresholds used in calculating the recall.
*
- * Note: This ignores instance weights (setting all to 1.0) from [[LogisticRegression.weightCol]].
+ * Note: This ignores instance weights (setting all to 1.0) from `LogisticRegression.weightCol`.
* This will change in later Spark versions.
*/
@Since("1.5.0")
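A hedged usage sketch of the summary API, assuming `lrModel` is a `LogisticRegressionModel` fitted on binary labels; the cast follows the pattern used in Spark's own examples of this era:

```scala
import org.apache.spark.ml.classification.BinaryLogisticRegressionSummary

val binarySummary = lrModel.summary.asInstanceOf[BinaryLogisticRegressionSummary]
binarySummary.roc.show()            // DataFrame with FPR and TPR columns
println(binarySummary.areaUnderROC) // area under the ROC curve
```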
@@ -45,7 +45,7 @@ private[classification] trait ProbabilisticClassifierParams
*
* Single-label binary or multiclass classifier which can output class conditional probabilities.
*
- * @tparam FeaturesType Type of input features. E.g., [[Vector]]
+ * @tparam FeaturesType Type of input features. E.g., `Vector`
* @tparam E Concrete Estimator type
* @tparam M Concrete Model type
*/
@@ -70,7 +70,7 @@ abstract class ProbabilisticClassifier[
* Model produced by a [[ProbabilisticClassifier]].
* Classes are indexed {0, 1, ..., numClasses - 1}.
*
- * @tparam FeaturesType Type of input features. E.g., [[Vector]]
+ * @tparam FeaturesType Type of input features. E.g., `Vector`
* @tparam M Concrete Model type
*/
@DeveloperApi
@@ -89,8 +89,8 @@ abstract class ProbabilisticClassificationModel[
* Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by
* parameters:
* - predicted labels as [[predictionCol]] of type [[Double]]
- * - raw predictions (confidences) as [[rawPredictionCol]] of type [[Vector]]
- * - probability of each class as [[probabilityCol]] of type [[Vector]].
+ * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`
+ * - probability of each class as [[probabilityCol]] of type `Vector`.
*
* @param dataset input dataset
* @return transformed dataset
@@ -210,7 +210,7 @@ private[ml] object ProbabilisticClassificationModel {
/**
* Normalize a vector of raw predictions to be a multinomial probability vector, in place.
*
- * The input raw predictions should be >= 0.
+ * The input raw predictions should be nonnegative.
* The output vector sums to 1, unless the input vector is all-0 (in which case the output is
* all-0 too).
*
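The documented normalization admits a simple reference form. This standalone sketch returns a copy rather than working in place, unlike Spark's version:

```scala
// Divide nonnegative raw scores by their sum; an all-0 input stays all-0.
def normalizeToProbabilities(raw: Array[Double]): Array[Double] = {
  val sum = raw.sum
  if (sum == 0.0) raw.clone() else raw.map(_ / sum)
}
```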
@@ -53,7 +53,7 @@ abstract class Evaluator extends Params {
def evaluate(dataset: Dataset[_]): Double

/**
- * Indicates whether the metric returned by [[evaluate()]] should be maximized (true, default)
+ * Indicates whether the metric returned by `evaluate` should be maximized (true, default)
* or minimized (false).
* A given evaluator may support multiple metrics which may be maximized or minimized.
*/
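A hedged sketch of how a model-selection loop would consult `isLargerBetter`, assuming `metrics` holds one `evaluate` result per candidate model:

```scala
val bestIndex =
  if (evaluator.isLargerBetter) metrics.zipWithIndex.maxBy(_._1)._2
  else metrics.zipWithIndex.minBy(_._1)._2
```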
@@ -42,8 +42,8 @@ private[feature] trait ChiSqSelectorParams extends Params

/**
* Number of features that selector will select (ordered by statistic value descending). If the
- * number of features is < numTopFeatures, then this will select all features. The default value
- * of numTopFeatures is 50.
+ * number of features is less than numTopFeatures, then this will select all features.
+ * The default value of numTopFeatures is 50.
* @group param
*/
final val numTopFeatures = new IntParam(this, "numTopFeatures",
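A usage sketch of the selector, with illustrative column names; if the input has fewer than `numTopFeatures` features, all of them are kept:

```scala
import org.apache.spark.ml.feature.ChiSqSelector

val selector = new ChiSqSelector()
  .setNumTopFeatures(50)
  .setFeaturesCol("features")
  .setLabelCol("label")
  .setOutputCol("selectedFeatures")
val selected = selector.fit(df).transform(df)
```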
mllib/src/main/scala/org/apache/spark/ml/param/params.scala (13 changes: 6 additions & 7 deletions)
@@ -552,7 +552,7 @@ trait Params extends Identifiable with Serializable {
*
* This only needs to check for interactions between parameters.
* Parameter value checks which do not depend on other parameters are handled by
- * [[Param.validate()]]. This method does not handle input/output column parameters;
+ * `Param.validate()`. This method does not handle input/output column parameters;
* those are checked during schema validation.
* @deprecated Will be removed in 2.1.0. All the checks should be merged into transformSchema
*/
@@ -580,8 +580,7 @@ trait Params extends Identifiable with Serializable {
}

/**
- * Explains all params of this instance.
- * @see [[explainParam()]]
+ * Explains all params of this instance. See `explainParam()`.
*/
def explainParams(): String = {
params.map(explainParam).mkString("\n")
@@ -678,7 +677,7 @@ trait Params extends Identifiable with Serializable {
/**
* Sets default values for a list of params.
*
- * Note: Java developers should use the single-parameter [[setDefault()]].
+ * Note: Java developers should use the single-parameter `setDefault`.
* Annotating this with varargs can cause compilation failures due to a Scala compiler bug.
* See SPARK-9268.
*
@@ -712,8 +711,7 @@ trait Params extends Identifiable with Serializable {
/**
* Creates a copy of this instance with the same UID and some extra params.
* Subclasses should implement this method and set the return type properly.
- *
- * @see [[defaultCopy()]]
+ * See `defaultCopy()`.
*/
def copy(extra: ParamMap): Params

@@ -730,7 +728,8 @@ trait Params extends Identifiable with Serializable {
/**
* Extracts the embedded default param values and user-supplied values, and then merges them with
* extra values from input into a flat param map, where the latter value is used if there exist
- * conflicts, i.e., with ordering: default param values < user-supplied values < extra.
+ * conflicts, i.e., with ordering:
+ * default param values less than user-supplied values less than extra.
*/
final def extractParamMap(extra: ParamMap): ParamMap = {
defaultParamMap ++ paramMap ++ extra
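The `++` precedence is easy to see with plain Scala maps (an illustration, not the Spark API; the right-hand operand wins on key conflicts):

```scala
val defaults     = Map("maxIter" -> 10.0, "regParam" -> 0.0)
val userSupplied = Map("regParam" -> 0.1)
val extra        = Map("maxIter" -> 25.0)
val merged = defaults ++ userSupplied ++ extra
// merged == Map("maxIter" -> 25.0, "regParam" -> 0.1)
```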
@@ -99,7 +99,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
with HasPredictionCol with HasCheckpointInterval with HasSeed {

/**
- * Param for rank of the matrix factorization (>= 1).
+ * Param for rank of the matrix factorization (positive).
* Default: 10
* @group param
*/
@@ -109,7 +109,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
def getRank: Int = $(rank)

/**
- * Param for number of user blocks (>= 1).
+ * Param for number of user blocks (positive).
* Default: 10
* @group param
*/
@@ -120,7 +120,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
def getNumUserBlocks: Int = $(numUserBlocks)

/**
- * Param for number of item blocks (>= 1).
+ * Param for number of item blocks (positive).
* Default: 10
* @group param
*/
@@ -141,7 +141,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w
def getImplicitPrefs: Boolean = $(implicitPrefs)

/**
- * Param for the alpha parameter in the implicit preference formulation (>= 0).
+ * Param for the alpha parameter in the implicit preference formulation (nonnegative).
* Default: 1.0
* @group param
*/
@@ -174,7 +174,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w

/**
* Param for StorageLevel for intermediate datasets. Pass in a string representation of
- * [[StorageLevel]]. Cannot be "NONE".
+ * `StorageLevel`. Cannot be "NONE".
* Default: "MEMORY_AND_DISK".
*
* @group expertParam
@@ -188,7 +188,7 @@ private[recommendation] trait ALSParams extends ALSModelParams with HasMaxIter w

/**
* Param for StorageLevel for ALS model factors. Pass in a string representation of
- * [[StorageLevel]].
+ * `StorageLevel`.
* Default: "MEMORY_AND_DISK".
*
* @group expertParam
@@ -351,11 +351,11 @@ object ALSModel extends MLReadable[ALSModel] {
*
* For implicit preference data, the algorithm used is based on
* "Collaborative Filtering for Implicit Feedback Datasets", available at
- * [[http://dx.doi.org/10.1109/ICDM.2008.22]], adapted for the blocked approach used here.
+ * http://dx.doi.org/10.1109/ICDM.2008.22, adapted for the blocked approach used here.
*
* Essentially instead of finding the low-rank approximations to the rating matrix `R`,
* this finds the approximations for a preference matrix `P` where the elements of `P` are 1 if
- * r > 0 and 0 if r <= 0. The ratings then act as 'confidence' values related to strength of
+ * r &gt; 0 and 0 if r &lt;= 0. The ratings then act as 'confidence' values related to strength of
* indicated user
* preferences rather than explicit ratings given to items.
*/
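A hedged usage sketch wiring up the params documented above; the column names are illustrative and `ratings` is an assumed DataFrame of (user, item, rating) rows:

```scala
import org.apache.spark.ml.recommendation.ALS

val als = new ALS()
  .setRank(10)
  .setNumBlocks(10)       // convenience setter for both user and item blocks
  .setImplicitPrefs(true)
  .setAlpha(1.0)
  .setUserCol("user")
  .setItemCol("item")
  .setRatingCol("rating")
val alsModel = als.fit(ratings)
```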
mllib/src/main/scala/org/apache/spark/ml/tree/Node.scala (4 changes: 2 additions & 2 deletions)
@@ -145,8 +145,8 @@ class LeafNode private[ml] (
* Internal Decision Tree node.
* @param prediction Prediction this node would make if it were a leaf node
* @param impurity Impurity measure at this node (for training data)
- * @param gain Information gain value.
- * Values < 0 indicate missing values; this quirk will be removed with future updates.
+ * @param gain Information gain value. Values less than 0 indicate missing values;
+ * this quirk will be removed with future updates.
* @param leftChild Left-hand child node
* @param rightChild Right-hand child node
* @param split Information about the test used to split to the left or right child.
mllib/src/main/scala/org/apache/spark/ml/tree/Split.scala (4 changes: 2 additions & 2 deletions)
@@ -151,8 +151,8 @@ class CategoricalSplit private[ml] (
/**
* Split which tests a continuous feature.
* @param featureIndex Index of the feature to test
- * @param threshold If the feature value is <= this threshold, then the split goes left.
- * Otherwise, it goes right.
+ * @param threshold If the feature value is less than or equal to this threshold, then the
+ * split goes left. Otherwise, it goes right.
*/
class ContinuousSplit private[ml] (override val featureIndex: Int, val threshold: Double)
extends Split {
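The routing rule for a continuous split reduces to one comparison; a minimal standalone sketch (Spark's internal `Split` API differs in detail):

```scala
// Left if the feature value is at or below the threshold, right otherwise.
def goesLeft(featureValue: Double, threshold: Double): Boolean =
  featureValue <= threshold
```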
@@ -415,12 +415,12 @@ private[ml] object EnsembleModelReadWrite {
/**
* Helper method for loading a tree ensemble from disk.
* This reconstructs all trees, returning the root nodes.
- * @param path Path given to [[saveImpl()]]
+ * @param path Path given to `saveImpl`
* @param className Class name for ensemble model type
* @param treeClassName Class name for tree model type in the ensemble
* @return (ensemble metadata, array over trees of (tree metadata, root node)),
* where the root node is linked with all descendents
- * @see [[saveImpl()]] for how the model was saved
+ * @see `saveImpl` for how the model was saved
*/
def loadImpl(
path: String,
@@ -173,8 +173,8 @@ object ChiSqSelectorModel extends Loader[ChiSqSelectorModel] {
* Creates a ChiSquared feature selector.
* @param numTopFeatures number of features that selector will select
* (ordered by statistic value descending)
- * Note that if the number of features is < numTopFeatures, then this will
- * select all features.
+ * Note that if the number of features is less than numTopFeatures,
+ * then this will select all features.
*/
@Since("1.3.0")
class ChiSqSelector @Since("1.3.0") (
Expand Down