update docs in scala, python, R

apache · mgaido91 · Aug 26, 2018 · Aug 26, 2018 · Aug 27, 2018 · Aug 27, 2018
commit 2407e05d4bfbbe71359cf5a57a856ab5514998cb
diff --git a/R/pkg/R/mllib_fpm.R b/R/pkg/R/mllib_fpm.R
@@ -116,10 +116,11 @@ setMethod("spark.freqItemsets", signature(object = "FPGrowthModel"),
 # Get association rules.
 
 #' @return A \code{SparkDataFrame} with association rules.
-#'         The \code{SparkDataFrame} contains three columns:
+#'         The \code{SparkDataFrame} contains four columns:
 #'         \code{antecedent} (an array of the same type as the input column),
 #'         \code{consequent} (an array of the same type as the input column),
-#'         and \code{condfidence} (confidence).
+#'         \code{confidence} (confidence for the rule),
+#'         and \code{lift} (lift for the rule).
 #' @rdname spark.fpGrowth
 #' @aliases associationRules,FPGrowthModel-method
 #' @note spark.associationRules(FPGrowthModel) since 2.2.0

diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -248,9 +248,9 @@ class FPGrowthModel private[ml] (
   @transient private var _cachedRules: DataFrame = _
 
   /**
-   * Get association rules fitted using the minConfidence. Returns a dataframe
-   * with three fields, "antecedent", "consequent" and "confidence", where "antecedent" and
-   * "consequent" are Array[T] and "confidence" is Double.
+   * Get association rules fitted using the minConfidence. Returns a dataframe with four fields,
+   * "antecedent", "consequent", "confidence" and "lift", where "antecedent" and "consequent" are
+   * Array[T], whereas "confidence" and "lift" are Double.
    */
   @Since("2.2.0")
   @transient def associationRules: DataFrame = {
@@ -381,8 +381,8 @@ private[fpm] object AssociationRules {
    * @param freqCol column name for appearance count of the frequent itemsets
    * @param minConfidence minimum confidence for generating the association rules
    * @param itemSupport map containing an item and its support
-   * @return a DataFrame("antecedent"[Array], "consequent"[Array], "confidence"[Double])
-   *         containing the association rules.
+   * @return a DataFrame("antecedent"[Array], "consequent"[Array], "confidence"[Double],
+   *         "lift"[Double]) containing the association rules.
    */
   def getAssociationRulesFromFP[T: ClassTag](
         dataset: Dataset[_],

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala
@@ -135,6 +135,9 @@ object AssociationRules {
     @Since("1.5.0")
     def confidence: Double = freqUnion / freqAntecedent
 
+    /**
+     * Returns the lift of the rule, or None when freqConsequent is undefined.
+     */
     @Since("2.4.0")
     def lift: Option[Double] = freqConsequent.map(fCons => confidence / fCons)
 

diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
@@ -145,10 +145,11 @@ def freqItemsets(self):
     @since("2.2.0")
     def associationRules(self):
         """
-        DataFrame with three columns:
+        DataFrame with four columns:
         * `antecedent`  - Array of the same type as the input column.
         * `consequent`  - Array of the same type as the input column.
         * `confidence`  - Confidence for the rule (`DoubleType`).
+        * `lift`        - Lift for the rule (`DoubleType`).
         """
         return self._call_java("associationRules")