Fix Python and R tests: update expected FPGrowth association rules for the new `support` column

apache · hhbyyh · Mar 11, 2017 · Mar 11, 2017 · Mar 11, 2017 · Mar 12, 2017
commit eb25f287393ccffeb1a5905f7752298808945a2c
diff --git a/R/pkg/tests/fulltests/test_mllib_fpm.R b/R/pkg/tests/fulltests/test_mllib_fpm.R
@@ -44,7 +44,8 @@ test_that("spark.fpGrowth", {
   expected_association_rules <- data.frame(
     antecedent = I(list(list("2"), list("3"))),
     consequent = I(list(list("1"), list("1"))),
-    confidence = c(1, 1)
+    confidence = c(1, 1),
+    support = c(0.75, 0.5)
   )
 
   expect_equivalent(expected_association_rules, collect(spark.associationRules(model)))

diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py
@@ -186,29 +186,29 @@ class FPGrowth(JavaEstimator, HasItemsCol, HasPredictionCol,
     |[z]                     |
     |[x, z, y, r, q, t, p]   |
     +------------------------+
-    >>> fp = FPGrowth(minSupport=0.2, minConfidence=0.7)
+    >>> fp = FPGrowth(minSupport=0.4, minConfidence=0.7)
     >>> fpm = fp.fit(data)
     >>> fpm.freqItemsets.show(5)
-    +---------+----+
-    |    items|freq|
-    +---------+----+
-    |      [s]|   3|
-    |   [s, x]|   3|
-    |[s, x, z]|   2|
-    |   [s, z]|   2|
-    |      [r]|   3|
-    +---------+----+
+    +------+----+
+    | items|freq|
+    +------+----+
+    |   [s]|   3|
+    |[s, x]|   3|
+    |   [r]|   3|
+    |   [y]|   3|
+    |[y, x]|   3|
+    +------+----+
     only showing top 5 rows
     >>> fpm.associationRules.show(5)
-    +----------+----------+----------+
-    |antecedent|consequent|confidence|
-    +----------+----------+----------+
-    |    [t, s]|       [y]|       1.0|
-    |    [t, s]|       [x]|       1.0|
-    |    [t, s]|       [z]|       1.0|
-    |       [p]|       [r]|       1.0|
-    |       [p]|       [z]|       1.0|
-    +----------+----------+----------+
+    +----------+----------+----------+-------+
+    |antecedent|consequent|confidence|support|
+    +----------+----------+----------+-------+
+    |       [t]|       [y]|       1.0|    0.5|
+    |       [t]|       [x]|       1.0|    0.5|
+    |       [t]|       [z]|       1.0|    0.5|
+    | [y, t, x]|       [z]|       1.0|    0.5|
+    |       [x]|       [s]|      0.75|    0.5|
+    +----------+----------+----------+-------+
     only showing top 5 rows
     >>> new_data = spark.createDataFrame([(["t", "s"], )], ["items"])
     >>> sorted(fpm.transform(new_data).first().prediction)

diff --git a/python/pyspark/ml/tests.py b/python/pyspark/ml/tests.py
@@ -1283,8 +1283,8 @@ def test_association_rules(self):
         fpm = fp.fit(self.data)
 
         expected_association_rules = self.spark.createDataFrame(
-            [([3], [1], 1.0), ([2], [1], 1.0)],
-            ["antecedent", "consequent", "confidence"]
+            [([3], [1], 1.0, 0.5), ([2], [1], 1.0, 0.75)],
+            ["antecedent", "consequent", "confidence", "support"]
         )
         actual_association_rules = fpm.associationRules