change transform to filter

apache · hhbyyh · Mar 1, 2017 · Mar 2, 2017 · Mar 11, 2017 · Mar 10, 2017
commit ca12877c7f7e224268e145c3e8c4c37413596d66
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/FPGrowthExample.scala
@@ -34,15 +34,13 @@ import org.apache.spark.sql.SparkSession
 object FPGrowthExample {
 
   def main(args: Array[String]): Unit = {
-
     val spark = SparkSession
       .builder
       .appName(s"${this.getClass.getSimpleName}")
       .getOrCreate()
     import spark.implicits._
 
     // $example on$
-    // Loads data.
     val dataset = spark.createDataset(Seq(
       "1 2 5",
       "1 2 3 5",
@@ -53,16 +51,15 @@ object FPGrowthExample {
     val fpgrowth = new FPGrowth().setMinSupport(0.5).setMinConfidence(0.6)
     val model = fpgrowth.fit(dataset)
 
-    // get frequent itemsets.
+    // Display frequent itemsets.
     model.freqItemsets.show()
 
-    // get generated association rules.
+    // Display generated association rules.
     model.associationRules.show()
 
     // transform examines the input items against all the association rules and summarizes the
     // consequents as the prediction
     model.transform(dataset).show()
-
     // $example off$
 
     spark.stop()

diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -240,13 +240,8 @@ class FPGrowthModel private[ml] (
     val predictUDF = udf((items: Seq[_]) => {
       if (items != null) {
         val itemset = items.toSet
-        brRules.value.flatMap { rule =>
-          if (rule._1.forall(item => itemset.contains(item))) {
-            rule._2.filter(item => !itemset.contains(item))
-          } else {
-            Seq.empty
-          }
-        }
+        brRules.value.filter(_._1.forall(itemset.contains))
+          .flatMap(_._2.filter(!itemset.contains(_)))
       } else {
         Seq.empty
       }.distinct }, dt)