Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,24 @@ import org.apache.spark.rdd.RDD
*
* Generates association rules from a [[RDD[FreqItemset[Item]]]]. This method only generates
* association rules which have a single item as the consequent.
*
* @since 1.5.0
*/
@Experimental
class AssociationRules private[fpm] (
private var minConfidence: Double) extends Logging with Serializable {

/**
* Constructs an instance with the default parameters {minConfidence = 0.8} by delegating to the
* primary constructor.
*
* @since 1.5.0
*/
def this() = this(0.8)

/**
* Sets the minimal confidence (default: `0.8`).
*
* @since 1.5.0
*/
def setMinConfidence(minConfidence: Double): this.type = {
require(minConfidence >= 0.0 && minConfidence <= 1.0)
Expand All @@ -54,6 +60,8 @@ class AssociationRules private[fpm] (
* Computes the association rules with confidence above [[minConfidence]].
* @param freqItemsets frequent itemset model obtained from [[FPGrowth]]
* @return an [[RDD[Rule[Item]]]] containing the association rules.
*
* @since 1.5.0
*/
def run[Item: ClassTag](freqItemsets: RDD[FreqItemset[Item]]): RDD[Rule[Item]] = {
// For candidate rule X => Y, generate (X, (Y, freq(X union Y)))
Expand Down Expand Up @@ -90,6 +98,8 @@ object AssociationRules {
* @param antecedent hypotheses of the rule
* @param consequent conclusion of the rule
* @tparam Item item type
*
* @since 1.5.0
*/
@Experimental
class Rule[Item] private[fpm] (
Expand Down
19 changes: 19 additions & 0 deletions mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala
Original file line number Diff line number Diff line change
Expand Up @@ -38,12 +38,15 @@ import org.apache.spark.storage.StorageLevel
* Model trained by [[FPGrowth]], which holds frequent itemsets.
* @param freqItemsets frequent itemset, which is an RDD of [[FreqItemset]]
* @tparam Item item type
*
* @since 1.3.0
*/
@Experimental
class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) extends Serializable {
/**
* Generates association rules for the [[Item]]s in [[freqItemsets]].
* @param confidence minimal confidence of the rules produced
* @since 1.5.0
*/
def generateAssociationRules(confidence: Double): RDD[AssociationRules.Rule[Item]] = {
val associationRules = new AssociationRules(confidence)
Expand All @@ -67,6 +70,8 @@ class FPGrowthModel[Item: ClassTag](val freqItemsets: RDD[FreqItemset[Item]]) ex
*
* @see [[http://en.wikipedia.org/wiki/Association_rule_learning Association rule learning
* (Wikipedia)]]
*
* @since 1.3.0
*/
@Experimental
class FPGrowth private (
Expand All @@ -76,11 +81,15 @@ class FPGrowth private (
/**
* Constructs a default instance with default parameters {minSupport: `0.3`, numPartitions: same
* as the input data}.
*
* NOTE(review): the second argument (`-1`) presumably is the sentinel meaning "use the same
* number of partitions as the input data"; confirm against the code that reads numPartitions.
*
* @since 1.3.0
*/
def this() = this(0.3, -1)

/**
* Sets the minimal support level (default: `0.3`).
*
* @since 1.3.0
*/
def setMinSupport(minSupport: Double): this.type = {
this.minSupport = minSupport
Expand All @@ -89,6 +98,8 @@ class FPGrowth private (

/**
* Sets the number of partitions used by parallel FP-growth (default: same as input data).
*
* @since 1.3.0
*/
def setNumPartitions(numPartitions: Int): this.type = {
this.numPartitions = numPartitions
Expand All @@ -99,6 +110,8 @@ class FPGrowth private (
* Computes an FP-Growth model that contains frequent itemsets.
* @param data input data set, each element contains a transaction
* @return an [[FPGrowthModel]]
*
* @since 1.3.0
*/
def run[Item: ClassTag](data: RDD[Array[Item]]): FPGrowthModel[Item] = {
if (data.getStorageLevel == StorageLevel.NONE) {
Expand Down Expand Up @@ -199,6 +212,8 @@ class FPGrowth private (

/**
* :: Experimental ::
*
* @since 1.3.0
*/
@Experimental
object FPGrowth {
Expand All @@ -208,11 +223,15 @@ object FPGrowth {
* @param items items in this itemset. Java users should call [[FreqItemset#javaItems]] instead.
* @param freq frequency
* @tparam Item item type
*
* @since 1.3.0
*/
class FreqItemset[Item](val items: Array[Item], val freq: Long) extends Serializable {

/**
* Returns items in a Java List.
*
* @since 1.3.0
*/
def javaItems: java.util.List[Item] = {
items.toList.asJava
Expand Down