You are viewing a plain text version of this content. The canonical link for it is here.

Posted to user@mahout.apache.org by Sébastien Noir <in...@blackos.com> on 2011/11/21 10:49:37 UTC

Trouble understanding how to use the FP_Growth algorithm

Hi!

I'm currently trying to understand how to use the implementation of the FPGrowth algorithm (see : https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).

Currently, I'm just trying it with stupid data, and scala code. The problem is that it outputs only single-item itemsets.
I probably missed something. Could you give me a hint?

By the way, the code below is scala (calling java implementation directly!). If that is a problem, I can translate it to java...

sample output :

freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
10:47:44,688 INFO  ~ Number of unique items 4
10:47:44,688 INFO  ~ Number of unique pruned items 4
10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
updater : FPGrowth Algorithm for a given feature: 3
butter:[butter] : 6
10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
updater : FPGrowth Algorithm for a given feature: 2
updater : FPGrowth Algorithm for a given feature: 3
milk:[milk] : 11
10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
updater : FPGrowth Algorithm for a given feature: 1
updater : FPGrowth Algorithm for a given feature: 2
updater : FPGrowth Algorithm for a given feature: 3
bread:[bread] : 12
10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
updater : FPGrowth Algorithm for a given feature: 0
updater : FPGrowth Algorithm for a given feature: 1
updater : FPGrowth Algorithm for a given feature: 2
updater : FPGrowth Algorithm for a given feature: 3
bier:[bier] : 15
10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15

code :


    import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
    import java.util.HashSet
    import org.apache.mahout.common.iterator.StringRecordIterator
    import org.apache.mahout.common.iterator.FileLineIterable
    import org.apache.mahout.fpm.pfpgrowth.convertors._
    import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
    import org.apache.mahout.fpm.pfpgrowth.convertors.string._
    import org.apache.hadoop.io.SequenceFile.Writer
    import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
    import org.apache.hadoop.mapred.OutputCollector
    import scala.collection.JavaConversions._
    import java.util.{ List => JList }
    import org.apache.mahout.common.{ Pair => JPair }
    import java.lang.{ Long => JLong }
    import org.apache.hadoop.io.{ Text => JText }

    // Support threshold handed to both FPGrowth calls below.
    val minSupport = 1L
    // The `k` argument of generateTopKFrequentPatterns.
    val k: Int = 50
    val fps: FPGrowth[String] = new FPGrowth[String]()

    val milk = "milk"
    val bread = "bread"
    val butter = "butter"
    val bier = "bier"

    // Sample transactions as (items, weight) pairs, held in a List so they
    // can be traversed more than once.
    val transactions: List[JPair[JList[String], JLong]] =
      List[JPair[JList[String], JLong]](
        new JPair(List(milk, bread), 1L),
        new JPair(List(butter), 1L),
        new JPair(List(bier), 10L),
        new JPair(List(milk, bread, butter), 5L),
        new JPair(List(milk, bread, bier), 5L),
        new JPair(List(bread), 1L)
      )

    // FIX: this used to be a `val` holding a single Iterator. An Iterator can
    // only be walked once: generateFList consumed it, so
    // generateTopKFrequentPatterns received an already-exhausted stream. That
    // is consistent with the "Number of Nodes in the FP Tree: 0" log line and
    // with only single-item patterns being reported.
    // As a `def`, every reference below gets a fresh iterator over the same
    // transactions.
    def transactionStream: Iterator[JPair[JList[String], JLong]] =
      transactions.iterator

    // First pass over the transactions: per-item frequency list.
    val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
      transactionStream, minSupport.toInt)

    println("freqList :" + frequencies)

    // Not passed to the miner at the moment (see the `null` argument below).
    val returnableFeatures: Collection[String] = List(
      milk, bread, butter, bier)

    // Prints every (feature, patterns) result as "feature:[a,b] : support; ...".
    val output: OutputCollector[String, JList[JPair[JList[String], JLong]]] = (
      new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
        def collect(x1: String,
                    x2: JList[JPair[JList[String], JLong]]) = {
          println(x1 + ":" +
            x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
              pair.getSecond).mkString("; "))
        }
      }
    )

    // Echoes the miner's progress messages to stdout.
    val updater: StatusUpdater = new StatusUpdater {
      def update(status: String) = println("updater : " + status)
    }

    // Second pass over the transactions (fresh iterator via the `def` above).
    fps.generateTopKFrequentPatterns(
      transactionStream,
      frequencies,
      minSupport,
      k,
      null, //returnableFeatures
      output,
      updater)

Re: Trouble understanding how to use the FP_Growth algorithm

Posted by 戴清灏 <ro...@gmail.com>.

 only single item itemset?
Maybe your dataset is too sparse or you may lower your support value.
(default is 50)

Regards,
Q

2011/11/21 Sébastien Noir <in...@blackos.com>

> Hi!
>
> I'm currently trying to understand how to use the implementation of the
> FPGrowth algoritm (see :
> https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).
>
> Currently, I'm just trying it with stupid data, and scala code. The
> problem is that it output only single item itemset.
> I probably missed something. Could you give me a hint?
>
> By the way, the code below is scala (calling java implementation
> directly!). It that is a problem, I can translate it to java...
>
> sample outuput :
>
> freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
> 10:47:44,688 INFO  ~ Number of unique items 4
> 10:47:44,688 INFO  ~ Number of unique pruned items 4
> 10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
> 10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
> updater : FPGrowth Algorithm for a given feature: 3
> butter:[butter] : 6
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> milk:[milk] : 11
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
> updater : FPGrowth Algorithm for a given feature: 1
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> bread:[bread] : 12
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
> updater : FPGrowth Algorithm for a given feature: 0
> updater : FPGrowth Algorithm for a given feature: 1
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> bier:[bier] : 15
> 10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15
>
> code :
>
>
>    import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
>    import java.util.HashSet
>    import org.apache.mahout.common.iterator.StringRecordIterator
>    import org.apache.mahout.common.iterator.FileLineIterable
>    import org.apache.mahout.fpm.pfpgrowth.convertors._
>    import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
>    import org.apache.mahout.fpm.pfpgrowth.convertors.string._
>    import org.apache.hadoop.io.SequenceFile.Writer
>    import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
>    import org.apache.hadoop.mapred.OutputCollector
>    import scala.collection.JavaConversions._
>    import java.util.{ List => JList }
>    import org.apache.mahout.common.{ Pair => JPair }
>    import java.lang.{ Long => JLong }
>    import org.apache.hadoop.io.{ Text => JText }
>
>    val minSupport = 1L
>    val k: Int = 50
>    val fps: FPGrowth[String] = new FPGrowth[String]()
>
>    val milk = "milk"
>    val bread = "bread"
>    val butter = "butter"
>    val bier = "bier"
>
>    val transactionStream: Iterator[JPair[JList[String], JLong]] = Iterator(
>      new JPair(List(milk, bread), 1L),
>      new JPair(List(butter), 1L),
>      new JPair(List(bier), 10L),
>      new JPair(List(milk, bread, butter), 5L),
>      new JPair(List(milk, bread, bier), 5L),
>      new JPair(List(bread), 1L)
>    )
>
>    val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
>      transactionStream, minSupport.toInt)
>
>    println("freqList :" + frequencies)
>
>    var returnableFeatures: Collection[String] = List(
>      milk, bread, butter, bier)
>
>    var output: OutputCollector[String, JList[JPair[JList[String], JLong]]]
> = (
>      new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
>        def collect(x1: String,
>                    x2: JList[JPair[JList[String], JLong]]) = {
>          println(x1 + ":" +
>            x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
>              pair.getSecond).mkString("; "))
>        }
>      }
>    )
>
>    val updater: StatusUpdater = new StatusUpdater {
>      def update(status: String) = println("updater : " + status)
>    }
>
>    fps.generateTopKFrequentPatterns(
>      transactionStream,
>      frequencies,
>      minSupport,
>      k,
>      null, //returnableFeatures
>      output,
>      updater)
>
>
>
>
>
>
>
>

Re: Trouble understanding how to use the FP_Growth algorithm

Posted by 戴清灏 <ro...@gmail.com>.

Now it's morning in China.
Morning! I have waken up.
You may try this way:
There is another sequential implementation of fp-growth by Borgelt.
Link is here: http://www.borgelt.net/fpgrowth.html
You may download it. After compiling, you can try to run this on the same
dataset with the same arguments.
If you get the same result, then mahout may have been working well.
If not, try to checkout more of the log.

Regards,
Q


2011/11/22 Grant Ingersoll <gs...@apache.org>

> Could you try comparing your dataset when using the bin/mahout process and
> report back here?
>
> On Nov 21, 2011, at 4:49 AM, Sébastien Noir wrote:
>
> > Hi!
> >
> > I'm currently trying to understand how to use the implementation of the
> FPGrowth algoritm (see :
> https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).
> >
> > Currently, I'm just trying it with stupid data, and scala code. The
> problem is that it output only single item itemset.
> > I probably missed something. Could you give me a hint?
> >
> > By the way, the code below is scala (calling java implementation
> directly!). It that is a problem, I can translate it to java...
> >
> > sample outuput :
> >
> > freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
> > 10:47:44,688 INFO  ~ Number of unique items 4
> > 10:47:44,688 INFO  ~ Number of unique pruned items 4
> > 10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
> > 10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
> > updater : FPGrowth Algorithm for a given feature: 3
> > butter:[butter] : 6
> > 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
> > 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
> > updater : FPGrowth Algorithm for a given feature: 2
> > updater : FPGrowth Algorithm for a given feature: 3
> > milk:[milk] : 11
> > 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
> > 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
> > updater : FPGrowth Algorithm for a given feature: 1
> > updater : FPGrowth Algorithm for a given feature: 2
> > updater : FPGrowth Algorithm for a given feature: 3
> > bread:[bread] : 12
> > 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
> > 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
> > updater : FPGrowth Algorithm for a given feature: 0
> > updater : FPGrowth Algorithm for a given feature: 1
> > updater : FPGrowth Algorithm for a given feature: 2
> > updater : FPGrowth Algorithm for a given feature: 3
> > bier:[bier] : 15
> > 10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15
> >
> > code :
> >
> >
> >    import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
> >    import java.util.HashSet
> >    import org.apache.mahout.common.iterator.StringRecordIterator
> >    import org.apache.mahout.common.iterator.FileLineIterable
> >    import org.apache.mahout.fpm.pfpgrowth.convertors._
> >    import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
> >    import org.apache.mahout.fpm.pfpgrowth.convertors.string._
> >    import org.apache.hadoop.io.SequenceFile.Writer
> >    import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
> >    import org.apache.hadoop.mapred.OutputCollector
> >    import scala.collection.JavaConversions._
> >    import java.util.{ List => JList }
> >    import org.apache.mahout.common.{ Pair => JPair }
> >    import java.lang.{ Long => JLong }
> >    import org.apache.hadoop.io.{ Text => JText }
> >
> >    val minSupport = 1L
> >    val k: Int = 50
> >    val fps: FPGrowth[String] = new FPGrowth[String]()
> >
> >    val milk = "milk"
> >    val bread = "bread"
> >    val butter = "butter"
> >    val bier = "bier"
> >
> >    val transactionStream: Iterator[JPair[JList[String], JLong]] =
> Iterator(
> >      new JPair(List(milk, bread), 1L),
> >      new JPair(List(butter), 1L),
> >      new JPair(List(bier), 10L),
> >      new JPair(List(milk, bread, butter), 5L),
> >      new JPair(List(milk, bread, bier), 5L),
> >      new JPair(List(bread), 1L)
> >    )
> >
> >    val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
> >      transactionStream, minSupport.toInt)
> >
> >    println("freqList :" + frequencies)
> >
> >    var returnableFeatures: Collection[String] = List(
> >      milk, bread, butter, bier)
> >
> >    var output: OutputCollector[String, JList[JPair[JList[String],
> JLong]]] = (
> >      new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
> >        def collect(x1: String,
> >                    x2: JList[JPair[JList[String], JLong]]) = {
> >          println(x1 + ":" +
> >            x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
> >              pair.getSecond).mkString("; "))
> >        }
> >      }
> >    )
> >
> >    val updater: StatusUpdater = new StatusUpdater {
> >      def update(status: String) = println("updater : " + status)
> >    }
> >
> >    fps.generateTopKFrequentPatterns(
> >      transactionStream,
> >      frequencies,
> >      minSupport,
> >      k,
> >      null, //returnableFeatures
> >      output,
> >      updater)
> >
> >
> >
> >
> >
> >
> >
>
> --------------------------------------------
> Grant Ingersoll
> http://www.lucidimagination.com
>
>
>
>

Re: Trouble understanding how to use the FP_Growth algorithm

Posted by Sébastien Noir <in...@blackos.com>.

Hi again!

The problem is solved. The code was buggy.
Details here : http://stackoverflow.com/questions/8215375/why-does-apache-mahout-frequent-pattern-minnig-algorithm-return-only-1-item-item

Greetings from switzerland,
Sébastien

> Hi again!
> 
> I tried the command line. The outuput is NOT the same.
> 
> Sample Data is (in file:) :
> 1	bier butter bread
> 2	bier bread
> 3	bier butter
> 4	bier milk bread butter
> 5	bread bier
> 6	bier milk butter
> 
> Sample session / output (logging removed):
> $ ./mahout fpg -i /Users/snoir/Desktop/SampleFPData.txt -o patterns -k 50 -method sequential -regex '[\ ]' -s 2
> 
> INFO: Dumping Patterns for Feature: milk 
> ([butter, milk],2)
> 
> INFO: Dumping Patterns for Feature: bread 
> ([bread],3), ([bread, butter],2)
> 
> INFO: Dumping Patterns for Feature: butter 
> ([butter],4), ([butter, milk],2), ([bread, butter],2)
> 
> To my understanding, the command line output is correct. The code version gives a bad result.
> 
> Comments welcome!
> 
> Best,
> Sébastien
> 
> 
> 
> On 21 nov. 2011, at 21:59, Grant Ingersoll wrote:
> 
>> Could you try comparing your dataset when using the bin/mahout process and report back here?
>> 
>> On Nov 21, 2011, at 4:49 AM, Sébastien Noir wrote:
>> 
>>> Hi!
>>> 
>>> I'm currently trying to understand how to use the implementation of the FPGrowth algoritm (see : https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).
>>> 
>>> Currently, I'm just trying it with stupid data, and scala code. The problem is that it output only single item itemset.
>>> I probably missed something. Could you give me a hint?
>>> 
>>> By the way, the code below is scala (calling java implementation directly!). It that is a problem, I can translate it to java...
>>> 
>>> sample outuput :
>>> 
>>> freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
>>> 10:47:44,688 INFO  ~ Number of unique items 4
>>> 10:47:44,688 INFO  ~ Number of unique pruned items 4
>>> 10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
>>> 10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
>>> updater : FPGrowth Algorithm for a given feature: 3
>>> butter:[butter] : 6
>>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
>>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
>>> updater : FPGrowth Algorithm for a given feature: 2
>>> updater : FPGrowth Algorithm for a given feature: 3
>>> milk:[milk] : 11
>>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
>>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
>>> updater : FPGrowth Algorithm for a given feature: 1
>>> updater : FPGrowth Algorithm for a given feature: 2
>>> updater : FPGrowth Algorithm for a given feature: 3
>>> bread:[bread] : 12
>>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
>>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
>>> updater : FPGrowth Algorithm for a given feature: 0
>>> updater : FPGrowth Algorithm for a given feature: 1
>>> updater : FPGrowth Algorithm for a given feature: 2
>>> updater : FPGrowth Algorithm for a given feature: 3
>>> bier:[bier] : 15
>>> 10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15
>>> 
>>> code :
>>> 
>>> 
>>>  import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
>>>  import java.util.HashSet
>>>  import org.apache.mahout.common.iterator.StringRecordIterator
>>>  import org.apache.mahout.common.iterator.FileLineIterable
>>>  import org.apache.mahout.fpm.pfpgrowth.convertors._
>>>  import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
>>>  import org.apache.mahout.fpm.pfpgrowth.convertors.string._
>>>  import org.apache.hadoop.io.SequenceFile.Writer
>>>  import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
>>>  import org.apache.hadoop.mapred.OutputCollector
>>>  import scala.collection.JavaConversions._
>>>  import java.util.{ List => JList }
>>>  import org.apache.mahout.common.{ Pair => JPair }
>>>  import java.lang.{ Long => JLong }
>>>  import org.apache.hadoop.io.{ Text => JText }
>>> 
>>>  val minSupport = 1L
>>>  val k: Int = 50
>>>  val fps: FPGrowth[String] = new FPGrowth[String]()
>>> 
>>>  val milk = "milk"
>>>  val bread = "bread"
>>>  val butter = "butter"
>>>  val bier = "bier"
>>> 
>>>  val transactionStream: Iterator[JPair[JList[String], JLong]] = Iterator(
>>>    new JPair(List(milk, bread), 1L),
>>>    new JPair(List(butter), 1L),
>>>    new JPair(List(bier), 10L),
>>>    new JPair(List(milk, bread, butter), 5L),
>>>    new JPair(List(milk, bread, bier), 5L),
>>>    new JPair(List(bread), 1L)
>>>  )
>>> 
>>>  val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
>>>    transactionStream, minSupport.toInt)
>>> 
>>>  println("freqList :" + frequencies)
>>> 
>>>  var returnableFeatures: Collection[String] = List(
>>>    milk, bread, butter, bier)
>>> 
>>>  var output: OutputCollector[String, JList[JPair[JList[String], JLong]]] = (
>>>    new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
>>>      def collect(x1: String,
>>>                  x2: JList[JPair[JList[String], JLong]]) = {
>>>        println(x1 + ":" +
>>>          x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
>>>            pair.getSecond).mkString("; "))
>>>      }
>>>    }
>>>  )
>>> 
>>>  val updater: StatusUpdater = new StatusUpdater {
>>>    def update(status: String) = println("updater : " + status)
>>>  }
>>> 
>>>  fps.generateTopKFrequentPatterns(
>>>    transactionStream,
>>>    frequencies,
>>>    minSupport,
>>>    k,
>>>    null, //returnableFeatures
>>>    output,
>>>    updater)
>>> 
>>> 
>>> 	
>>> 
>>> 
>>> 
>>> 
>> 
>> --------------------------------------------
>> Grant Ingersoll
>> http://www.lucidimagination.com
>> 
>> 
>> 
>

Re: Trouble understanding how to use the FP_Growth algorithm

Posted by Sébastien Noir <in...@blackos.com>.

Hi again!

I tried the command line. The output is NOT the same.

Sample Data is (in file:) :
1	bier butter bread
2	bier bread
3	bier butter
4	bier milk bread butter
5	bread bier
6	bier milk butter

Sample session / output (logging removed):
$ ./mahout fpg -i /Users/snoir/Desktop/SampleFPData.txt -o patterns -k 50 -method sequential -regex '[\ ]' -s 2

INFO: Dumping Patterns for Feature: milk 
([butter, milk],2)

INFO: Dumping Patterns for Feature: bread 
([bread],3), ([bread, butter],2)

INFO: Dumping Patterns for Feature: butter 
([butter],4), ([butter, milk],2), ([bread, butter],2)

To my understanding, the command line output is correct. The code version gives a bad result.

Comments welcome!

Best,
Sébastien



On 21 nov. 2011, at 21:59, Grant Ingersoll wrote:

> Could you try comparing your dataset when using the bin/mahout process and report back here?
> 
> On Nov 21, 2011, at 4:49 AM, Sébastien Noir wrote:
> 
>> Hi!
>> 
>> I'm currently trying to understand how to use the implementation of the FPGrowth algoritm (see : https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).
>> 
>> Currently, I'm just trying it with stupid data, and scala code. The problem is that it output only single item itemset.
>> I probably missed something. Could you give me a hint?
>> 
>> By the way, the code below is scala (calling java implementation directly!). It that is a problem, I can translate it to java...
>> 
>> sample outuput :
>> 
>> freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
>> 10:47:44,688 INFO  ~ Number of unique items 4
>> 10:47:44,688 INFO  ~ Number of unique pruned items 4
>> 10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
>> 10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
>> updater : FPGrowth Algorithm for a given feature: 3
>> butter:[butter] : 6
>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
>> updater : FPGrowth Algorithm for a given feature: 2
>> updater : FPGrowth Algorithm for a given feature: 3
>> milk:[milk] : 11
>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
>> updater : FPGrowth Algorithm for a given feature: 1
>> updater : FPGrowth Algorithm for a given feature: 2
>> updater : FPGrowth Algorithm for a given feature: 3
>> bread:[bread] : 12
>> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
>> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
>> updater : FPGrowth Algorithm for a given feature: 0
>> updater : FPGrowth Algorithm for a given feature: 1
>> updater : FPGrowth Algorithm for a given feature: 2
>> updater : FPGrowth Algorithm for a given feature: 3
>> bier:[bier] : 15
>> 10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15
>> 
>> code :
>> 
>> 
>>   import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
>>   import java.util.HashSet
>>   import org.apache.mahout.common.iterator.StringRecordIterator
>>   import org.apache.mahout.common.iterator.FileLineIterable
>>   import org.apache.mahout.fpm.pfpgrowth.convertors._
>>   import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
>>   import org.apache.mahout.fpm.pfpgrowth.convertors.string._
>>   import org.apache.hadoop.io.SequenceFile.Writer
>>   import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
>>   import org.apache.hadoop.mapred.OutputCollector
>>   import scala.collection.JavaConversions._
>>   import java.util.{ List => JList }
>>   import org.apache.mahout.common.{ Pair => JPair }
>>   import java.lang.{ Long => JLong }
>>   import org.apache.hadoop.io.{ Text => JText }
>> 
>>   val minSupport = 1L
>>   val k: Int = 50
>>   val fps: FPGrowth[String] = new FPGrowth[String]()
>> 
>>   val milk = "milk"
>>   val bread = "bread"
>>   val butter = "butter"
>>   val bier = "bier"
>> 
>>   val transactionStream: Iterator[JPair[JList[String], JLong]] = Iterator(
>>     new JPair(List(milk, bread), 1L),
>>     new JPair(List(butter), 1L),
>>     new JPair(List(bier), 10L),
>>     new JPair(List(milk, bread, butter), 5L),
>>     new JPair(List(milk, bread, bier), 5L),
>>     new JPair(List(bread), 1L)
>>   )
>> 
>>   val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
>>     transactionStream, minSupport.toInt)
>> 
>>   println("freqList :" + frequencies)
>> 
>>   var returnableFeatures: Collection[String] = List(
>>     milk, bread, butter, bier)
>> 
>>   var output: OutputCollector[String, JList[JPair[JList[String], JLong]]] = (
>>     new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
>>       def collect(x1: String,
>>                   x2: JList[JPair[JList[String], JLong]]) = {
>>         println(x1 + ":" +
>>           x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
>>             pair.getSecond).mkString("; "))
>>       }
>>     }
>>   )
>> 
>>   val updater: StatusUpdater = new StatusUpdater {
>>     def update(status: String) = println("updater : " + status)
>>   }
>> 
>>   fps.generateTopKFrequentPatterns(
>>     transactionStream,
>>     frequencies,
>>     minSupport,
>>     k,
>>     null, //returnableFeatures
>>     output,
>>     updater)
>> 
>> 
>> 	
>> 
>> 
>> 
>> 
> 
> --------------------------------------------
> Grant Ingersoll
> http://www.lucidimagination.com
> 
> 
>

Re: Trouble understanding how to use the FP_Growth algorithm

Posted by Grant Ingersoll <gs...@apache.org>.

Could you try comparing your dataset when using the bin/mahout process and report back here?

On Nov 21, 2011, at 4:49 AM, Sébastien Noir wrote:

> Hi!
> 
> I'm currently trying to understand how to use the implementation of the FPGrowth algoritm (see : https://cwiki.apache.org/MAHOUT/parallel-frequent-pattern-mining.html).
> 
> Currently, I'm just trying it with stupid data, and scala code. The problem is that it output only single item itemset.
> I probably missed something. Could you give me a hint?
> 
> By the way, the code below is scala (calling java implementation directly!). It that is a problem, I can translate it to java...
> 
> sample outuput :
> 
> freqList :Buffer((bier,15), (bread,12), (milk,11), (butter,6))
> 10:47:44,688 INFO  ~ Number of unique items 4
> 10:47:44,688 INFO  ~ Number of unique pruned items 4
> 10:47:44,688 INFO  ~ Number of Nodes in the FP Tree: 0
> 10:47:44,688 INFO  ~ Mining FTree Tree for all patterns with 3
> updater : FPGrowth Algorithm for a given feature: 3
> butter:[butter] : 6
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 6
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 2
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> milk:[milk] : 11
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 11
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 1
> updater : FPGrowth Algorithm for a given feature: 1
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> bread:[bread] : 12
> 10:47:44,690 INFO  ~ Found 1 Patterns with Least Support 12
> 10:47:44,690 INFO  ~ Mining FTree Tree for all patterns with 0
> updater : FPGrowth Algorithm for a given feature: 0
> updater : FPGrowth Algorithm for a given feature: 1
> updater : FPGrowth Algorithm for a given feature: 2
> updater : FPGrowth Algorithm for a given feature: 3
> bier:[bier] : 15
> 10:47:44,691 INFO  ~ Found 1 Patterns with Least Support 15
> 
> code :
> 
> 
>    import org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
>    import java.util.HashSet
>    import org.apache.mahout.common.iterator.StringRecordIterator
>    import org.apache.mahout.common.iterator.FileLineIterable
>    import org.apache.mahout.fpm.pfpgrowth.convertors._
>    import org.apache.mahout.fpm.pfpgrowth.convertors.integer._
>    import org.apache.mahout.fpm.pfpgrowth.convertors.string._
>    import org.apache.hadoop.io.SequenceFile.Writer
>    import org.apache.mahout.fpm.pfpgrowth.convertors.StatusUpdater
>    import org.apache.hadoop.mapred.OutputCollector
>    import scala.collection.JavaConversions._
>    import java.util.{ List => JList }
>    import org.apache.mahout.common.{ Pair => JPair }
>    import java.lang.{ Long => JLong }
>    import org.apache.hadoop.io.{ Text => JText }
> 
>    val minSupport = 1L
>    val k: Int = 50
>    val fps: FPGrowth[String] = new FPGrowth[String]()
> 
>    val milk = "milk"
>    val bread = "bread"
>    val butter = "butter"
>    val bier = "bier"
> 
>    val transactionStream: Iterator[JPair[JList[String], JLong]] = Iterator(
>      new JPair(List(milk, bread), 1L),
>      new JPair(List(butter), 1L),
>      new JPair(List(bier), 10L),
>      new JPair(List(milk, bread, butter), 5L),
>      new JPair(List(milk, bread, bier), 5L),
>      new JPair(List(bread), 1L)
>    )
> 
>    val frequencies: Collection[JPair[String, JLong]] = fps.generateFList(
>      transactionStream, minSupport.toInt)
> 
>    println("freqList :" + frequencies)
> 
>    var returnableFeatures: Collection[String] = List(
>      milk, bread, butter, bier)
> 
>    var output: OutputCollector[String, JList[JPair[JList[String], JLong]]] = (
>      new OutputCollector[String, JList[JPair[JList[String], JLong]]] {
>        def collect(x1: String,
>                    x2: JList[JPair[JList[String], JLong]]) = {
>          println(x1 + ":" +
>            x2.map(pair => "[" + pair.getFirst.mkString(",") + "] : " +
>              pair.getSecond).mkString("; "))
>        }
>      }
>    )
> 
>    val updater: StatusUpdater = new StatusUpdater {
>      def update(status: String) = println("updater : " + status)
>    }
> 
>    fps.generateTopKFrequentPatterns(
>      transactionStream,
>      frequencies,
>      minSupport,
>      k,
>      null, //returnableFeatures
>      output,
>      updater)
> 
> 
> 	
> 
> 
> 
> 

--------------------------------------------
Grant Ingersoll
http://www.lucidimagination.com