You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by ra...@apache.org on 2017/11/28 21:37:56 UTC

mahout git commit: NOJIRA Fix LastFM CCO Row Cardinality Bug closes apache/mahout#351

Repository: mahout
Updated Branches:
  refs/heads/master 1d198100a -> defbbd20f


NOJIRA Fix LastFM CCO Row Cardinality Bug closes apache/mahout#351


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/defbbd20
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/defbbd20
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/defbbd20

Branch: refs/heads/master
Commit: defbbd20f78c7b9e0bcc3a81d3d79d76be32cf23
Parents: 1d19810
Author: Trevor a.k.a @rawkintrevo <tr...@gmail.com>
Authored: Tue Nov 28 15:37:29 2017 -0600
Committer: Trevor a.k.a @rawkintrevo <tr...@gmail.com>
Committed: Tue Nov 28 15:37:29 2017 -0600

----------------------------------------------------------------------
 .../docs/tutorials/cco-lastfm/cco-lastfm.scala  | 33 ++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/defbbd20/website/docs/tutorials/cco-lastfm/cco-lastfm.scala
----------------------------------------------------------------------
diff --git a/website/docs/tutorials/cco-lastfm/cco-lastfm.scala b/website/docs/tutorials/cco-lastfm/cco-lastfm.scala
index 6ba46a9..709ab2a 100644
--- a/website/docs/tutorials/cco-lastfm/cco-lastfm.scala
+++ b/website/docs/tutorials/cco-lastfm/cco-lastfm.scala
@@ -32,10 +32,39 @@ val userArtistsIDS = IndexedDatasetSpark.apply(userArtistsRDD)(sc)
 val userFriendsRDD = sc.textFile("/path/to/data/lastfm/user_friends.dat").map(line => line.split("\t")).map(a => (a(0), a(1))).filter(_._1 != "userID")
 val userFriendsIDS = IndexedDatasetSpark.apply(userFriendsRDD)(sc)
 
-import org.apache.mahout.math.cf.SimilarityAnalysis
+val primaryIDS = userFriendsIDS // dataset whose rowIDs are used below to filter and re-index every secondary action
+val secondaryActionRDDs = List(userArtistsRDD, userTagsRDD) // raw RDDs; each is rebuilt below against primaryIDS.rowIDs
+
+import org.apache.mahout.math.indexeddataset.{IndexedDataset, BiDictionary}
+
+/** Returns `datasetA` resized to `rowCardinality` rows, or `datasetA` itself when its matrix already has that many. */
+def adjustRowCardinality(rowCardinality: Integer, datasetA: IndexedDataset): IndexedDataset = {
+  val alreadyMatches = rowCardinality == datasetA.matrix.nrow
+  // Resize only on a mismatch; either branch yields a dataset with the requested row cardinality.
+  if (alreadyMatches) datasetA else datasetA.newRowCardinality(rowCardinality)
+}
+
+var rowCardinality = primaryIDS.rowIDs.size // target row count passed to adjustRowCardinality: size of the primary dataset's rowIDs
 
-val artistReccosLlrDrmListByArtist = SimilarityAnalysis.cooccurrencesIDSs(Array(userArtistsIDS, userTagsIDS, userFriendsIDS), maxInterestingItemsPerThing = 20, maxNumInteractions = 500, randomSeed = 1234)
+// Broadcast the primary row IDs once; the value is identical for every secondary action.
+val bcPrimaryRowIDs = sc.broadcast(primaryIDS.rowIDs)
+
+// Rebuild each secondary action against the primary row IDs so every dataset ends up with rowCardinality rows.
+val secondaryActionIDS: Array[IndexedDataset] = secondaryActionRDDs.map { actionRDD =>
+  // Keep only records whose first tuple element is present in the primary dataset's row IDs.
+  val tempRDD = actionRDD.filter(a => bcPrimaryRowIDs.value.contains(a._1))
+  // Pass the primary row IDs as existingRowIDs when constructing the dataset.
+  val tempIDS = IndexedDatasetSpark.apply(tempRDD, existingRowIDs = Some(primaryIDS.rowIDs))(sc)
+  adjustRowCardinality(rowCardinality, tempIDS)
+}.toArray
+
+import org.apache.mahout.math.cf.SimilarityAnalysis
 
+// Primary dataset first, followed by every rebuilt secondary dataset (no hard-coded indices).
+val artistReccosLlrDrmListByArtist = SimilarityAnalysis.cooccurrencesIDSs(
+  primaryIDS +: secondaryActionIDS,
+  maxInterestingItemsPerThing = 20, maxNumInteractions = 500,
+  randomSeed = 1234)
 // Anonymous User
 
 val artistMap = sc.textFile("/path/to/lastfm/artists.dat").map(line => line.split("\t")).map(a => (a(1), a(0))).filter(_._1 != "name").collect.toMap