You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by pa...@apache.org on 2015/04/21 20:50:57 UTC
svn commit: r1675182 - /mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext

Author: pat
Date: Tue Apr 21 18:50:57 2015
New Revision: 1675182

URL: http://svn.apache.org/r1675182
Log:
CMS commit to mahout by pat

Modified:
    mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext

Modified: mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext
URL: http://svn.apache.org/viewvc/mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext?rev=1675182&r1=1675181&r2=1675182&view=diff
==============================================================================
--- mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext (original)
+++ mahout/site/mahout_cms/trunk/content/users/environment/how-to-build-an-app.mdtext Tue Apr 21 18:50:57 2015
@@ -46,8 +46,8 @@ Mahout has a helper function that reads
 Notice we read in all datasets before we adjust the number of rows in them to match the total number of users in the data. This is so the math works out even if some users took one action but not another.
 
     /**
-     * Read files of element tuples and create IndexedDatasets one per action. These share a 
-     * userID BiMap but have their own itemID BiMaps
+     * Read files of element tuples and create IndexedDatasets one per action. These 
+     * share a userID BiMap but have their own itemID BiMaps
      */
     def readActions(actionInput: Array[(String, String)]): Array[(String, IndexedDataset)] = {
       var actions = Array[(String, IndexedDataset)]()
@@ -74,8 +74,7 @@ Notice we read in all datasets before we
 
       val resizedNameActionPairs = actions.map { a =>
         // resize the matrix, in effect by adding empty rows
-        val resizedMatrix = 
-          a._2.create(a._2.matrix, userDictionary, a._2.columnIDs).newRowCardinality(numUsers)
+        val resizedMatrix = a._2.create(a._2.matrix, userDictionary, a._2.columnIDs).newRowCardinality(numUsers)
         (a._1, resizedMatrix) // return the Tuple of (name, IndexedDataset)
       }
       resizedNameActionPairs // return the array of Tuples
@@ -85,7 +84,7 @@ Notice we read in all datasets before we
 Now that we have the data read in we can perform the cooccurrence calculation.
 
 
-    // strip off names, which only takes and array of IndexedDatasets
+    // strip off the names; the method takes an array of IndexedDatasets
     val indicatorMatrices = SimilarityAnalysis.cooccurrencesIDSs(actions.map(a => a._2))
 
 
@@ -100,13 +99,16 @@ The ```writeIndicators``` method uses th
 
     /**
      * Write indicatorMatrices to the output dir in the default format
+     * for indexing by a search engine.
      */
     def writeIndicators( indicators: Array[(String, IndexedDataset)]) = {
       for (indicator <- indicators ) {
+        // create a name based on the type of indicator
         val indicatorDir = OutputPath + indicator._1
         indicator._2.dfsWrite(
-          indicatorDir, // do we have to remove the last $ char?
-          // omit LLR strengths and format for search engine indexing
+          indicatorDir,
+          // Schema tells the writer to omit LLR strengths 
+          // and format for search engine indexing
           IndexedDatasetWriteBooleanSchema) 
       }
     }
@@ -141,7 +143,8 @@ See the Github project for the full sour
 
     packSettings
 
-    packMain := Map("cooc" -> "CooccurrenceDriver")
+    packMain := Map(
+      "cooc" -> "CooccurrenceDriver")
 
 
 ##Build