You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2016/05/29 17:06:45 UTC

mahout git commit: MAHOUT-1866: Add matrix-to-tsv string function, this closes apache/mahout#237

Repository: mahout
Updated Branches:
  refs/heads/master 1f3566d35 -> 8f4ee88fb


MAHOUT-1866: Add matrix-to-tsv string function, this closes apache/mahout#237


Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8f4ee88f
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8f4ee88f
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8f4ee88f

Branch: refs/heads/master
Commit: 8f4ee88fb40710d983ea3fb6ad008317f6c00936
Parents: 1f3566d
Author: smarthi <sm...@apache.org>
Authored: Sun May 29 13:06:59 2016 -0400
Committer: smarthi <sm...@apache.org>
Committed: Sun May 29 13:06:59 2016 -0400

----------------------------------------------------------------------
 .../org/apache/mahout/math/drm/package.scala    | 36 ++++++++++++++++++++
 1 file changed, 36 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/mahout/blob/8f4ee88f/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
index 291c538..86c7054 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
@@ -148,6 +148,42 @@ package object drm {
   def drmSampleKRows[K](drmX: DrmLike[K], numSamples: Int, replacement: Boolean = false): Matrix =
     drmX.context.engine.drmSampleKRows(drmX, numSamples, replacement)
 
+  /**
+    * Convert a sample of a DRM's rows into a Tab Separated Values (TSV) string, e.g. to be loaded
+    * into an R data frame for plotting and sketching. Every row of the sampled matrix becomes one
+    * line ending in '\n', with its cells separated by '\t'.
+    * @param drmX - DRM to sample rows from
+    * @param samplePercent - Percentage of the DRM's rows to sample, on a 0-100 scale (the default
+    *                      of 1 means 1%). The requested row count is truncated toward zero, so
+    *                      the default requests 0 rows from a DRM with fewer than 100 rows.
+    * @tparam K row key type of the DRM
+    * @return TSV String; empty if the sampled matrix has no rows
+    */
+  def drmSampleToTSV[K](drmX: DrmLike[K], samplePercent: Double = 1): String = {
+
+    val drmSize = drmX.checkpoint().numRows()
+    val sampleRatio: Double = 1.0 * samplePercent / 100
+    val numSamples: Int = (drmSize * sampleRatio).toInt
+
+    val plotMatrix = drmSampleKRows(drmX, numSamples, replacement = false)
+
+    val matrixRows = plotMatrix.numRows()
+    val matrixCols = plotMatrix.numCols()
+
+    // Accumulate into one StringBuilder: growing an immutable String with `+=` re-copies all of
+    // the text built so far on every cell, which is quadratic in the size of the output.
+    val tsv = new StringBuilder
+    for (i <- 0 until matrixRows) {
+      for (j <- 0 until matrixCols) {
+        if (j > 0) tsv.append('\t') // separator goes between cells, never after the last one
+        tsv.append(plotMatrix(i, j))
+      }
+      tsv.append('\n')
+    }
+
+    tsv.toString
+  }
+
   ///////////////////////////////////////////////////////////
   // Elementwise unary functions on distributed operands.
   def dexp[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.exp, true)