You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@mahout.apache.org by sm...@apache.org on 2016/05/29 17:06:45 UTC
mahout git commit: MAHOUT-1866: Add matrix-to-tsv string function,
this closes apache/mahout#237
Repository: mahout
Updated Branches:
refs/heads/master 1f3566d35 -> 8f4ee88fb
MAHOUT-1866: Add matrix-to-tsv string function, this closes apache/mahout#237
Project: http://git-wip-us.apache.org/repos/asf/mahout/repo
Commit: http://git-wip-us.apache.org/repos/asf/mahout/commit/8f4ee88f
Tree: http://git-wip-us.apache.org/repos/asf/mahout/tree/8f4ee88f
Diff: http://git-wip-us.apache.org/repos/asf/mahout/diff/8f4ee88f
Branch: refs/heads/master
Commit: 8f4ee88fb40710d983ea3fb6ad008317f6c00936
Parents: 1f3566d
Author: smarthi <sm...@apache.org>
Authored: Sun May 29 13:06:59 2016 -0400
Committer: smarthi <sm...@apache.org>
Committed: Sun May 29 13:06:59 2016 -0400
----------------------------------------------------------------------
.../org/apache/mahout/math/drm/package.scala | 36 ++++++++++++++++++++
1 file changed, 36 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/mahout/blob/8f4ee88f/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
----------------------------------------------------------------------
diff --git a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
index 291c538..86c7054 100644
--- a/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
+++ b/math-scala/src/main/scala/org/apache/mahout/math/drm/package.scala
@@ -148,6 +148,42 @@ package object drm {
def drmSampleKRows[K](drmX: DrmLike[K], numSamples: Int, replacement: Boolean = false): Matrix =
drmX.context.engine.drmSampleKRows(drmX, numSamples, replacement)
+ /**
+  * Sample rows of a DRM and render them as a Tab-Separated Values (TSV) string, e.g. to be
+  * loaded into an R data frame for plotting and sketching.
+  *
+  * Each sampled row becomes one line ending in '\n'; cells within a row are separated by '\t'.
+  * Rows are drawn without replacement through `drmSampleKRows`.
+  *
+  * @param drmX DRM to sample from
+  * @param samplePercent percentage of the DRM's rows to sample (the row count is multiplied by
+  *                      samplePercent / 100 and truncated to an Int); defaults to 1
+  * @tparam K key type parameter of the DRM
+  * @return the sampled rows as a TSV string
+  */
+ def drmSampleToTSV[K](drmX: DrmLike[K], samplePercent: Double = 1): String = {
+   val drmSize = drmX.checkpoint().numRows()
+   val sampleRatio: Double = 1.0 * samplePercent / 100
+   val numSamples: Int = (drmSize * sampleRatio).toInt
+
+   val plotMatrix = drmSampleKRows(drmX, numSamples, replacement = false)
+   val matrixRows = plotMatrix.numRows()
+   val matrixCols = plotMatrix.numCols()
+
+   // Accumulate into a StringBuilder: repeated String `+=` recopies the whole text on
+   // every cell, which is quadratic in the size of the output.
+   val tsv = new StringBuilder
+   for (i <- 0 until matrixRows) {
+     for (j <- 0 until matrixCols) {
+       if (j > 0) tsv.append('\t')
+       tsv.append(plotMatrix(i, j))
+     }
+     tsv.append('\n')
+   }
+
+   tsv.toString()
+ }
+
///////////////////////////////////////////////////////////
// Elementwise unary functions on distributed operands.
def dexp[K](drmA: DrmLike[K]): DrmLike[K] = new OpAewUnaryFunc[K](drmA, math.exp, true)