You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/07/20 08:25:32 UTC

[1/3] systemml git commit: [SYSTEMML-1790] Fix frame block reset w/ #rows larger than allocated

Repository: systemml
Updated Branches:
  refs/heads/master 614f5ab52 -> cca4f942c


[SYSTEMML-1790] Fix frame block reset w/ #rows larger than allocated

A FrameBlock reset, e.g., on feeding the same reuse frame block multiple
times into slice with different data sizes, currently does not work
properly, leading to an ArrayIndexOutOfBoundsException on the actual
data copy if the target is larger than the previously allocated block.
This patch fixes this issue by providing reset functionality on the
individual column arrays.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/7cd978df
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/7cd978df
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/7cd978df

Branch: refs/heads/master
Commit: 7cd978df5b218e6ba886ea6cb905abfac3aa49a7
Parents: 614f5ab
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Jul 19 20:30:29 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Jul 20 01:24:22 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/FrameBlock.java   | 23 +++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/7cd978df/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index 512b85c..bfe236e 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -360,7 +360,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		}
 		if(_coldata != null) {
 			for( int i=0; i < _coldata.length; i++ )
-				_coldata[i]._size = nrow;
+				_coldata[i].reset(nrow);
 		}
 	}
 
@@ -1258,6 +1258,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		public abstract void append(T value);
 		public abstract Array clone();
 		public abstract Array slice(int rl, int ru);
+		public abstract void reset(int size); 
 	}
 
 	private static class StringArray extends Array<String> {
@@ -1307,6 +1308,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		public Array slice(int rl, int ru) {
 			return new StringArray(Arrays.copyOfRange(_data,rl,ru+1));
 		}
+		public void reset(int size) {
+			if( _data.length < size )
+				_data = new String[size];
+			_size = size;
+		}
 	}
 
 	private static class BooleanArray extends Array<Boolean> {
@@ -1357,6 +1363,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		public Array slice(int rl, int ru) {
 			return new BooleanArray(Arrays.copyOfRange(_data,rl,ru+1));
 		}
+		public void reset(int size) {
+			if( _data.length < size )
+				_data = new boolean[size];
+			_size = size;
+		}
 	}
 
 	private static class LongArray extends Array<Long> {
@@ -1407,6 +1418,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		public Array slice(int rl, int ru) {
 			return new LongArray(Arrays.copyOfRange(_data,rl,ru+1));
 		}
+		public void reset(int size) {
+			if( _data.length < size )
+				_data = new long[size];
+			_size = size;
+		}
 	}
 
 	private static class DoubleArray extends Array<Double> {
@@ -1457,6 +1473,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		public Array slice(int rl, int ru) {
 			return new DoubleArray(Arrays.copyOfRange(_data,rl,ru+1));
 		}
+		public void reset(int size) {
+			if( _data.length < size )
+				_data = new double[size];
+			_size = size;
+		}
 	}
 
 	public static class ColumnMetadata implements Serializable {


[3/3] systemml git commit: [SYSTEMML-1792] Performance sparse-dense block matrix multiply

Posted by mb...@apache.org.
[SYSTEMML-1792] Performance sparse-dense block matrix multiply

Our sparse-dense matrix multiply was already cache-conscious but used
very small static block sizes, which were optimized for moderate
sparsity. However, for cases with very sparse matrices (and skinny
right-hand side matrices), the small block sizes added substantial overhead of
more than an order of magnitude. This patch makes these block sizes
adaptive, consistent with our cache-conscious implementations of
sparsity exploiting matrix multiply operators such as wsloss.

On a scenario with 1K x 2M mini batches of average sparsity 0.0003 and a
dense right hand side of 2M x 100, the runtime improved from ~300ms to
<2ms, without hurting the case of moderate sparsity.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/cca4f942
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/cca4f942
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/cca4f942

Branch: refs/heads/master
Commit: cca4f942cd8d4a2ac0b6e5994ebac4efdcdc06c3
Parents: 4a24b9a
Author: Matthias Boehm <mb...@gmail.com>
Authored: Thu Jul 20 01:21:19 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Jul 20 01:24:27 2017 -0700

----------------------------------------------------------------------
 .../org/apache/sysml/runtime/matrix/data/LibMatrixMult.java | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/cca4f942/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
index 30e7d3d..c2af52d 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/LibMatrixMult.java
@@ -1292,8 +1292,11 @@ public class LibMatrixMult
 			{							
 				//blocksizes to fit blocks of B (dense) and several rows of A/C in common L2 cache size, 
 				//while blocking A/C for L1/L2 yet allowing long scans (2 pages) in the inner loop over j
-				final int blocksizeI = 32;
-				final int blocksizeK = 24; 
+				//in case of almost ultra-sparse matrices, we cannot ensure the blocking for the rhs and
+				//output - however, in this case it's unlikely that we consume every cache line in the rhs
+				
+				final int blocksizeI = (int) (8L*m*cd/m1.nonZeros);
+				final int blocksizeK = (int) (8L*m*cd/m1.nonZeros);
 				final int blocksizeJ = 1024; 
 				
 				//temporary array of current sparse positions
@@ -1314,7 +1317,7 @@ public class LibMatrixMult
 									int[] aix = a.indexes(i);
 									double[] avals = a.values(i);					
 									
-									int k = curk[i-bi] + apos;									
+									int k = curk[i-bi] + apos;			
 					    			//rest not aligned to blocks of 4 rows
 									int bn = alen%4;
 									for( ; k<apos+bn && aix[k]<bkmin; k++ )


[2/3] systemml git commit: [SYSTEMML-1791] Performance frame block indexing and transformapply

Posted by mb...@apache.org.
[SYSTEMML-1791] Performance frame block indexing and transformapply

This patch makes the following performance improvements to various frame
operations in order to remove unnecessary overheads:

(1) Shallow column copy on full column indexing.

(2) Bidirectional reuse of recode maps across original meta data frame
blocks and shallow column copies (e.g., after column indexing).

(3) Avoid unnecessary long-string-double conversions on transformapply
(the recently removed file-based transform required string lookups - we
now avoid this long-string conversion which is unnecessary for the
related frame operations).

Furthermore, this patch also cleans up a couple of methods which
became obsolete after the removal of the old file-based transform.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/4a24b9a7
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/4a24b9a7
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/4a24b9a7

Branch: refs/heads/master
Commit: 4a24b9a78424dc85fe774e6d2dd5689fea9cd5b1
Parents: 7cd978d
Author: Matthias Boehm <mb...@gmail.com>
Authored: Wed Jul 19 23:16:44 2017 -0700
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Thu Jul 20 01:24:24 2017 -0700

----------------------------------------------------------------------
 .../sysml/runtime/matrix/data/FrameBlock.java   | 45 ++++++++++----------
 .../sysml/runtime/transform/encode/Encoder.java | 10 -----
 .../runtime/transform/encode/EncoderBin.java    | 31 --------------
 .../transform/encode/EncoderComposite.java      |  7 ---
 .../transform/encode/EncoderDummycode.java      | 38 -----------------
 .../transform/encode/EncoderMVImpute.java       | 30 -------------
 .../runtime/transform/encode/EncoderOmit.java   |  5 ---
 .../transform/encode/EncoderPassThrough.java    |  5 ---
 .../runtime/transform/encode/EncoderRecode.java | 45 +++-----------------
 9 files changed, 29 insertions(+), 187 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
index bfe236e..5e6404b 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java
@@ -67,13 +67,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 	/** The data frame data as an ordered list of columns */
 	private Array[] _coldata = null;
 	
-	/** Cache for recode maps from frame meta data, indexed by column 0-based */
-	private Map<Integer, SoftReference<HashMap<String,Long>>> _rcdMapCache = null;
-	
 	public FrameBlock() {
 		_numRows = 0;
-		if( REUSE_RECODE_MAPS )
-			_rcdMapCache = new HashMap<Integer, SoftReference<HashMap<String,Long>>>();
 	}
 	
 	/**
@@ -120,8 +115,6 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 			_colmeta[j] = new ColumnMetadata(0);
 		for( int i=0; i<data.length; i++ )
 			appendRow(data[i]);
-		if( REUSE_RECODE_MAPS )
-			_rcdMapCache = new HashMap<Integer, SoftReference<HashMap<String,Long>>>();
 	}
 	
 	/**
@@ -872,16 +865,25 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 				ret._colnames[j-cl] = getColumnName(j);
 		}	
 		ret._numRows = ru-rl+1;
-
-		//copy output data
-		if(ret._coldata == null ) { 
+		if(ret._coldata == null )
 			ret._coldata = new Array[numCols];
+		
+		//fast-path: shallow copy column indexing 
+		if( ret._numRows == _numRows ) {
+			//this shallow copy does not only avoid an array copy, but
+			//also allows for bi-directional reuses of recodemaps 
 			for( int j=cl; j<=cu; j++ )
-				ret._coldata[j-cl] = _coldata[j].slice(rl,ru);
+				ret._coldata[j-cl] = _coldata[j];
+		}
+		//copy output data
+		else {
+			for( int j=cl; j<=cu; j++ ) {
+				if( ret._coldata[j-cl] == null )
+					ret._coldata[j-cl] = _coldata[j].slice(rl,ru);
+				else
+					ret._coldata[j-cl].set(0, ru-rl, _coldata[j], rl);
+			}
 		}
-		else
-			for( int j=cl; j<=cu; j++ )
-				ret._coldata[j-cl].set(0, ru-rl, _coldata[j], rl);	
 		
 		return ret;
 	}
@@ -1023,7 +1025,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 	public HashMap<String,Long> getRecodeMap(int col) {
 		//probe cache for existing map
 		if( REUSE_RECODE_MAPS ) {
-			SoftReference<HashMap<String,Long>> tmp = _rcdMapCache.get(col);
+			SoftReference<HashMap<String,Long>> tmp = _coldata[col]._rcdMapCache;
 			HashMap<String,Long> map = (tmp!=null) ? tmp.get() : null;
 			if( map != null ) return map;
 		}
@@ -1034,10 +1036,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		for( int i=0; i<getNumRows(); i++ ) {
 			Object val = ldata.get(i);
 			if( val != null ) {
-//				String[] tmp = IOUtilFunctions.splitCSV(
-//						val.toString(), Lop.DATATYPE_PREFIX);
-
-				// Instead of using splitCSV which is forcing string with RFC-4180 format, using Lop.DATATYPE_PREFIX separator to split token and code 
+				// Instead of using splitCSV which is forcing string with RFC-4180 format, 
+				// using Lop.DATATYPE_PREFIX separator to split token and code 
 				String[] tmp = 	new String[2];
 				int pos = val.toString().lastIndexOf(Lop.DATATYPE_PREFIX);
 				tmp[0] = val.toString().substring(0, pos);
@@ -1047,9 +1047,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 		}
 		
 		//put created map into cache
-		if( REUSE_RECODE_MAPS ) {
-			_rcdMapCache.put(col, new SoftReference<HashMap<String,Long>>(map));
-		}
+		if( REUSE_RECODE_MAPS )
+			_coldata[col]._rcdMapCache = new SoftReference<>(map);
 		
 		return map;
 	}
@@ -1245,6 +1244,8 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable
 	 * in order to avoid unnecessary dependencies.
 	 */
 	private abstract static class Array<T> implements Writable {
+		protected SoftReference<HashMap<String,Long>> _rcdMapCache = null;
+		
 		protected int _size = 0;
 		protected int newSize() {
 			return (int) Math.max(_size*2, 4); 

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/Encoder.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/Encoder.java b/src/main/java/org/apache/sysml/runtime/transform/encode/Encoder.java
index 304dcdb..e4af8a6 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/Encoder.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/Encoder.java
@@ -117,16 +117,6 @@ public abstract class Encoder implements Serializable
 	 * @return output matrix block
 	 */
 	public abstract MatrixBlock apply(FrameBlock in, MatrixBlock out);
-	
-	/**
-	 * Encode input data according to existing transform meta
-	 * data (transform apply).
-	 * TODO remove once file-based transform removed
-	 * 
-	 * @param in input data as string array
-	 * @return encoded data as string array
-	 */
-	public abstract String[] apply(String[] in);
 
 	/**
 	 * Construct a frame block out of the transform meta data.

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java
index fbe6994..e70a392 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderBin.java
@@ -44,7 +44,6 @@ public class EncoderBin extends Encoder
 
 	private int[] _numBins = null;
 	private double[] _min=null, _max=null;	// min and max among non-missing values
-	private double[] _binWidths = null;		// width of a bin for each attribute
 	
 	//frame transform-apply attributes
 	private double[][] _binMins = null;
@@ -83,8 +82,6 @@ public class EncoderBin extends Encoder
 			Arrays.fill(_min, Double.MAX_VALUE);
 			_max = new double[_colList.length];
 			Arrays.fill(_max, -Double.MAX_VALUE);
-			
-			_binWidths = new double[_colList.length];
 		}
 	}
 
@@ -121,34 +118,6 @@ public class EncoderBin extends Encoder
 		// nothing to do
 	}
 	
-	/**
-	 * Method to apply transformations.
-	 */
-	@Override
-	public String[] apply(String[] words) {
-		if( !isApplicable() )
-			return words;
-	
-		for(int i=0; i < _colList.length; i++) {
-			int colID = _colList[i];
-			try {
-				double val = UtilFunctions.parseToDouble(words[colID-1]);
-				int binid = 1;
-				double tmp = _min[i] + _binWidths[i];
-				while(val > tmp && binid < _numBins[i]) {
-					tmp += _binWidths[i];
-					binid++;
-				}
-				words[colID-1] = Integer.toString(binid);
-			} 
-			catch(NumberFormatException e) {
-				throw new RuntimeException("Encountered \"" + words[colID-1] + "\" in column ID \"" + colID + "\", when expecting a numeric value. Consider adding \"" + words[colID-1] + "\" to na.strings, along with an appropriate imputation method.");
-			}
-		}
-		
-		return words;
-	}
-
 	@Override
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
 		for(int j=0; j<_colList.length; j++) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderComposite.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderComposite.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderComposite.java
index deff887..ffff1df 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderComposite.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderComposite.java
@@ -80,13 +80,6 @@ public class EncoderComposite extends Encoder
 		for( Encoder encoder : _encoders )
 			encoder.build(in);
 	}
-
-	@Override
-	public String[] apply(String[] in) {
-		for( Encoder encoder : _encoders )
-			encoder.apply(in);
-		return in;
-	}
 	
 	@Override 
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderDummycode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderDummycode.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderDummycode.java
index 743381a..9a2f059 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderDummycode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderDummycode.java
@@ -58,44 +58,6 @@ public class EncoderDummycode extends Encoder
 		//do nothing
 	}
 	
-	/**
-	 * Method to apply transformations.
-	 * 
-	 * @param words array of strings
-	 * @return array of transformed strings
-	 */
-	@Override
-	public String[] apply(String[] words) 
-	{
-		if( !isApplicable() )
-			return words;
-		
-		String[] nwords = new String[(int)_dummycodedLength];
-		int rcdVal = 0;
-		
-		for(int colID=1, idx=0, ncolID=1; colID <= words.length; colID++) {
-			if(idx < _colList.length && colID==_colList[idx]) {
-				// dummycoded columns
-				try {
-					rcdVal = UtilFunctions.parseToInt(UtilFunctions.unquote(words[colID-1]));
-					nwords[ ncolID-1+rcdVal-1 ] = "1";
-					ncolID += _domainSizes[idx];
-					idx++;
-				} 
-				catch (Exception e) {
-					throw new RuntimeException("Error in dummycoding: colID="+colID + ", rcdVal=" + rcdVal+", word="+words[colID-1] 
-							+ ", domainSize=" + _domainSizes[idx] + ", dummyCodedLength=" + _dummycodedLength);
-				}
-			}
-			else {
-				nwords[ncolID-1] = words[colID-1];
-				ncolID++;
-			}
-		}
-		
-		return nwords;
-	}
-	
 	@Override
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) 
 	{

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderMVImpute.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderMVImpute.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderMVImpute.java
index 55a0bde..ae9b809 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderMVImpute.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderMVImpute.java
@@ -336,36 +336,6 @@ public class EncoderMVImpute extends Encoder
 			throw new RuntimeException(ex);
 		}
 	}
-
-	@Override
-	public String[] apply(String[] words) 
-	{	
-		if( isApplicable() )
-			for(int i=0; i < _colList.length; i++) {
-				int colID = _colList[i];
-				String w = UtilFunctions.unquote(words[colID-1]);
-				if(TfUtils.isNA(_NAstrings, w))
-					w = words[colID-1] = _replacementList[i];
-				
-				if ( _isMVScaled.get(i) )
-					if ( _mvscMethodList[i] == MVMethod.GLOBAL_MEAN )
-						words[colID-1] = Double.toString( UtilFunctions.parseToDouble(w) - _meanList[i]._sum );
-					else
-						words[colID-1] = Double.toString( (UtilFunctions.parseToDouble(w) - _meanList[i]._sum) / _varList[i].mean._sum );
-			}
-		
-		if(_scnomvList != null)
-		for(int i=0; i < _scnomvList.length; i++)
-		{
-			int colID = _scnomvList[i];
-			if ( _scnomvMethodList[i] == MVMethod.GLOBAL_MEAN )
-				words[colID-1] = Double.toString( UtilFunctions.parseToDouble(words[colID-1]) - _scnomvMeanList[i]._sum );
-			else
-				words[colID-1] = Double.toString( (UtilFunctions.parseToDouble(words[colID-1]) - _scnomvMeanList[i]._sum) / _scnomvVarList[i].mean._sum );
-		}
-			
-		return words;
-	}
 	
 	@Override
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderOmit.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderOmit.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderOmit.java
index af09cee..0f74590 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderOmit.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderOmit.java
@@ -71,11 +71,6 @@ public class EncoderOmit extends Encoder
 	}
 	
 	@Override
-	public String[] apply(String[] words) {
-		return null;
-	}
-	
-	@Override
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) 
 	{
 		//determine output size

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
index d84ea0d..ee22ac1 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java
@@ -48,11 +48,6 @@ public class EncoderPassThrough extends Encoder
 	public void build(FrameBlock in) {
 		//do nothing
 	}
-
-	@Override
-	public String[] apply(String[] in) {
-		return in;
-	}
 	
 	@Override 
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) {

http://git-wip-us.apache.org/repos/asf/systemml/blob/4a24b9a7/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
index 855d565..526d31e 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
@@ -29,7 +29,6 @@ import org.apache.sysml.runtime.matrix.data.FrameBlock;
 import org.apache.sysml.runtime.matrix.data.MatrixBlock;
 import org.apache.sysml.runtime.transform.TfUtils;
 import org.apache.sysml.runtime.transform.meta.TfMetaUtils;
-import org.apache.sysml.runtime.util.UtilFunctions;
 import org.apache.wink.json4j.JSONException;
 import org.apache.wink.json4j.JSONObject;
 
@@ -39,7 +38,6 @@ public class EncoderRecode extends Encoder
 
 	//recode maps and custom map for partial recode maps 
 	private HashMap<Integer, HashMap<String, Long>> _rcdMaps  = new HashMap<Integer, HashMap<String, Long>>();
-	private HashMap<Integer, HashMap<String,String>> _finalMaps = null;
 	private HashMap<Integer, HashSet<Object>> _rcdMapsPart = null;
 	
 	public EncoderRecode(JSONObject parsedSpec, String[] colnames, int clen)
@@ -60,17 +58,9 @@ public class EncoderRecode extends Encoder
 		return _rcdMapsPart; 
 	}
 	
-	public HashMap<Integer, HashMap<String,String>> getRecodeMaps() {
-		return _finalMaps;
-	}
-	
-	private String lookupRCDMap(int colID, String key) {
-		if( _finalMaps!=null )
-			return _finalMaps.get(colID).get(key);
-		else { //used for cp
-			Long tmp = _rcdMaps.get(colID).get(key);
-			return (tmp!=null) ? Long.toString(tmp) : null;
-		}
+	private long lookupRCDMap(int colID, String key) {
+		Long tmp = _rcdMaps.get(colID).get(key);
+		return (tmp!=null) ? tmp : -1;
 	}
 	
 	@Override
@@ -132,28 +122,6 @@ public class EncoderRecode extends Encoder
 		}
 	}
 	
-	/**
-	 * Method to apply transformations.
-	 */
-	@Override
-	public String[] apply(String[] words) 
-	{
-		if( !isApplicable() )
-			return words;
-		
-		//apply recode maps on relevant columns of given row
-		for(int i=0; i < _colList.length; i++) {
-			//prepare input and get code
-			int colID = _colList[i];
-			String key = UtilFunctions.unquote(words[colID-1].trim());
-			String val = lookupRCDMap(colID, key);			
-			// replace unseen keys with NaN 
-			words[colID-1] = (val!=null) ? val : "NaN";
-		}
-			
-		return words;
-	}
-	
 	@Override
 	public MatrixBlock apply(FrameBlock in, MatrixBlock out) {
 		//apply recode maps column wise
@@ -162,9 +130,9 @@ public class EncoderRecode extends Encoder
 			for( int i=0; i<in.getNumRows(); i++ ) {
 				Object okey = in.get(i, colID-1);
 				String key = (okey!=null) ? okey.toString() : null;
-				String val = lookupRCDMap(colID, key);			
-				out.quickSetValue(i, colID-1, (val!=null) ? 
-						Double.parseDouble(val) : Double.NaN);
+				long code = lookupRCDMap(colID, key);			
+				out.quickSetValue(i, colID-1,
+					(code >= 0) ? code : Double.NaN);
 			}
 		}
 		
@@ -228,4 +196,3 @@ public class EncoderRecode extends Encoder
 		return token + Lop.DATATYPE_PREFIX + code.toString();
 	}
 }
- 
\ No newline at end of file