You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/18 09:01:44 UTC

systemml git commit: [SYSTEMML-2016] Performance frame transformencode (meta data creation)

Repository: systemml
Updated Branches:
  refs/heads/master e368de8d4 -> ec4963552


[SYSTEMML-2016] Performance frame transformencode (meta data creation)

This patch makes a minor performance improvement to transformencode.
Specifically, we now reuse string builders across the construction
of recode map entries in order to avoid unnecessary allocation and
string copies. In a scenario of 100 iterations of transformencode of a
100K x 1 random input, the runtime improved from 25.6s to 20.1s.


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec496355
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec496355
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec496355

Branch: refs/heads/master
Commit: ec49635520f5ef223830c8632b661bd091d87eb3
Parents: e368de8
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Nov 18 01:00:41 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Nov 18 01:00:41 2017 -0800

----------------------------------------------------------------------
 .../sysml/runtime/transform/encode/EncoderRecode.java | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/ec496355/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
index 8758e73..11667ce 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
@@ -156,13 +156,14 @@ public class EncoderRecode extends Encoder
 		meta.ensureAllocatedColumns(maxDistinct);
 		
 		//create compact meta data representation
+		StringBuilder sb = new StringBuilder(); //for reuse
 		for( int j=0; j<_colList.length; j++ ) {
 			int colID = _colList[j]; //1-based
 			int rowID = 0;
 			if( _rcdMaps.containsKey(_colList[j]) )
 				for( Entry<String, Long> e : _rcdMaps.get(colID).entrySet() ) {
-					String tmp = constructRecodeMapEntry(e.getKey(), e.getValue());
-					meta.set(rowID++, colID-1, tmp); 
+					meta.set(rowID++, colID-1, 
+						constructRecodeMapEntry(e.getKey(), e.getValue(), sb)); 
 				}
 			meta.getColumnMetadata(colID-1).setNumDistinct(
 					_rcdMaps.get(colID).size());
@@ -197,7 +198,14 @@ public class EncoderRecode extends Encoder
 	 * @return the concatenation of token and code with delimiter in between
 	 */
 	public static String constructRecodeMapEntry(String token, Long code) {
-		return token + Lop.DATATYPE_PREFIX + code.toString();
+		StringBuilder sb = new StringBuilder(token.length()+16);
+		return constructRecodeMapEntry(token, code, sb);
+	}
+	
+	private static String constructRecodeMapEntry(String token, Long code, StringBuilder sb) {
+		sb.setLength(0); //reset reused string builder
+		return sb.append(token).append(Lop.DATATYPE_PREFIX)
+			.append(code.longValue()).toString();
 	}
 	
 	/**