You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@systemml.apache.org by mb...@apache.org on 2017/11/18 09:01:44 UTC
systemml git commit: [SYSTEMML-2016] Performance frame
transformencode (meta data creation)
Repository: systemml
Updated Branches:
refs/heads/master e368de8d4 -> ec4963552
[SYSTEMML-2016] Performance frame transformencode (meta data creation)
This patch makes a minor performance improvement to transformencode.
Specifically, we now reuse string builders across the construction
of recode map entries in order to avoid unnecessary allocation and
string copies. In a scenario of 100 iterations of transformencode on a
100K x 1 random input, the runtime improved from 25.6s to 20.1s.
Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/ec496355
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/ec496355
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/ec496355
Branch: refs/heads/master
Commit: ec49635520f5ef223830c8632b661bd091d87eb3
Parents: e368de8
Author: Matthias Boehm <mb...@gmail.com>
Authored: Sat Nov 18 01:00:41 2017 -0800
Committer: Matthias Boehm <mb...@gmail.com>
Committed: Sat Nov 18 01:00:41 2017 -0800
----------------------------------------------------------------------
.../sysml/runtime/transform/encode/EncoderRecode.java | 14 +++++++++++---
1 file changed, 11 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/systemml/blob/ec496355/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
index 8758e73..11667ce 100644
--- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
+++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderRecode.java
@@ -156,13 +156,14 @@ public class EncoderRecode extends Encoder
meta.ensureAllocatedColumns(maxDistinct);
//create compact meta data representation
+ StringBuilder sb = new StringBuilder(); //for reuse
for( int j=0; j<_colList.length; j++ ) {
int colID = _colList[j]; //1-based
int rowID = 0;
if( _rcdMaps.containsKey(_colList[j]) )
for( Entry<String, Long> e : _rcdMaps.get(colID).entrySet() ) {
- String tmp = constructRecodeMapEntry(e.getKey(), e.getValue());
- meta.set(rowID++, colID-1, tmp);
+ meta.set(rowID++, colID-1,
+ constructRecodeMapEntry(e.getKey(), e.getValue(), sb));
}
meta.getColumnMetadata(colID-1).setNumDistinct(
_rcdMaps.get(colID).size());
@@ -197,7 +198,14 @@ public class EncoderRecode extends Encoder
* @return the concatenation of token and code with delimiter in between
*/
public static String constructRecodeMapEntry(String token, Long code) {
- return token + Lop.DATATYPE_PREFIX + code.toString();
+ StringBuilder sb = new StringBuilder(token.length()+16);
+ return constructRecodeMapEntry(token, code, sb);
+ }
+
+ private static String constructRecodeMapEntry(String token, Long code, StringBuilder sb) {
+ sb.setLength(0); //reset reused string builder
+ return sb.append(token).append(Lop.DATATYPE_PREFIX)
+ .append(code.longValue()).toString();
}
/**