You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2014/12/09 06:42:53 UTC

tajo git commit: TAJO-1236: Remove slow 'new String' operation in parquet format. (jinho)

Repository: tajo
Updated Branches:
  refs/heads/master 8f68b4baf -> 661c7e216


TAJO-1236: Remove slow 'new String' operation in parquet format. (jinho)

Closes #292


Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/661c7e21
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/661c7e21
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/661c7e21

Branch: refs/heads/master
Commit: 661c7e216d664c71d2c889082703ecc6ae028dc3
Parents: 8f68b4b
Author: jhkim <jh...@apache.org>
Authored: Tue Dec 9 14:42:02 2014 +0900
Committer: jhkim <jh...@apache.org>
Committed: Tue Dec 9 14:42:02 2014 +0900

----------------------------------------------------------------------
 CHANGES                                         |  3 ++
 .../storage/parquet/TajoRecordConverter.java    | 34 ++++++++------------
 .../tajo/storage/parquet/TajoWriteSupport.java  | 23 +++++++------
 3 files changed, 28 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 59a8f68..89d39bc 100644
--- a/CHANGES
+++ b/CHANGES
@@ -18,6 +18,9 @@ Release 0.9.1 - unreleased
 
   IMPROVEMENT
 
+    TAJO-1236: Remove slow 'new String' operation in parquet format. 
+    (jinho)
+
     TAJO-1230: Disable ipv6 support on JVM. (Jihun Kang via hyunsik)
 
     TAJO-1213: Implement CatalogStore::updateTableStats. (jaehwa)

http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
index 7c3d79d..a091eac 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
@@ -18,29 +18,23 @@
 
 package org.apache.tajo.storage.parquet;
 
-import com.google.protobuf.Message;
 import com.google.protobuf.InvalidProtocolBufferException;
-
-import java.nio.ByteBuffer;
-
-import parquet.io.api.GroupConverter;
-import parquet.io.api.Converter;
-import parquet.io.api.PrimitiveConverter;
-import parquet.io.api.Binary;
-import parquet.schema.Type;
-import parquet.schema.GroupType;
-
+import com.google.protobuf.Message;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.common.TajoDataTypes;
 import org.apache.tajo.common.TajoDataTypes.DataType;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.Column;
+import org.apache.tajo.datum.*;
 import org.apache.tajo.storage.Tuple;
 import org.apache.tajo.storage.VTuple;
-import org.apache.tajo.datum.DatumFactory;
-import org.apache.tajo.datum.Datum;
-import org.apache.tajo.datum.BlobDatum;
-import org.apache.tajo.datum.NullDatum;
-import org.apache.tajo.datum.ProtobufDatumFactory;
+import parquet.io.api.Binary;
+import parquet.io.api.Converter;
+import parquet.io.api.GroupConverter;
+import parquet.io.api.PrimitiveConverter;
+import parquet.schema.GroupType;
+import parquet.schema.Type;
+
+import java.nio.ByteBuffer;
 
 /**
  * Converter to convert a Parquet record into a Tajo Tuple.
@@ -222,7 +216,7 @@ public class TajoRecordConverter extends GroupConverter {
 
     @Override
     final public void addBinary(Binary value) {
-      parent.add(DatumFactory.createChar(value.toStringUsingUTF8()));
+      parent.add(DatumFactory.createChar(value.getBytes()));
     }
   }
 
@@ -343,7 +337,7 @@ public class TajoRecordConverter extends GroupConverter {
 
     @Override
     final public void addBinary(Binary value) {
-      parent.add(DatumFactory.createText(value.toStringUsingUTF8()));
+      parent.add(DatumFactory.createText(value.getBytes()));
     }
   }
 

http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
index 35165de..8651131 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
@@ -18,10 +18,12 @@
 
 package org.apache.tajo.storage.parquet;
 
-import java.util.Map;
-import java.util.HashMap;
-import java.util.List;
-
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.storage.Tuple;
 import parquet.hadoop.api.WriteSupport;
 import parquet.io.api.Binary;
 import parquet.io.api.RecordConsumer;
@@ -29,12 +31,9 @@ import parquet.schema.GroupType;
 import parquet.schema.MessageType;
 import parquet.schema.Type;
 
-import org.apache.hadoop.conf.Configuration;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.Column;
-import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.datum.Datum;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 /**
  * Tajo implementation of {@link WriteSupport} for {@link Tuple}s.
@@ -116,7 +115,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
   private void writeValue(Type fieldType, Column column, Datum datum) {
     switch (column.getDataType().getType()) {
       case BOOLEAN:
-        recordConsumer.addBoolean((Boolean) datum.asBool());
+        recordConsumer.addBoolean(datum.asBool());
         break;
       case BIT:
       case INT2:
@@ -134,7 +133,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
         break;
       case CHAR:
       case TEXT:
-        recordConsumer.addBinary(Binary.fromString(datum.asChars()));
+        recordConsumer.addBinary(Binary.fromByteArray(datum.asTextBytes()));
         break;
       case PROTOBUF:
       case BLOB: