You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2014/12/09 06:42:53 UTC
tajo git commit: TAJO-1236: Remove slow 'new String' operation in parquet format. (jinho)
Repository: tajo
Updated Branches:
refs/heads/master 8f68b4baf -> 661c7e216
TAJO-1236: Remove slow 'new String' operation in parquet format. (jinho)
Closes #292
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/661c7e21
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/661c7e21
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/661c7e21
Branch: refs/heads/master
Commit: 661c7e216d664c71d2c889082703ecc6ae028dc3
Parents: 8f68b4b
Author: jhkim <jh...@apache.org>
Authored: Tue Dec 9 14:42:02 2014 +0900
Committer: jhkim <jh...@apache.org>
Committed: Tue Dec 9 14:42:02 2014 +0900
----------------------------------------------------------------------
CHANGES | 3 ++
.../storage/parquet/TajoRecordConverter.java | 34 ++++++++------------
.../tajo/storage/parquet/TajoWriteSupport.java | 23 +++++++------
3 files changed, 28 insertions(+), 32 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index 59a8f68..89d39bc 100644
--- a/CHANGES
+++ b/CHANGES
@@ -18,6 +18,9 @@ Release 0.9.1 - unreleased
IMPROVEMENT
+ TAJO-1236: Remove slow 'new String' operation in parquet format.
+ (jinho)
+
TAJO-1230: Disable ipv6 support on JVM. (Jihun Kang via hyunsik)
TAJO-1213: Implement CatalogStore::updateTableStats. (jaehwa)
http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
index 7c3d79d..a091eac 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoRecordConverter.java
@@ -18,29 +18,23 @@
package org.apache.tajo.storage.parquet;
-import com.google.protobuf.Message;
import com.google.protobuf.InvalidProtocolBufferException;
-
-import java.nio.ByteBuffer;
-
-import parquet.io.api.GroupConverter;
-import parquet.io.api.Converter;
-import parquet.io.api.PrimitiveConverter;
-import parquet.io.api.Binary;
-import parquet.schema.Type;
-import parquet.schema.GroupType;
-
+import com.google.protobuf.Message;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.common.TajoDataTypes.DataType;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.Column;
+import org.apache.tajo.datum.*;
import org.apache.tajo.storage.Tuple;
import org.apache.tajo.storage.VTuple;
-import org.apache.tajo.datum.DatumFactory;
-import org.apache.tajo.datum.Datum;
-import org.apache.tajo.datum.BlobDatum;
-import org.apache.tajo.datum.NullDatum;
-import org.apache.tajo.datum.ProtobufDatumFactory;
+import parquet.io.api.Binary;
+import parquet.io.api.Converter;
+import parquet.io.api.GroupConverter;
+import parquet.io.api.PrimitiveConverter;
+import parquet.schema.GroupType;
+import parquet.schema.Type;
+
+import java.nio.ByteBuffer;
/**
* Converter to convert a Parquet record into a Tajo Tuple.
@@ -222,7 +216,7 @@ public class TajoRecordConverter extends GroupConverter {
@Override
final public void addBinary(Binary value) {
- parent.add(DatumFactory.createChar(value.toStringUsingUTF8()));
+ parent.add(DatumFactory.createChar(value.getBytes()));
}
}
@@ -343,7 +337,7 @@ public class TajoRecordConverter extends GroupConverter {
@Override
final public void addBinary(Binary value) {
- parent.add(DatumFactory.createText(value.toStringUsingUTF8()));
+ parent.add(DatumFactory.createText(value.getBytes()));
}
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/661c7e21/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
index 35165de..8651131 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/parquet/TajoWriteSupport.java
@@ -18,10 +18,12 @@
package org.apache.tajo.storage.parquet;
-import java.util.Map;
-import java.util.HashMap;
-import java.util.List;
-
+import org.apache.hadoop.conf.Configuration;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.Schema;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.storage.Tuple;
import parquet.hadoop.api.WriteSupport;
import parquet.io.api.Binary;
import parquet.io.api.RecordConsumer;
@@ -29,12 +31,9 @@ import parquet.schema.GroupType;
import parquet.schema.MessageType;
import parquet.schema.Type;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.tajo.catalog.Schema;
-import org.apache.tajo.catalog.Column;
-import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.datum.Datum;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
/**
* Tajo implementation of {@link WriteSupport} for {@link Tuple}s.
@@ -116,7 +115,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
private void writeValue(Type fieldType, Column column, Datum datum) {
switch (column.getDataType().getType()) {
case BOOLEAN:
- recordConsumer.addBoolean((Boolean) datum.asBool());
+ recordConsumer.addBoolean(datum.asBool());
break;
case BIT:
case INT2:
@@ -134,7 +133,7 @@ public class TajoWriteSupport extends WriteSupport<Tuple> {
break;
case CHAR:
case TEXT:
- recordConsumer.addBinary(Binary.fromString(datum.asChars()));
+ recordConsumer.addBinary(Binary.fromByteArray(datum.asTextBytes()));
break;
case PROTOBUF:
case BLOB: