You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/09/08 01:52:48 UTC
[28/38] hive git commit: HIVE-14451: Vectorization: Add byRef mode
for borrowed Strings in VectorDeserializeRow (Matt McCline,
reviewed by Sergey Shelukhin)
HIVE-14451: Vectorization: Add byRef mode for borrowed Strings in VectorDeserializeRow (Matt McCline, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/146a9183
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/146a9183
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/146a9183
Branch: refs/heads/hive-14535
Commit: 146a9183e60744746029e6a700c6d68a2edbc845
Parents: 6e76ee3
Author: Matt McCline <mm...@hortonworks.com>
Authored: Wed Sep 7 11:33:52 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Wed Sep 7 11:46:02 2016 -0700
----------------------------------------------------------------------
.../hive/ql/exec/tez/ReduceRecordSource.java | 4 +-
.../ql/exec/vector/VectorDeserializeRow.java | 179 +++++++++++--
.../hive/ql/exec/vector/VectorMapOperator.java | 9 +-
.../mapjoin/VectorMapJoinCommonOperator.java | 3 +-
.../VectorMapJoinGenerateResultOperator.java | 7 +-
.../fast/VectorMapJoinFastLongHashTable.java | 5 +-
.../fast/VectorMapJoinFastStringCommon.java | 4 +-
.../VectorMapJoinOptimizedLongCommon.java | 2 -
.../hive/ql/exec/vector/TestVectorSerDeRow.java | 254 +++++++++++++++----
.../ql/exec/vector/VectorRandomRowSource.java | 159 +++++++++---
.../mapjoin/fast/CheckFastRowHashMap.java | 8 +-
.../fast/BinarySortableDeserializeRead.java | 109 ++++++--
.../hive/serde2/fast/DeserializeRead.java | 38 ++-
.../lazy/fast/LazySimpleDeserializeRead.java | 94 +++++--
.../fast/LazyBinaryDeserializeRead.java | 5 +-
.../binarysortable/TestBinarySortableFast.java | 15 +-
.../hive/serde2/lazy/TestLazySimpleFast.java | 8 +-
.../serde2/lazybinary/TestLazyBinaryFast.java | 8 +-
.../hive/ql/exec/vector/BytesColumnVector.java | 31 +++
19 files changed, 774 insertions(+), 168 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
index f4c3b81..7e41b7a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordSource.java
@@ -185,6 +185,7 @@ public class ReduceRecordSource implements RecordSource {
new BinarySortableDeserializeRead(
VectorizedBatchUtil.typeInfosFromStructObjectInspector(
keyStructInspector),
+ /* useExternalBuffer */ true,
binarySortableSerDe.getSortOrders()));
keyBinarySortableDeserializeToRow.init(0);
@@ -194,7 +195,8 @@ public class ReduceRecordSource implements RecordSource {
new VectorDeserializeRow<LazyBinaryDeserializeRead>(
new LazyBinaryDeserializeRead(
VectorizedBatchUtil.typeInfosFromStructObjectInspector(
- valueStructInspectors)));
+ valueStructInspectors),
+ /* useExternalBuffer */ true));
valueLazyBinaryDeserializeToRow.init(firstValueColumnOffset);
// Create data buffers for value bytes column vectors.
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
index f66916b..47bef43 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java
@@ -74,6 +74,12 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
private TypeInfo[] sourceTypeInfos;
+ private byte[] inputBytes;
+
+ /**
+ * @param deserializeRead Set useExternalBuffer to true to avoid buffer copying and to get
+ * more efficient reading.
+ */
public VectorDeserializeRow(T deserializeRead) {
this();
this.deserializeRead = deserializeRead;
@@ -338,10 +344,15 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
* @param batch
* @param batchIndex
* @param logicalColumnIndex
+ * @param canRetainByteRef Specify true when it is safe to retain references to the bytes
+ * source for DeserializeRead. I.e. the STRING, CHAR/VARCHAR data
+ * can be set in BytesColumnVector with setRef instead of with setVal
+ * which copies data. An example of a safe usage is referring to bytes
+ * in a hash table entry that is immutable.
* @throws IOException
*/
private void deserializeRowColumn(VectorizedRowBatch batch, int batchIndex,
- int logicalColumnIndex) throws IOException {
+ int logicalColumnIndex, boolean canRetainByteRef) throws IOException {
Category sourceCategory = sourceCategories[logicalColumnIndex];
if (sourceCategory == null) {
/*
@@ -406,42 +417,114 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
break;
case BINARY:
case STRING:
- ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
- batchIndex,
- deserializeRead.currentBytes,
- deserializeRead.currentBytesStart,
- deserializeRead.currentBytesLength);
+ {
+ BytesColumnVector bytesColVec = ((BytesColumnVector) batch.cols[projectionColumnNum]);
+ if (deserializeRead.currentExternalBufferNeeded) {
+ bytesColVec.ensureValPreallocated(deserializeRead.currentExternalBufferNeededLen);
+ deserializeRead.copyToExternalBuffer(
+ bytesColVec.getValPreallocatedBytes(), bytesColVec.getValPreallocatedStart());
+ bytesColVec.setValPreallocated(
+ batchIndex,
+ deserializeRead.currentExternalBufferNeededLen);
+ } else if (canRetainByteRef && inputBytes == deserializeRead.currentBytes) {
+ bytesColVec.setRef(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength);
+ } else {
+ bytesColVec.setVal(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength);
+ }
+ }
break;
case VARCHAR:
{
// Use the basic STRING bytes read to get access, then use our optimal truncate/trim method
// that does not use Java String objects.
- int adjustedLength = StringExpr.truncate(
- deserializeRead.currentBytes,
- deserializeRead.currentBytesStart,
- deserializeRead.currentBytesLength,
- maxLengths[logicalColumnIndex]);
- ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
- batchIndex,
- deserializeRead.currentBytes,
- deserializeRead.currentBytesStart,
- adjustedLength);
+ BytesColumnVector bytesColVec = ((BytesColumnVector) batch.cols[projectionColumnNum]);
+ if (deserializeRead.currentExternalBufferNeeded) {
+ // Write directly into our BytesColumnVector value buffer.
+ bytesColVec.ensureValPreallocated(deserializeRead.currentExternalBufferNeededLen);
+ byte[] convertBuffer = bytesColVec.getValPreallocatedBytes();
+ int convertBufferStart = bytesColVec.getValPreallocatedStart();
+ deserializeRead.copyToExternalBuffer(
+ convertBuffer,
+ convertBufferStart);
+ bytesColVec.setValPreallocated(
+ batchIndex,
+ StringExpr.truncate(
+ convertBuffer,
+ convertBufferStart,
+ deserializeRead.currentExternalBufferNeededLen,
+ maxLengths[logicalColumnIndex]));
+ } else if (canRetainByteRef && inputBytes == deserializeRead.currentBytes) {
+ bytesColVec.setRef(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ StringExpr.truncate(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength,
+ maxLengths[logicalColumnIndex]));
+ } else {
+ bytesColVec.setVal(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ StringExpr.truncate(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength,
+ maxLengths[logicalColumnIndex]));
+ }
}
break;
case CHAR:
{
// Use the basic STRING bytes read to get access, then use our optimal truncate/trim method
// that does not use Java String objects.
- int adjustedLength = StringExpr.rightTrimAndTruncate(
- deserializeRead.currentBytes,
- deserializeRead.currentBytesStart,
- deserializeRead.currentBytesLength,
- maxLengths[logicalColumnIndex]);
- ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
- batchIndex,
- deserializeRead.currentBytes,
- deserializeRead.currentBytesStart,
- adjustedLength);
+ BytesColumnVector bytesColVec = ((BytesColumnVector) batch.cols[projectionColumnNum]);
+ if (deserializeRead.currentExternalBufferNeeded) {
+ // Write directly into our BytesColumnVector value buffer.
+ bytesColVec.ensureValPreallocated(deserializeRead.currentExternalBufferNeededLen);
+ byte[] convertBuffer = bytesColVec.getValPreallocatedBytes();
+ int convertBufferStart = bytesColVec.getValPreallocatedStart();
+ deserializeRead.copyToExternalBuffer(
+ convertBuffer,
+ convertBufferStart);
+ bytesColVec.setValPreallocated(
+ batchIndex,
+ StringExpr.rightTrimAndTruncate(
+ convertBuffer,
+ convertBufferStart,
+ deserializeRead.currentExternalBufferNeededLen,
+ maxLengths[logicalColumnIndex]));
+ } else if (canRetainByteRef && inputBytes == deserializeRead.currentBytes) {
+ bytesColVec.setRef(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ StringExpr.rightTrimAndTruncate(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength,
+ maxLengths[logicalColumnIndex]));
+ } else {
+ bytesColVec.setVal(
+ batchIndex,
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ StringExpr.rightTrimAndTruncate(
+ deserializeRead.currentBytes,
+ deserializeRead.currentBytesStart,
+ deserializeRead.currentBytesLength,
+ maxLengths[logicalColumnIndex]));
+ }
}
break;
case DECIMAL:
@@ -644,6 +727,7 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
* @param length
*/
public void setBytes(byte[] bytes, int offset, int length) {
+ inputBytes = bytes;
deserializeRead.set(bytes, offset, length);
}
@@ -653,6 +737,10 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
* Use getDetailedReadPositionString to get detailed read position information to help
* diagnose exceptions that are thrown...
*
+ * This version of deserialize does not keep byte references to string/char/varchar/binary data
+ * type field. The bytes are copied into the BytesColumnVector buffer with setVal.
+ * (See deserializeByRef below if keep references is safe).
+ *
* @param batch
* @param batchIndex
* @throws IOException
@@ -663,12 +751,49 @@ public final class VectorDeserializeRow<T extends DeserializeRead> {
if (isConvert[i]) {
deserializeConvertRowColumn(batch, batchIndex, i);
} else {
- deserializeRowColumn(batch, batchIndex, i);
+ // Pass false for canRetainByteRef since we will NOT be keeping byte references to the input
+ // bytes with the BytesColumnVector.setRef method.
+ deserializeRowColumn(batch, batchIndex, i, /* canRetainByteRef */ false);
}
}
deserializeRead.extraFieldsCheck();
}
+ /**
+ * Deserialize a row from the range of bytes specified by setBytes.
+ *
+ * Use this method instead of deserialize when it is safe to retain references to the bytes source
+ * for DeserializeRead. I.e. the STRING, CHAR/VARCHAR data can be set in BytesColumnVector with
+ * setRef instead of with setVal which copies data.
+ *
+ * An example of a safe usage:
+ * Referring to bytes in a hash table entry that is immutable.
+ *
+ * An example of a unsafe usage:
+ * Referring to bytes in a reduce receive buffer that will be overwritten with new data.
+ *
+ * Use getDetailedReadPositionString to get detailed read position information to help
+ * diagnose exceptions that are thrown...
+ *
+ * @param batch
+ * @param batchIndex
+ * @throws IOException
+ */
+ public void deserializeByRef(VectorizedRowBatch batch, int batchIndex) throws IOException {
+ final int count = isConvert.length;
+ for (int i = 0; i < count; i++) {
+ if (isConvert[i]) {
+ deserializeConvertRowColumn(batch, batchIndex, i);
+ } else {
+ // Pass true for canRetainByteRef since we will be keeping byte references to the input
+ // bytes with the BytesColumnVector.setRef method.
+ deserializeRowColumn(batch, batchIndex, i, /* canRetainByteRef */ true);
+ }
+ }
+ deserializeRead.extraFieldsCheck();
+ }
+
+
public String getDetailedReadPositionString() {
return deserializeRead.getDetailedReadPositionString();
}
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
index 2bdc59b..c7fa0db 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapOperator.java
@@ -261,7 +261,10 @@ public class VectorMapOperator extends AbstractMapOperator {
LazySimpleSerDe.class.getName());
LazySimpleDeserializeRead lazySimpleDeserializeRead =
- new LazySimpleDeserializeRead(dataTypeInfos, simpleSerdeParams);
+ new LazySimpleDeserializeRead(
+ dataTypeInfos,
+ /* useExternalBuffer */ true,
+ simpleSerdeParams);
vectorDeserializeRow =
new VectorDeserializeRow<LazySimpleDeserializeRead>(lazySimpleDeserializeRead);
@@ -277,7 +280,9 @@ public class VectorMapOperator extends AbstractMapOperator {
case LAZY_BINARY:
{
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(dataTypeInfos);
+ new LazyBinaryDeserializeRead(
+ dataTypeInfos,
+ /* useExternalBuffer */ true);
vectorDeserializeRow =
new VectorDeserializeRow<LazyBinaryDeserializeRead>(lazyBinaryDeserializeRead);
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
index 24668f9..c288731 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java
@@ -571,7 +571,8 @@ public abstract class VectorMapJoinCommonOperator extends MapJoinOperator implem
new VectorDeserializeRow<LazyBinaryDeserializeRead>(
new LazyBinaryDeserializeRead(
VectorizedBatchUtil.typeInfosFromTypeNames(
- smallTableMapping.getTypeNames())));
+ smallTableMapping.getTypeNames()),
+ /* useExternalBuffer */ true));
smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns());
}
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
index 469f86a..21a01e6 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java
@@ -144,7 +144,8 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
smallTableVectorDeserializeRow.setBytes(bytes, offset, length);
try {
- smallTableVectorDeserializeRow.deserialize(batch, batchIndex);
+ // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc.
+ smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex);
} catch (Exception e) {
throw new HiveException(
"\nHashMapResult detail: " +
@@ -442,7 +443,9 @@ public abstract class VectorMapJoinGenerateResultOperator extends VectorMapJoinC
bigTableVectorDeserializeRow =
new VectorDeserializeRow<LazyBinaryDeserializeRead>(
- new LazyBinaryDeserializeRead(bigTableTypeInfos));
+ new LazyBinaryDeserializeRead(
+ bigTableTypeInfos,
+ /* useExternalBuffer */ true));
bigTableVectorDeserializeRow.init(noNullsProjection);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
index ee66d5b..726a937 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java
@@ -267,7 +267,10 @@ public abstract class VectorMapJoinFastLongHashTable
this.isOuterJoin = isOuterJoin;
this.hashTableKeyType = hashTableKeyType;
PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() };
- keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos);
+ keyBinarySortableDeserializeRead =
+ new BinarySortableDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false);
allocateBucketArray();
useMinMax = minMaxEnabled;
min = Long.MAX_VALUE;
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
index bf378ac..456e6ba 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java
@@ -67,6 +67,8 @@ public class VectorMapJoinFastStringCommon {
this.isOuterJoin = isOuterJoin;
PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo };
keyBinarySortableDeserializeRead =
- new BinarySortableDeserializeRead(primitiveTypeInfos);
+ new BinarySortableDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
index 0eabc44..ac85899 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java
@@ -161,8 +161,6 @@ public class VectorMapJoinOptimizedLongCommon {
min = Long.MAX_VALUE;
max = Long.MIN_VALUE;
this.hashTableKeyType = hashTableKeyType;
- // PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() };
- // keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos);
keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(1);
output = new Output();
keyBinarySortableSerializeWrite.set(output);
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
index c6704f9..238c136 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSerDeRow.java
@@ -27,6 +27,7 @@ import java.util.Random;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.OpenCSVSerde;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
@@ -45,6 +46,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead;
import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableSerializeWrite;
import org.apache.hadoop.hive.serde2.fast.DeserializeRead;
@@ -69,6 +71,8 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import com.google.common.base.Charsets;
+
import junit.framework.TestCase;
/**
@@ -305,7 +309,8 @@ public class TestVectorSerDeRow extends TestCase {
}
}
- void testVectorSerializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
+ void testVectorSerializeRow(int caseNum, Random r, SerializationType serializationType)
+ throws HiveException, IOException, SerDeException {
String[] emptyScratchTypeNames = new String[0];
@@ -324,11 +329,11 @@ public class TestVectorSerDeRow extends TestCase {
SerializeWrite serializeWrite;
switch (serializationType) {
case BINARY_SORTABLE:
- deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos());
+ deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
serializeWrite = new BinarySortableSerializeWrite(fieldCount);
break;
case LAZY_BINARY:
- deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos());
+ deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false);
serializeWrite = new LazyBinarySerializeWrite(fieldCount);
break;
case LAZY_SIMPLE:
@@ -336,7 +341,7 @@ public class TestVectorSerDeRow extends TestCase {
StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
byte separator = (byte) '\t';
- deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(),
+ deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), /* useExternalBuffer */ false,
separator, lazySerDeParams);
serializeWrite = new LazySimpleSerializeWrite(fieldCount,
separator, lazySerDeParams);
@@ -367,7 +372,7 @@ public class TestVectorSerDeRow extends TestCase {
}
void examineBatch(VectorizedRowBatch batch, VectorExtractRow vectorExtractRow,
- Object[][] randomRows, int firstRandomRowIndex ) {
+ PrimitiveTypeInfo[] primitiveTypeInfos, Object[][] randomRows, int firstRandomRowIndex ) {
int rowSize = vectorExtractRow.getCount();
Object[] row = new Object[rowSize];
@@ -381,13 +386,14 @@ public class TestVectorSerDeRow extends TestCase {
fail("Unexpected NULL from extractRow");
}
if (!row[c].equals(expectedRow[c])) {
- fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch");
+ fail("Row " + (firstRandomRowIndex + i) + " and column " + c + " mismatch (" + primitiveTypeInfos[c].getPrimitiveCategory() + " actual value " + row[c] + " and expected value " + expectedRow[c] + ")");
}
}
}
}
- private Output serializeRow(Object[] row, VectorRandomRowSource source, SerializeWrite serializeWrite) throws HiveException, IOException {
+ private Output serializeRow(Object[] row, VectorRandomRowSource source,
+ SerializeWrite serializeWrite) throws HiveException, IOException {
Output output = new Output();
serializeWrite.set(output);
PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
@@ -514,9 +520,7 @@ public class TestVectorSerDeRow extends TestCase {
return output;
}
- private Properties createProperties(String fieldNames, String fieldTypes) {
- Properties tbl = new Properties();
-
+ private void addToProperties(Properties tbl, String fieldNames, String fieldTypes) {
// Set the configuration parameters
tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "9");
@@ -524,19 +528,23 @@ public class TestVectorSerDeRow extends TestCase {
tbl.setProperty("columns.types", fieldTypes);
tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
+ }
- return tbl;
+ private LazySerDeParameters getSerDeParams( StructObjectInspector rowObjectInspector) throws SerDeException {
+ return getSerDeParams(new Configuration(), new Properties(), rowObjectInspector);
}
- private LazySerDeParameters getSerDeParams(StructObjectInspector rowObjectInspector) throws SerDeException {
+ private LazySerDeParameters getSerDeParams(Configuration conf, Properties tbl, StructObjectInspector rowObjectInspector) throws SerDeException {
String fieldNames = ObjectInspectorUtils.getFieldNames(rowObjectInspector);
String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowObjectInspector);
- Configuration conf = new Configuration();
- Properties tbl = createProperties(fieldNames, fieldTypes);
+ addToProperties(tbl, fieldNames, fieldTypes);
return new LazySerDeParameters(conf, tbl, LazySimpleSerDe.class.getName());
}
- void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializationType) throws HiveException, IOException, SerDeException {
+ void testVectorDeserializeRow(int caseNum, Random r, SerializationType serializationType,
+ boolean alternate1, boolean alternate2,
+ boolean useExternalBuffer)
+ throws HiveException, IOException, SerDeException {
String[] emptyScratchTypeNames = new String[0];
@@ -552,24 +560,88 @@ public class TestVectorSerDeRow extends TestCase {
Arrays.fill(cv.isNull, true);
}
+ PrimitiveTypeInfo[] primitiveTypeInfos = source.primitiveTypeInfos();
int fieldCount = source.typeNames().size();
DeserializeRead deserializeRead;
SerializeWrite serializeWrite;
switch (serializationType) {
case BINARY_SORTABLE:
- deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos());
- serializeWrite = new BinarySortableSerializeWrite(fieldCount);
+ boolean useColumnSortOrderIsDesc = alternate1;
+ if (!useColumnSortOrderIsDesc) {
+ deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
+ serializeWrite = new BinarySortableSerializeWrite(fieldCount);
+ } else {
+ boolean[] columnSortOrderIsDesc = new boolean[fieldCount];
+ for (int i = 0; i < fieldCount; i++) {
+ columnSortOrderIsDesc[i] = r.nextBoolean();
+ }
+ deserializeRead = new BinarySortableDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer,
+ columnSortOrderIsDesc);
+
+ byte[] columnNullMarker = new byte[fieldCount];
+ byte[] columnNotNullMarker = new byte[fieldCount];
+ for (int i = 0; i < fieldCount; i++) {
+ if (columnSortOrderIsDesc[i]) {
+ // Descending
+ // Null last (default for descending order)
+ columnNullMarker[i] = BinarySortableSerDe.ZERO;
+ columnNotNullMarker[i] = BinarySortableSerDe.ONE;
+ } else {
+ // Ascending
+ // Null first (default for ascending order)
+ columnNullMarker[i] = BinarySortableSerDe.ZERO;
+ columnNotNullMarker[i] = BinarySortableSerDe.ONE;
+ }
+ }
+ serializeWrite = new BinarySortableSerializeWrite(columnSortOrderIsDesc, columnNullMarker, columnNotNullMarker);
+ }
+ boolean useBinarySortableCharsNeedingEscape = alternate2;
+ if (useBinarySortableCharsNeedingEscape) {
+ source.addBinarySortableAlphabets();
+ }
break;
case LAZY_BINARY:
- deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos());
+ deserializeRead = new LazyBinaryDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer);
serializeWrite = new LazyBinarySerializeWrite(fieldCount);
break;
case LAZY_SIMPLE:
{
StructObjectInspector rowObjectInspector = source.rowStructObjectInspector();
- LazySerDeParameters lazySerDeParams = getSerDeParams(rowObjectInspector);
+ Configuration conf = new Configuration();
+ Properties tbl = new Properties();
+ tbl.setProperty(serdeConstants.FIELD_DELIM, "\t");
+ tbl.setProperty(serdeConstants.LINE_DELIM, "\n");
byte separator = (byte) '\t';
- deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(),
+ boolean useLazySimpleEscapes = alternate1;
+ if (useLazySimpleEscapes) {
+ tbl.setProperty(serdeConstants.QUOTE_CHAR, "'");
+ String escapeString = "\\";
+ tbl.setProperty(serdeConstants.ESCAPE_CHAR, escapeString);
+ }
+
+ LazySerDeParameters lazySerDeParams = getSerDeParams(conf, tbl, rowObjectInspector);
+
+ if (useLazySimpleEscapes) {
+ // LazySimple seems to throw away everything but \n and \r.
+ boolean[] needsEscape = lazySerDeParams.getNeedsEscape();
+ StringBuilder sb = new StringBuilder();
+ if (needsEscape['\n']) {
+ sb.append('\n');
+ }
+ if (needsEscape['\r']) {
+ sb.append('\r');
+ }
+ // for (int i = 0; i < needsEscape.length; i++) {
+ // if (needsEscape[i]) {
+ // sb.append((char) i);
+ // }
+ // }
+ String needsEscapeStr = sb.toString();
+ if (needsEscapeStr.length() > 0) {
+ source.addEscapables(needsEscapeStr);
+ }
+ }
+ deserializeRead = new LazySimpleDeserializeRead(source.primitiveTypeInfos(), useExternalBuffer,
separator, lazySerDeParams);
serializeWrite = new LazySimpleSerializeWrite(fieldCount,
separator, lazySerDeParams);
@@ -597,47 +669,133 @@ public class TestVectorSerDeRow extends TestCase {
Output output = serializeRow(row, source, serializeWrite);
vectorDeserializeRow.setBytes(output.getData(), 0, output.getLength());
- vectorDeserializeRow.deserialize(batch, batch.size);
+ try {
+ vectorDeserializeRow.deserialize(batch, batch.size);
+ } catch (Exception e) {
+ throw new HiveException(
+ "\nDeserializeRead details: " +
+ vectorDeserializeRow.getDetailedReadPositionString(),
+ e);
+ }
batch.size++;
if (batch.size == batch.DEFAULT_SIZE) {
- examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex);
+ examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
firstRandomRowIndex = i + 1;
batch.reset();
}
}
if (batch.size > 0) {
- examineBatch(batch, vectorExtractRow, randomRows, firstRandomRowIndex);
+ examineBatch(batch, vectorExtractRow, primitiveTypeInfos, randomRows, firstRandomRowIndex);
}
}
public void testVectorSerDeRow() throws Throwable {
- try {
Random r = new Random(5678);
- for (int c = 0; c < 10; c++) {
- testVectorSerializeRow(c, r, SerializationType.BINARY_SORTABLE);
- }
- for (int c = 0; c < 10; c++) {
- testVectorSerializeRow(c, r, SerializationType.LAZY_BINARY);
- }
- for (int c = 0; c < 10; c++) {
- testVectorSerializeRow(c, r, SerializationType.LAZY_SIMPLE);
- }
- for (int c = 0; c < 10; c++) {
- testVectorDeserializeRow(c, r, SerializationType.BINARY_SORTABLE);
- }
- for (int c = 0; c < 10; c++) {
- testVectorDeserializeRow(c, r, SerializationType.LAZY_BINARY);
- }
- for (int c = 0; c < 10; c++) {
- testVectorDeserializeRow(c, r, SerializationType.LAZY_SIMPLE);
- }
-
-
- } catch (Throwable e) {
- e.printStackTrace();
- throw e;
- }
+ int c = 0;
+
+ /*
+ * SERIALIZE tests.
+ */
+ testVectorSerializeRow(c++, r, SerializationType.BINARY_SORTABLE);
+
+ testVectorSerializeRow(c++, r, SerializationType.LAZY_BINARY);
+
+ testVectorSerializeRow(c++, r, SerializationType.LAZY_SIMPLE);
+
+ /*
+ * DESERIALIZE tests.
+ */
+
+ // BINARY_SORTABLE
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ false,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ false,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ true,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ false,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ false,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ false,
+ /* useExternalBuffer */ true);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ true,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ false,
+ /* useExternalBuffer */ true);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ false,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ true,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ true,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ true,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ false,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ true,
+ /* useExternalBuffer */ true);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.BINARY_SORTABLE,
+ /* alternate1 = useColumnSortOrderIsDesc */ true,
+ /* alternate2 = useBinarySortableCharsNeedingEscape */ true,
+ /* useExternalBuffer */ true);
+
+ // LAZY_BINARY
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_BINARY,
+ /* alternate1 = unused */ false,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_BINARY,
+ /* alternate1 = unused */ false,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ true);
+
+ // LAZY_SIMPLE
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_SIMPLE,
+ /* alternate1 = useLazySimpleEscapes */ false,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_SIMPLE,
+ /* alternate1 = useLazySimpleEscapes */ false,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ true);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_SIMPLE,
+ /* alternate1 = useLazySimpleEscapes */ true,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ false);
+
+ testVectorDeserializeRow(c++, r,
+ SerializationType.LAZY_SIMPLE,
+ /* alternate1 = useLazySimpleEscapes */ true,
+ /* alternate2 = unused */ false,
+ /* useExternalBuffer */ true);
}
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
index 349c76a..57bf60d 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/VectorRandomRowSource.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.common.type.RandomTypeUtil;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
@@ -61,6 +62,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hive.common.util.DateUtils;
+import com.google.common.base.Charsets;
+
/**
* Generate object inspector and random row object[].
*/
@@ -80,6 +83,11 @@ public class VectorRandomRowSource {
private StructObjectInspector rowStructObjectInspector;
+ private String[] alphabets;
+
+ private boolean addEscapables;
+ private String needsEscapeStr;
+
public List<String> typeNames() {
return typeNames;
}
@@ -201,6 +209,35 @@ public class VectorRandomRowSource {
typeNames.add(typeName);
}
rowStructObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector(columnNames, primitiveObjectInspectorList);
+ alphabets = new String[columnCount];
+ }
+
+ public void addBinarySortableAlphabets() {
+ for (int c = 0; c < columnCount; c++) {
+ switch (primitiveCategories[c]) {
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ byte[] bytes = new byte[10 + r.nextInt(10)];
+ for (int i = 0; i < bytes.length; i++) {
+ bytes[i] = (byte) (32 + r.nextInt(96));
+ }
+ int alwaysIndex = r.nextInt(bytes.length);
+ bytes[alwaysIndex] = 0; // Must be escaped by BinarySortable.
+ int alwaysIndex2 = r.nextInt(bytes.length);
+ bytes[alwaysIndex2] = 1; // Must be escaped by BinarySortable.
+ alphabets[c] = new String(bytes, Charsets.UTF_8);
+ break;
+ default:
+ // No alphabet needed.
+ break;
+ }
+ }
+ }
+
+ public void addEscapables(String needsEscapeStr) {
+ addEscapables = true;
+ this.needsEscapeStr = needsEscapeStr;
}
public Object[][] randomRows(int n) {
@@ -327,65 +364,107 @@ public class VectorRandomRowSource {
}
public Object randomObject(int column) {
- return randomObject(column, r, primitiveCategories, primitiveTypeInfos);
+ return randomObject(column, r, primitiveCategories, primitiveTypeInfos, alphabets, addEscapables, needsEscapeStr);
}
public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories,
PrimitiveTypeInfo[] primitiveTypeInfos) {
+ return randomObject(column, r, primitiveCategories, primitiveTypeInfos, null, false, "");
+ }
+
+ public static Object randomObject(int column, Random r, PrimitiveCategory[] primitiveCategories,
+ PrimitiveTypeInfo[] primitiveTypeInfos, String[] alphabets, boolean addEscapables, String needsEscapeStr) {
PrimitiveCategory primitiveCategory = primitiveCategories[column];
PrimitiveTypeInfo primitiveTypeInfo = primitiveTypeInfos[column];
- switch (primitiveCategory) {
- case BOOLEAN:
- return Boolean.valueOf(r.nextInt(1) == 1);
- case BYTE:
- return Byte.valueOf((byte) r.nextInt());
- case SHORT:
- return Short.valueOf((short) r.nextInt());
- case INT:
- return Integer.valueOf(r.nextInt());
- case LONG:
- return Long.valueOf(r.nextLong());
- case DATE:
- return RandomTypeUtil.getRandDate(r);
- case FLOAT:
- return Float.valueOf(r.nextFloat() * 10 - 5);
- case DOUBLE:
- return Double.valueOf(r.nextDouble() * 10 - 5);
- case STRING:
- return RandomTypeUtil.getRandString(r);
- case CHAR:
- return getRandHiveChar(r, (CharTypeInfo) primitiveTypeInfo);
- case VARCHAR:
- return getRandHiveVarchar(r, (VarcharTypeInfo) primitiveTypeInfo);
- case BINARY:
- return getRandBinary(r, 1 + r.nextInt(100));
- case TIMESTAMP:
- return RandomTypeUtil.getRandTimestamp(r);
- case INTERVAL_YEAR_MONTH:
- return getRandIntervalYearMonth(r);
- case INTERVAL_DAY_TIME:
- return getRandIntervalDayTime(r);
- case DECIMAL:
- return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
- default:
- throw new Error("Unknown primitive category " + primitiveCategory);
+ try {
+ switch (primitiveCategory) {
+ case BOOLEAN:
+ return Boolean.valueOf(r.nextInt(1) == 1);
+ case BYTE:
+ return Byte.valueOf((byte) r.nextInt());
+ case SHORT:
+ return Short.valueOf((short) r.nextInt());
+ case INT:
+ return Integer.valueOf(r.nextInt());
+ case LONG:
+ return Long.valueOf(r.nextLong());
+ case DATE:
+ return RandomTypeUtil.getRandDate(r);
+ case FLOAT:
+ return Float.valueOf(r.nextFloat() * 10 - 5);
+ case DOUBLE:
+ return Double.valueOf(r.nextDouble() * 10 - 5);
+ case STRING:
+ case CHAR:
+ case VARCHAR:
+ {
+ String result;
+ if (alphabets != null && alphabets[column] != null) {
+ result = RandomTypeUtil.getRandString(r, alphabets[column], r.nextInt(10));
+ } else {
+ result = RandomTypeUtil.getRandString(r);
+ }
+ if (addEscapables && result.length() > 0) {
+ int escapeCount = 1 + r.nextInt(2);
+ for (int i = 0; i < escapeCount; i++) {
+ int index = r.nextInt(result.length());
+ String begin = result.substring(0, index);
+ String end = result.substring(index);
+ Character needsEscapeChar = needsEscapeStr.charAt(r.nextInt(needsEscapeStr.length()));
+ result = begin + needsEscapeChar + end;
+ }
+ }
+ switch (primitiveCategory) {
+ case STRING:
+ return result;
+ case CHAR:
+ return new HiveChar(result, ((CharTypeInfo) primitiveTypeInfo).getLength());
+ case VARCHAR:
+ return new HiveChar(result, ((VarcharTypeInfo) primitiveTypeInfo).getLength());
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ }
+ case BINARY:
+ return getRandBinary(r, 1 + r.nextInt(100));
+ case TIMESTAMP:
+ return RandomTypeUtil.getRandTimestamp(r);
+ case INTERVAL_YEAR_MONTH:
+ return getRandIntervalYearMonth(r);
+ case INTERVAL_DAY_TIME:
+ return getRandIntervalDayTime(r);
+ case DECIMAL:
+ return getRandHiveDecimal(r, (DecimalTypeInfo) primitiveTypeInfo);
+ default:
+ throw new Error("Unknown primitive category " + primitiveCategory);
+ }
+ } catch (Exception e) {
+ throw new RuntimeException("randomObject failed on column " + column + " type " + primitiveCategory, e);
}
}
- public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+ public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo, String alphabet) {
int maxLength = 1 + r.nextInt(charTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ String randomString = RandomTypeUtil.getRandString(r, alphabet, 100);
HiveChar hiveChar = new HiveChar(randomString, maxLength);
return hiveChar;
}
- public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+ public static HiveChar getRandHiveChar(Random r, CharTypeInfo charTypeInfo) {
+ return getRandHiveChar(r, charTypeInfo, "abcdefghijklmnopqrstuvwxyz");
+ }
+
+ public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo, String alphabet) {
int maxLength = 1 + r.nextInt(varcharTypeInfo.getLength());
- String randomString = RandomTypeUtil.getRandString(r, "abcdefghijklmnopqrstuvwxyz", 100);
+ String randomString = RandomTypeUtil.getRandString(r, alphabet, 100);
HiveVarchar hiveVarchar = new HiveVarchar(randomString, maxLength);
return hiveVarchar;
}
+ public static HiveVarchar getRandHiveVarchar(Random r, VarcharTypeInfo varcharTypeInfo) {
+ return getRandHiveVarchar(r, varcharTypeInfo, "abcdefghijklmnopqrstuvwxyz");
+ }
+
public static byte[] getRandBinary(Random r, int len){
byte[] bytes = new byte[len];
for (int j = 0; j < len; j++){
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
index 0bcfb56..7f68186 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastRowHashMap.java
@@ -69,7 +69,9 @@ public class CheckFastRowHashMap extends CheckFastHashTable {
int length = ref.getLength();
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(typeInfos);
+ new LazyBinaryDeserializeRead(
+ typeInfos,
+ /* useExternalBuffer */ false);
lazyBinaryDeserializeRead.set(bytes, offset, length);
@@ -127,7 +129,9 @@ public class CheckFastRowHashMap extends CheckFastHashTable {
}
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(typeInfos);
+ new LazyBinaryDeserializeRead(
+ typeInfos,
+ /* useExternalBuffer */ false);
lazyBinaryDeserializeRead.set(bytes, offset, length);
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
index 003a2d4..0cbc8d0 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/binarysortable/fast/BinarySortableDeserializeRead.java
@@ -32,7 +32,6 @@ import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.io.Text;
/*
* Directly deserialize with the caller reading field-by-field the LazyBinary serialization format.
@@ -64,8 +63,12 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
private int end;
private int fieldStart;
+ private int bytesStart;
+
+ private int internalBufferLen;
+ private byte[] internalBuffer;
+
private byte[] tempTimestampBytes;
- private Text tempText;
private byte[] tempDecimalBuffer;
@@ -77,13 +80,14 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
/*
* Use this constructor when only ascending sort order is used.
*/
- public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos) {
- this(primitiveTypeInfos, null);
+ public BinarySortableDeserializeRead(PrimitiveTypeInfo[] primitiveTypeInfos,
+ boolean useExternalBuffer) {
+ this(primitiveTypeInfos, useExternalBuffer, null);
}
- public BinarySortableDeserializeRead(TypeInfo[] typeInfos,
+ public BinarySortableDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer,
boolean[] columnSortOrderIsDesc) {
- super(typeInfos);
+ super(typeInfos, useExternalBuffer);
fieldCount = typeInfos.length;
if (columnSortOrderIsDesc != null) {
this.columnSortOrderIsDesc = columnSortOrderIsDesc;
@@ -94,6 +98,7 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
inputByteBuffer = new InputByteBuffer();
readBeyondConfiguredFieldsWarned = false;
bufferRangeHasExtraDataWarned = false;
+ internalBufferLen = -1;
}
// Not public since we must have column information.
@@ -139,6 +144,8 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
sb.append(" current read offset ");
sb.append(inputByteBuffer.tell());
}
+ sb.append(" column sort order ");
+ sb.append(Arrays.toString(columnSortOrderIsDesc));
return sb.toString();
}
@@ -276,14 +283,55 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
case CHAR:
case VARCHAR:
{
- if (tempText == null) {
- tempText = new Text();
+ /*
+ * This code is a modified version of BinarySortableSerDe.deserializeText that lets us
+ * detect if we can return a reference to the bytes directly.
+ */
+
+ // Get the actual length first
+ bytesStart = inputByteBuffer.tell();
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ int length = 0;
+ do {
+ byte b = inputByteBuffer.read(invert);
+ if (b == 0) {
+ // end of string
+ break;
+ }
+ if (b == 1) {
+ // the last char is an escape char. read the actual char
+ inputByteBuffer.read(invert);
+ }
+ length++;
+ } while (true);
+
+ if (length == 0 ||
+ (!invert && length == inputByteBuffer.tell() - bytesStart - 1)) {
+ // No inversion or escaping happened, so we are can reference directly.
+ currentExternalBufferNeeded = false;
+ currentBytes = inputByteBuffer.getData();
+ currentBytesStart = bytesStart;
+ currentBytesLength = length;
+ } else {
+ // We are now positioned at the end of this field's bytes.
+ if (useExternalBuffer) {
+ // If we decided not to reposition and re-read the buffer to copy it with
+ // copyToExternalBuffer, we we will still be correctly positioned for the next field.
+ currentExternalBufferNeeded = true;
+ currentExternalBufferNeededLen = length;
+ } else {
+ // The copyToBuffer will reposition and re-read the input buffer.
+ currentExternalBufferNeeded = false;
+ if (internalBufferLen < length) {
+ internalBufferLen = length;
+ internalBuffer = new byte[internalBufferLen];
+ }
+ copyToBuffer(internalBuffer, 0, length);
+ currentBytes = internalBuffer;
+ currentBytesStart = 0;
+ currentBytesLength = length;
+ }
}
- BinarySortableSerDe.deserializeText(
- inputByteBuffer, columnSortOrderIsDesc[fieldIndex], tempText);
- currentBytes = tempText.getBytes();
- currentBytesStart = 0;
- currentBytesLength = tempText.getLength();
}
break;
case INTERVAL_YEAR_MONTH:
@@ -317,7 +365,9 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
final boolean invert = columnSortOrderIsDesc[fieldIndex];
int b = inputByteBuffer.read(invert) - 1;
- assert (b == 1 || b == -1 || b == 0);
+ if (!(b == 1 || b == -1 || b == 0)) {
+ throw new IOException("Unexpected byte value " + (int)b + " in binary sortable format data (invert " + invert + ")");
+ }
boolean positive = b != -1;
int factor = inputByteBuffer.read(invert) ^ 0x80;
@@ -334,7 +384,10 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
do {
b = inputByteBuffer.read(positive ? invert : !invert);
- assert(b != 1);
+ if (b == 1) {
+ throw new IOException("Expected -1 and found byte value " + (int)b + " in binary sortable format data (invert " + invert + ")");
+ }
+
if (b == 0) {
// end of digits
@@ -396,6 +449,32 @@ public final class BinarySortableDeserializeRead extends DeserializeRead {
return isNull;
}
+ @Override
+ public void copyToExternalBuffer(byte[] externalBuffer, int externalBufferStart) throws IOException {
+ copyToBuffer(externalBuffer, externalBufferStart, currentExternalBufferNeededLen);
+ }
+
+ private void copyToBuffer(byte[] buffer, int bufferStart, int bufferLength) throws IOException {
+ final boolean invert = columnSortOrderIsDesc[fieldIndex];
+ inputByteBuffer.seek(bytesStart);
+ // 3. Copy the data.
+ for (int i = 0; i < bufferLength; i++) {
+ byte b = inputByteBuffer.read(invert);
+ if (b == 1) {
+ // The last char is an escape char, read the actual char.
+ // The serialization format escape \0 to \1, and \1 to \2,
+ // to make sure the string is null-terminated.
+ b = (byte) (inputByteBuffer.read(invert) - 1);
+ }
+ buffer[bufferStart + i] = b;
+ }
+ // 4. Read the null terminator.
+ byte b = inputByteBuffer.read(invert);
+ if (b != 0) {
+ throw new RuntimeException("Expected 0 terminating byte");
+ }
+ }
+
/*
* Call this method after all fields have been read to check for extra fields.
*/
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
index 8f3e771..1600fec 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/fast/DeserializeRead.java
@@ -47,12 +47,34 @@ public abstract class DeserializeRead {
protected TypeInfo[] typeInfos;
+ protected boolean useExternalBuffer;
+
protected boolean[] columnsToInclude;
protected Category[] categories;
protected PrimitiveCategory[] primitiveCategories;
- public DeserializeRead(TypeInfo[] typeInfos) {
+ /**
+ * Constructor.
+ *
+ * When useExternalBuffer is specified true and readCheckNull reads a string/char/varchar/binary
+ * field, it will request an external buffer to receive the data of format conversion.
+ *
+ * if (!deserializeRead.readCheckNull()) {
+ * if (deserializeRead.currentExternalBufferNeeded) {
+ * <Ensure external buffer is as least deserializeRead.currentExternalBufferNeededLen bytes>
+ * deserializeRead.copyToExternalBuffer(externalBuffer, externalBufferStart);
+ * } else {
+ * <Otherwise, field data is available in the currentBytes, currentBytesStart, and
+ * currentBytesLength of deserializeRead>
+ * }
+ *
+ * @param typeInfos
+ * @param useExternalBuffer Specify true when the caller is prepared to provide a bytes buffer
+ * to receive a string/char/varchar/binary field that needs format
+ * conversion.
+ */
+ public DeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
this.typeInfos = typeInfos;
final int count = typeInfos.length;
categories = new Category[count];
@@ -96,6 +118,8 @@ public abstract class DeserializeRead {
// No writable needed for this data type.
}
}
+
+ this.useExternalBuffer = useExternalBuffer;
}
columnsToInclude = null;
@@ -194,7 +218,19 @@ public abstract class DeserializeRead {
*
* For CHAR and VARCHAR when the caller takes responsibility for
* truncation/padding issues.
+ *
+ * When currentExternalBufferNeeded is true, conversion is needed into an external buffer of
+ * at least currentExternalBufferNeededLen bytes. Use copyToExternalBuffer to get the result.
+ *
+ * Otherwise, currentBytes, currentBytesStart, and currentBytesLength are the result.
*/
+ public boolean currentExternalBufferNeeded;
+ public int currentExternalBufferNeededLen;
+
+ public void copyToExternalBuffer(byte[] externalBuffer, int externalBufferStart) throws IOException {
+ throw new RuntimeException("Not implemented");
+ }
+
public byte[] currentBytes;
public int currentBytesStart;
public int currentBytesLength;
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
index ac44390..07709d8 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleDeserializeRead.java
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.serde2.lazy.fast;
+import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.nio.charset.CharacterCodingException;
import java.sql.Date;
@@ -78,15 +79,17 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
private int fieldStart;
private int fieldLength;
- private Text tempText;
+ private int internalBufferLen;
+ private byte[] internalBuffer;
+
private TimestampParser timestampParser;
private boolean extraFieldWarned;
private boolean missingFieldWarned;
- public LazySimpleDeserializeRead(TypeInfo[] typeInfos,
+ public LazySimpleDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer,
byte separator, LazySerDeParameters lazyParams) {
- super(typeInfos);
+ super(typeInfos, useExternalBuffer);
// Field length is difference between positions hence one extra.
startPosition = new int[typeInfos.length + 1];
@@ -100,13 +103,14 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
lastColumnTakesRest = lazyParams.isLastColumnTakesRest();
fieldCount = typeInfos.length;
- tempText = new Text();
extraFieldWarned = false;
missingFieldWarned = false;
+ internalBufferLen = -1;
}
- public LazySimpleDeserializeRead(TypeInfo[] typeInfos, LazySerDeParameters lazyParams) {
- this(typeInfos, lazyParams.getSeparators()[0], lazyParams);
+ public LazySimpleDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer,
+ LazySerDeParameters lazyParams) {
+ this(typeInfos, useExternalBuffer, lazyParams.getSeparators()[0], lazyParams);
}
// Not public since we must have the field count so every 8 fields NULL bytes can be navigated.
@@ -395,16 +399,48 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
case STRING:
case CHAR:
case VARCHAR:
- if (isEscaped) {
- LazyUtils.copyAndEscapeStringDataToText(bytes, fieldStart, fieldLength, escapeChar, tempText);
- currentBytes = tempText.getBytes();
- currentBytesStart = 0;
- currentBytesLength = tempText.getLength();
- } else {
- // if the data is not escaped, simply copy the data.
- currentBytes = bytes;
- currentBytesStart = fieldStart;
- currentBytesLength = fieldLength;
+ {
+ if (isEscaped) {
+ // First calculate the length of the output string
+ int outputLength = 0;
+ for (int i = 0; i < fieldLength; i++) {
+ if (bytes[fieldStart + i] != escapeChar) {
+ outputLength++;
+ } else {
+ outputLength++;
+ i++;
+ }
+ }
+ if (outputLength == fieldLength) {
+ // No escaping.
+ currentExternalBufferNeeded = false;
+ currentBytes = bytes;
+ currentBytesStart = fieldStart;
+ currentBytesLength = outputLength;
+ } else {
+ if (useExternalBuffer) {
+ currentExternalBufferNeeded = true;
+ currentExternalBufferNeededLen = outputLength;
+ } else {
+ // The copyToBuffer will reposition and re-read the input buffer.
+ currentExternalBufferNeeded = false;
+ if (internalBufferLen < outputLength) {
+ internalBufferLen = outputLength;
+ internalBuffer = new byte[internalBufferLen];
+ }
+ copyToBuffer(internalBuffer, 0, outputLength);
+ currentBytes = internalBuffer;
+ currentBytesStart = 0;
+ currentBytesLength = outputLength;
+ }
+ }
+ } else {
+ // If the data is not escaped, reference the data directly.
+ currentExternalBufferNeeded = false;
+ currentBytes = bytes;
+ currentBytesStart = fieldStart;
+ currentBytesLength = fieldLength;
+ }
}
break;
case BINARY:
@@ -528,6 +564,32 @@ public final class LazySimpleDeserializeRead extends DeserializeRead {
return false;
}
+ @Override
+ public void copyToExternalBuffer(byte[] externalBuffer, int externalBufferStart) {
+ copyToBuffer(externalBuffer, externalBufferStart, currentExternalBufferNeededLen);
+ }
+
+ private void copyToBuffer(byte[] buffer, int bufferStart, int bufferLength) {
+ int k = 0;
+ for (int i = 0; i < bufferLength; i++) {
+ byte b = bytes[fieldStart + i];
+ if (b == escapeChar && i < bufferLength - 1) {
+ ++i;
+ // Check if it's '\r' or '\n'
+ if (bytes[fieldStart + i] == 'r') {
+ buffer[bufferStart + k++] = '\r';
+ } else if (bytes[fieldStart + i] == 'n') {
+ buffer[bufferStart + k++] = '\n';
+ } else {
+ // get the next byte
+ buffer[bufferStart + k++] = bytes[fieldStart + i];
+ }
+ } else {
+ buffer[bufferStart + k++] = b;
+ }
+ }
+ }
+
public void logExceptionMessage(byte[] bytes, int bytesStart, int bytesLength, String dataType) {
try {
if(LOG.isDebugEnabled()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
index 0df1d79..472ace7 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinaryDeserializeRead.java
@@ -68,11 +68,12 @@ public final class LazyBinaryDeserializeRead extends DeserializeRead {
private boolean readBeyondConfiguredFieldsWarned;
private boolean bufferRangeHasExtraDataWarned;
- public LazyBinaryDeserializeRead(TypeInfo[] typeInfos) {
- super(typeInfos);
+ public LazyBinaryDeserializeRead(TypeInfo[] typeInfos, boolean useExternalBuffer) {
+ super(typeInfos, useExternalBuffer);
fieldCount = typeInfos.length;
tempVInt = new VInt();
tempVLong = new VLong();
+ currentExternalBufferNeeded = false;
readBeyondConfiguredFieldsWarned = false;
bufferRangeHasExtraDataWarned = false;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
index 7babf7a..49ee9c6 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableFast.java
@@ -115,7 +115,10 @@ public class TestBinarySortableFast extends TestCase {
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead =
- new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+ new BinarySortableDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false,
+ columnSortOrderIsDesc);
if (useIncludeColumns) {
binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
@@ -143,7 +146,10 @@ public class TestBinarySortableFast extends TestCase {
* Clip off one byte and expect to get an EOFException on the write field.
*/
BinarySortableDeserializeRead binarySortableDeserializeRead2 =
- new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+ new BinarySortableDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false,
+ columnSortOrderIsDesc);
if (useIncludeColumns) {
binarySortableDeserializeRead2.setColumnsToInclude(columnsToInclude);
@@ -247,7 +253,10 @@ public class TestBinarySortableFast extends TestCase {
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
BinarySortableDeserializeRead binarySortableDeserializeRead =
- new BinarySortableDeserializeRead(primitiveTypeInfos, columnSortOrderIsDesc);
+ new BinarySortableDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false,
+ columnSortOrderIsDesc);
if (useIncludeColumns) {
binarySortableDeserializeRead.setColumnsToInclude(columnsToInclude);
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
index 66c6203..8285c06 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleFast.java
@@ -96,7 +96,9 @@ public class TestLazySimpleFast extends TestCase {
for (int i = 0; i < rowCount; i++) {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead =
- new LazySimpleDeserializeRead(writePrimitiveTypeInfos,
+ new LazySimpleDeserializeRead(
+ writePrimitiveTypeInfos,
+ /* useExternalBuffer */ false,
separator, serdeParams);
if (useIncludeColumns) {
@@ -186,7 +188,9 @@ public class TestLazySimpleFast extends TestCase {
Object[] row = rows[i];
LazySimpleDeserializeRead lazySimpleDeserializeRead =
- new LazySimpleDeserializeRead(writePrimitiveTypeInfos,
+ new LazySimpleDeserializeRead(
+ writePrimitiveTypeInfos,
+ /* useExternalBuffer */ false,
separator, serdeParams);
if (useIncludeColumns) {
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
----------------------------------------------------------------------
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
index 5af11cd..e64d67d 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinaryFast.java
@@ -91,7 +91,9 @@ public class TestLazyBinaryFast extends TestCase {
// Specifying the right type info length tells LazyBinaryDeserializeRead which is the last
// column.
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(writePrimitiveTypeInfos);
+ new LazyBinaryDeserializeRead(
+ writePrimitiveTypeInfos,
+ /* useExternalBuffer */ false);
if (useIncludeColumns) {
lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
@@ -191,7 +193,9 @@ public class TestLazyBinaryFast extends TestCase {
// When doWriteFewerColumns, try to read more fields than exist in buffer.
LazyBinaryDeserializeRead lazyBinaryDeserializeRead =
- new LazyBinaryDeserializeRead(primitiveTypeInfos);
+ new LazyBinaryDeserializeRead(
+ primitiveTypeInfos,
+ /* useExternalBuffer */ false);
if (useIncludeColumns) {
lazyBinaryDeserializeRead.setColumnsToInclude(columnsToInclude);
http://git-wip-us.apache.org/repos/asf/hive/blob/146a9183/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
----------------------------------------------------------------------
diff --git a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index a6d932c..3caa584 100644
--- a/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ b/storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -170,6 +170,37 @@ public class BytesColumnVector extends ColumnVector {
}
/**
+ * Preallocate space in the local buffer so the caller can fill in the value bytes themselves.
+ *
+ * Always use with getValPreallocatedBytes, getValPreallocatedStart, and setValPreallocated.
+ */
+ public void ensureValPreallocated(int length) {
+ if ((nextFree + length) > buffer.length) {
+ increaseBufferSpace(length);
+ }
+ }
+
+ public byte[] getValPreallocatedBytes() {
+ return buffer;
+ }
+
+ public int getValPreallocatedStart() {
+ return nextFree;
+ }
+
+ /**
+ * Set the length of the preallocated values bytes used.
+ * @param elementNum
+ * @param length
+ */
+ public void setValPreallocated(int elementNum, int length) {
+ vector[elementNum] = buffer;
+ this.start[elementNum] = nextFree;
+ this.length[elementNum] = length;
+ nextFree += length;
+ }
+
+ /**
* Set a field to the concatenation of two string values. Result data is copied
* into the internal buffer.
*