You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2017/02/02 02:03:19 UTC
[20/50] [abbrv] hive git commit: HIVE-15709: Vectorization: Fix
performance issue with using LazyBinaryUtils.writeVInt and locking / thread
local storage (Matt McCline, reviewed by Gopal Vijayaraghavan)
HIVE-15709: Vectorization: Fix performance issue with using LazyBinaryUtils.writeVInt and locking / thread local storage (Matt McCline, reviewed by Gopal Vijayaraghavan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/bb33ffac
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/bb33ffac
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/bb33ffac
Branch: refs/heads/hive-14535
Commit: bb33ffacb35d813cd95aeb79c3d8e1067f96d2c3
Parents: 5468207
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue Jan 31 11:47:04 2017 -0800
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue Jan 31 11:47:04 2017 -0800
----------------------------------------------------------------------
.../hive/ql/exec/vector/VectorSerializeRow.java | 638 +++----------------
.../hive/serde2/lazybinary/LazyBinaryUtils.java | 4 +-
.../fast/LazyBinarySerializeWrite.java | 28 +-
3 files changed, 130 insertions(+), 540 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/bb33ffac/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
index a95098a..319b4a8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSerializeRow.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.exec.vector;
import java.io.IOException;
import java.sql.Timestamp;
+import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
@@ -49,6 +50,11 @@ public final class VectorSerializeRow<T extends SerializeWrite> {
private T serializeWrite;
+ private Category[] categories;
+ private PrimitiveCategory[] primitiveCategories;
+
+ private int[] outputColumnNums;
+
public VectorSerializeRow(T serializeWrite) {
this();
this.serializeWrite = serializeWrite;
@@ -58,598 +64,164 @@ public final class VectorSerializeRow<T extends SerializeWrite> {
private VectorSerializeRow() {
}
- private abstract class Writer<W extends SerializeWrite> {
- protected int columnIndex;
-
- Writer(int columnIndex) {
- this.columnIndex = columnIndex;
- }
-
- abstract boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException;
- }
-
- private abstract class AbstractLongWriter extends Writer<T> {
-
- AbstractLongWriter(int columnIndex) {
- super(columnIndex);
- }
- }
-
- private class BooleanWriter extends AbstractLongWriter {
-
- BooleanWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeBoolean(colVector.vector[0] != 0);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeBoolean(colVector.vector[batchIndex] != 0);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class ByteWriter extends AbstractLongWriter {
-
- ByteWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeByte((byte) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeByte((byte) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class ShortWriter extends AbstractLongWriter {
-
- ShortWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeShort((short) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeShort((short) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class IntWriter extends AbstractLongWriter {
-
- IntWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeInt((int) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeInt((int) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class LongWriter extends AbstractLongWriter {
-
- LongWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeLong(colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeLong(colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class DateWriter extends AbstractLongWriter {
-
- DateWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeDate((int) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeDate((int) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
- }
-
- private class TimestampWriter extends Writer {
-
- Timestamp scratchTimestamp;
-
- TimestampWriter(int columnIndex) {
- super(columnIndex);
- scratchTimestamp = new Timestamp(0);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- TimestampColumnVector colVector = (TimestampColumnVector) batch.cols[columnIndex];
+ public void init(List<String> typeNames, int[] columnMap) throws HiveException {
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- colVector.timestampUpdate(scratchTimestamp, 0);
- serializeWrite.writeTimestamp(scratchTimestamp);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- colVector.timestampUpdate(scratchTimestamp, batchIndex);
- serializeWrite.writeTimestamp(scratchTimestamp);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ final int size = typeNames.size();
+ categories = new Category[size];
+ primitiveCategories = new PrimitiveCategory[size];
+ outputColumnNums = Arrays.copyOf(columnMap, size);
+ TypeInfo typeInfo;
+ for (int i = 0; i < size; i++) {
+ typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
+ categories[i] = typeInfo.getCategory();
+ if (categories[i] == Category.PRIMITIVE) {
+ primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
}
}
}
- private class IntervalYearMonthWriter extends AbstractLongWriter {
-
- IntervalYearMonthWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- LongColumnVector colVector = (LongColumnVector) batch.cols[columnIndex];
+ public void init(List<String> typeNames) throws HiveException {
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeHiveIntervalYearMonth((int) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ final int size = typeNames.size();
+ categories = new Category[size];
+ primitiveCategories = new PrimitiveCategory[size];
+ outputColumnNums = new int[size];
+ TypeInfo typeInfo;
+ for (int i = 0; i < size; i++) {
+ typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
+ categories[i] = typeInfo.getCategory();
+ if (categories[i] == Category.PRIMITIVE) {
+ primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
}
+ outputColumnNums[i] = i;
}
}
- private class IntervalDayTimeWriter extends Writer {
-
- private HiveIntervalDayTime hiveIntervalDayTime;
-
- IntervalDayTimeWriter(int columnIndex) {
- super(columnIndex);
- hiveIntervalDayTime = new HiveIntervalDayTime();
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- IntervalDayTimeColumnVector colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex];
+ public void init(TypeInfo[] typeInfos, int[] columnMap)
+ throws HiveException {
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- hiveIntervalDayTime.set(colVector.asScratchIntervalDayTime(0));
- serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- hiveIntervalDayTime.set(colVector.asScratchIntervalDayTime(batchIndex));
- serializeWrite.writeHiveIntervalDayTime(hiveIntervalDayTime);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ final int size = typeInfos.length;
+ categories = new Category[size];
+ primitiveCategories = new PrimitiveCategory[size];
+ outputColumnNums = Arrays.copyOf(columnMap, size);
+ TypeInfo typeInfo;
+ for (int i = 0; i < typeInfos.length; i++) {
+ typeInfo = typeInfos[i];
+ categories[i] = typeInfo.getCategory();
+ if (categories[i] == Category.PRIMITIVE) {
+ primitiveCategories[i] = ((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory();
}
}
}
- private abstract class AbstractDoubleWriter extends Writer<T> {
-
- AbstractDoubleWriter(int columnIndex) {
- super(columnIndex);
- }
- }
-
- private class FloatWriter extends AbstractDoubleWriter {
-
- FloatWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeFloat((float) colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeFloat((float) colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
+ public int getCount() {
+ return categories.length;
}
- private class DoubleWriter extends AbstractDoubleWriter {
-
- DoubleWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- DoubleColumnVector colVector = (DoubleColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeDouble(colVector.vector[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeDouble(colVector.vector[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
+ public void setOutput(Output output) {
+ serializeWrite.set(output);
}
- private class StringWriter extends Writer<T> {
-
- StringWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeString(colVector.vector[0], colVector.start[0], colVector.length[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeString(colVector.vector[batchIndex],
- colVector.start[batchIndex], colVector.length[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- }
- }
+ public void setOutputAppend(Output output) {
+ serializeWrite.setAppend(output);
}
- private class BinaryWriter extends Writer<T> {
-
- BinaryWriter(int columnIndex) {
- super(columnIndex);
- }
+ private boolean hasAnyNulls;
+ private boolean isAllNulls;
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- BytesColumnVector colVector = (BytesColumnVector) batch.cols[columnIndex];
+ /*
+ * Note that when serializing a row, any logical mapping through the batch's selected array
+ * (selectedInUse) has already been performed. batchIndex is the actual index of the row.
+ */
+ public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException {
+ hasAnyNulls = false;
+ isAllNulls = true;
+ ColumnVector colVector;
+ int adjustedBatchIndex;
+ final int size = categories.length;
+ for (int i = 0; i < size; i++) {
+ colVector = batch.cols[outputColumnNums[i]];
if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- serializeWrite.writeBinary(colVector.vector[0], colVector.start[0], colVector.length[0]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ adjustedBatchIndex = 0;
} else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeBinary(colVector.vector[batchIndex],
- colVector.start[batchIndex], colVector.length[batchIndex]);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ adjustedBatchIndex = batchIndex;
}
- }
- }
-
- private class HiveDecimalWriter extends Writer<T> {
- protected HiveDecimalWritable[] vector;
-
- HiveDecimalWriter(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- boolean apply(VectorizedRowBatch batch, int batchIndex) throws IOException {
- DecimalColumnVector colVector = (DecimalColumnVector) batch.cols[columnIndex];
-
- if (colVector.isRepeating) {
- if (colVector.noNulls || !colVector.isNull[0]) {
- // We serialize specifying the HiveDecimalWritable but also the desired
- // serialization scale that will be used by text serialization for adding
- // trailing fractional zeroes.
- serializeWrite.writeHiveDecimal(colVector.vector[0], colVector.scale);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
- } else {
- if (colVector.noNulls || !colVector.isNull[batchIndex]) {
- serializeWrite.writeHiveDecimal(colVector.vector[batchIndex], colVector.scale);
- return true;
- } else {
- serializeWrite.writeNull();
- return false;
- }
+ if (!colVector.noNulls && colVector.isNull[adjustedBatchIndex]) {
+ serializeWrite.writeNull();
+ hasAnyNulls = true;
+ continue;
}
- }
- }
-
- private Writer<T>[] writers;
-
- private Writer<T> createWriter(TypeInfo typeInfo, int columnIndex) throws HiveException {
- Writer<T> writer;
- Category category = typeInfo.getCategory();
- switch (category) {
- case PRIMITIVE:
- {
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- switch (primitiveCategory) {
- // case VOID:
- // UNDONE:
- // break;
+ isAllNulls = false;
+ switch (categories[i]) {
+ case PRIMITIVE:
+ switch (primitiveCategories[i]) {
case BOOLEAN:
- writer = new BooleanWriter(columnIndex);
+ serializeWrite.writeBoolean(((LongColumnVector) colVector).vector[adjustedBatchIndex] != 0);
break;
case BYTE:
- writer = new ByteWriter(columnIndex);
+ serializeWrite.writeByte((byte) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case SHORT:
- writer = new ShortWriter(columnIndex);
+ serializeWrite.writeShort((short) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case INT:
- writer = new IntWriter(columnIndex);
+ serializeWrite.writeInt((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case LONG:
- writer = new LongWriter(columnIndex);
+ serializeWrite.writeLong(((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case DATE:
- writer = new DateWriter(columnIndex);
+ serializeWrite.writeDate((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case TIMESTAMP:
- writer = new TimestampWriter(columnIndex);
+ serializeWrite.writeTimestamp(((TimestampColumnVector) colVector).asScratchTimestamp(adjustedBatchIndex));
break;
case FLOAT:
- writer = new FloatWriter(columnIndex);
+ serializeWrite.writeFloat((float) ((DoubleColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case DOUBLE:
- writer = new DoubleWriter(columnIndex);
+ serializeWrite.writeDouble(((DoubleColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case STRING:
case CHAR:
case VARCHAR:
- // We store CHAR and VARCHAR without pads, so use STRING writer class.
- writer = new StringWriter(columnIndex);
+ {
+ // We store CHAR and VARCHAR without pads, so write with STRING.
+ BytesColumnVector bytesColVector = (BytesColumnVector) colVector;
+ serializeWrite.writeString(
+ bytesColVector.vector[adjustedBatchIndex],
+ bytesColVector.start[adjustedBatchIndex],
+ bytesColVector.length[adjustedBatchIndex]);
+ }
break;
case BINARY:
- writer = new BinaryWriter(columnIndex);
+ {
+ BytesColumnVector bytesColVector = (BytesColumnVector) colVector;
+ serializeWrite.writeBinary(
+ bytesColVector.vector[adjustedBatchIndex],
+ bytesColVector.start[adjustedBatchIndex],
+ bytesColVector.length[adjustedBatchIndex]);
+ }
break;
case DECIMAL:
- writer = new HiveDecimalWriter(columnIndex);
+ {
+ DecimalColumnVector decimalColVector = (DecimalColumnVector) colVector;
+ serializeWrite.writeHiveDecimal(decimalColVector.vector[adjustedBatchIndex], decimalColVector.scale);
+ }
break;
case INTERVAL_YEAR_MONTH:
- writer = new IntervalYearMonthWriter(columnIndex);
+ serializeWrite.writeHiveIntervalYearMonth((int) ((LongColumnVector) colVector).vector[adjustedBatchIndex]);
break;
case INTERVAL_DAY_TIME:
- writer = new IntervalDayTimeWriter(columnIndex);
+ serializeWrite.writeHiveIntervalDayTime(((IntervalDayTimeColumnVector) colVector).asScratchIntervalDayTime(adjustedBatchIndex));
break;
default:
- throw new HiveException("Unexpected primitive type category " + primitiveCategory);
+ throw new RuntimeException("Unexpected primitive category " + primitiveCategories[i]);
}
- }
- break;
- default:
- throw new HiveException("Unexpected type category " + category);
- }
- return writer;
- }
-
- public void init(List<String> typeNames, int[] columnMap) throws HiveException {
-
- writers = new Writer[typeNames.size()];
- for (int i = 0; i < typeNames.size(); i++) {
- String typeName = typeNames.get(i);
- TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- int columnIndex = columnMap[i];
- Writer<T> writer = createWriter(typeInfo, columnIndex);
- writers[i] = writer;
- }
- }
-
- public void init(List<String> typeNames) throws HiveException {
-
- writers = new Writer[typeNames.size()];
- for (int i = 0; i < typeNames.size(); i++) {
- String typeName = typeNames.get(i);
- TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- Writer<T> writer = createWriter(typeInfo, i);
- writers[i] = writer;
- }
- }
-
- public void init(TypeInfo[] typeInfos, int[] columnMap)
- throws HiveException {
-
- writers = new Writer[typeInfos.length];
- for (int i = 0; i < typeInfos.length; i++) {
- int columnIndex = columnMap[i];
- Writer<T> writer = createWriter(typeInfos[i], columnIndex);
- writers[i] = writer;
- }
- }
-
- public int getCount() {
- return writers.length;
- }
-
- public void setOutput(Output output) {
- serializeWrite.set(output);
- }
-
- public void setOutputAppend(Output output) {
- serializeWrite.setAppend(output);
- }
-
- private boolean hasAnyNulls;
- private boolean isAllNulls;
-
- /*
- * Note that when serializing a row, the logical mapping using selected in use has already
- * been performed. batchIndex is the actual index of the row.
- */
- public void serializeWrite(VectorizedRowBatch batch, int batchIndex) throws IOException {
-
- hasAnyNulls = false;
- isAllNulls = true;
- for (Writer<T> writer : writers) {
- if (!writer.apply(batch, batchIndex)) {
- hasAnyNulls = true;
- } else {
- isAllNulls = false;
+ break;
+ default:
+ throw new RuntimeException("Unexpected category " + categories[i]);
}
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/bb33ffac/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
index f8a110d..f4ac56f 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
@@ -402,10 +402,12 @@ public final class LazyBinaryUtils {
return 1 + len;
}
+ public static int VLONG_BYTES_LEN = 9;
+
private static ThreadLocal<byte[]> vLongBytesThreadLocal = new ThreadLocal<byte[]>() {
@Override
public byte[] initialValue() {
- return new byte[9];
+ return new byte[VLONG_BYTES_LEN];
}
};
http://git-wip-us.apache.org/repos/asf/hive/blob/bb33ffac/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
----------------------------------------------------------------------
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
index 6bc4622..085d71c 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/fast/LazyBinarySerializeWrite.java
@@ -60,11 +60,13 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable;
private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable;
private HiveIntervalDayTime hiveIntervalDayTime;
+ private byte[] vLongBytes;
private long[] scratchLongs;
private byte[] scratchBuffer;
public LazyBinarySerializeWrite(int fieldCount) {
this();
+ vLongBytes = new byte[LazyBinaryUtils.VLONG_BYTES_LEN];
this.fieldCount = fieldCount;
}
@@ -270,7 +272,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
// Set bit in NULL byte when a field is NOT NULL.
nullByte |= 1 << (fieldIndex % 8);
- LazyBinaryUtils.writeVInt(output, v);
+ writeVInt(v);
fieldIndex++;
@@ -301,7 +303,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
// Set bit in NULL byte when a field is NOT NULL.
nullByte |= 1 << (fieldIndex % 8);
- LazyBinaryUtils.writeVLong(output, v);
+ writeVLong(v);
fieldIndex++;
@@ -402,7 +404,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
nullByte |= 1 << (fieldIndex % 8);
int length = v.length;
- LazyBinaryUtils.writeVInt(output, length);
+ writeVInt(length);
output.write(v, 0, length);
@@ -432,7 +434,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
// Set bit in NULL byte when a field is NOT NULL.
nullByte |= 1 << (fieldIndex % 8);
- LazyBinaryUtils.writeVInt(output, length);
+ writeVInt(length);
output.write(v, start, length);
@@ -498,7 +500,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
// Set bit in NULL byte when a field is NOT NULL.
nullByte |= 1 << (fieldIndex % 8);
- LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date));
+ writeVInt(DateWritable.dateToDays(date));
fieldIndex++;
@@ -527,7 +529,7 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
// Set bit in NULL byte when a field is NOT NULL.
nullByte |= 1 << (fieldIndex % 8);
- LazyBinaryUtils.writeVInt(output, dateAsDays);
+ writeVInt(dateAsDays);
fieldIndex++;
@@ -751,4 +753,18 @@ public class LazyBinarySerializeWrite implements SerializeWrite {
output.writeByte(nullOffset, nullByte);
}
}
+
+ /*
+ * Write a VInt using our temporary byte buffer instead of paying the thread local performance
+ * cost of LazyBinaryUtils.writeVInt
+ */
+ private void writeVInt(int v) {
+ final int len = LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v);
+ output.write(vLongBytes, 0, len);
+ }
+
+ private void writeVLong(long v) {
+ final int len = LazyBinaryUtils.writeVLongToByteArray(vLongBytes, v);
+ output.write(vLongBytes, 0, len);
+ }
}