You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by mm...@apache.org on 2016/05/03 01:59:50 UTC
[39/40] hive git commit: HIVE-12878: Support Vectorization for
TEXTFILE and other formats (Matt McCline, reviewed by Sergey Shelukhin)
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
index de0300a..894ef59 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java
@@ -18,6 +18,8 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.sql.Date;
+import java.sql.Timestamp;
import java.util.List;
import org.slf4j.Logger;
@@ -28,6 +30,7 @@ import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.VectorPartitionConversion;
import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
@@ -39,609 +42,689 @@ import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
-import org.apache.hive.common.util.DateUtils;
+import org.apache.hadoop.io.Writable;
+
+import com.google.common.base.Preconditions;
+
/**
- * This class assigns specified columns of a row from a Writable row Object[].
- *
- * The caller provides the hive type names and target column numbers in the order desired to
- * assign from the Writable row Object[].
+ * This class assigns specified columns of a row from Writable row objects.
*
- * This class is abstract to allow the subclasses to control batch reuse.
+ * The caller provides the data types and projection column numbers of a subset of the columns
+ * to assign.
*/
-public abstract class VectorAssignRow {
+public class VectorAssignRow {
+
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(VectorAssignRow.class);
- protected abstract class Assigner {
- protected int columnIndex;
-
- Assigner(int columnIndex) {
- this.columnIndex = columnIndex;
- }
-
- public int getColumnIndex() {
- return columnIndex;
- }
-
- abstract void setColumnVector(VectorizedRowBatch batch);
-
- abstract void forgetColumnVector();
-
- abstract void assign(int batchIndex, Object object);
- }
-
- private class VoidAssigner extends Assigner {
-
- VoidAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- }
-
- @Override
- void forgetColumnVector() {
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- // This is no-op, there is no column to assign to and the object is expected to be null.
- assert (object == null);
- }
+ /*
+ * These members have information for assigning row column objects into the VectorizedRowBatch
+ * columns.
+ *
+ * We say "target" because, when there is a conversion, the data type being converted from is the source.
+ */
+ boolean[] isConvert;
+ // For each column, are we converting the row column object?
+
+ int[] projectionColumnNums;
+ // Assigning can be a subset of columns, so this is the projection --
+ // the batch column numbers.
+
+ Category[] targetCategories;
+ // The data type category of each column being assigned.
+
+ PrimitiveCategory[] targetPrimitiveCategories;
+ // The data type primitive category of each column being assigned.
+
+ int[] maxLengths;
+ // For the CHAR and VARCHAR data types, the maximum character length of
+ // the columns. Otherwise, 0.
+
+ /*
+ * These members have information for data type conversion.
+ * Not defined if there is no conversion.
+ */
+ PrimitiveObjectInspector[] convertSourcePrimitiveObjectInspectors;
+ // The primitive object inspector of the source data type for any column being
+ // converted. Otherwise, null.
+
+ Writable[] convertTargetWritables;
+ // Conversion to the target data type requires a "helper" target writable in a
+ // few cases.
+
+ /*
+ * Allocate the target related arrays.
+ */
+ private void allocateArrays(int count) {
+ isConvert = new boolean[count];
+ projectionColumnNums = new int[count];
+ targetCategories = new Category[count];
+ targetPrimitiveCategories = new PrimitiveCategory[count];
+ maxLengths = new int[count];
}
- private abstract class AbstractLongAssigner extends Assigner {
-
- protected LongColumnVector colVector;
- protected long[] vector;
-
- AbstractLongAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (LongColumnVector) batch.cols[columnIndex];
- vector = colVector.vector;
- }
-
- @Override
- void forgetColumnVector() {
- colVector = null;
- vector = null;
- }
+ /*
+ * Allocate the source conversion related arrays (optional).
+ */
+ private void allocateConvertArrays(int count) {
+ convertSourcePrimitiveObjectInspectors = new PrimitiveObjectInspector[count];
+ convertTargetWritables = new Writable[count];
}
- protected class BooleanAssigner extends AbstractLongAssigner {
-
- BooleanAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- BooleanWritable bw = (BooleanWritable) object;
- vector[batchIndex] = (bw.get() ? 1 : 0);
- colVector.isNull[batchIndex] = false;
+ /*
+ * Initialize one column's target related arrays.
+ */
+ private void initTargetEntry(int logicalColumnIndex, int projectionColumnNum, TypeInfo typeInfo) {
+ isConvert[logicalColumnIndex] = false;
+ projectionColumnNums[logicalColumnIndex] = projectionColumnNum;
+ Category category = typeInfo.getCategory();
+ targetCategories[logicalColumnIndex] = category;
+ if (category == Category.PRIMITIVE) {
+ PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+ PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
+ targetPrimitiveCategories[logicalColumnIndex] = primitiveCategory;
+ switch (primitiveCategory) {
+ case CHAR:
+ maxLengths[logicalColumnIndex] = ((CharTypeInfo) primitiveTypeInfo).getLength();
+ break;
+ case VARCHAR:
+ maxLengths[logicalColumnIndex] = ((VarcharTypeInfo) primitiveTypeInfo).getLength();
+ break;
+ default:
+ // No additional data type specific setting.
+ break;
}
}
}
- protected class ByteAssigner extends AbstractLongAssigner {
-
- ByteAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- ByteWritable bw = (ByteWritable) object;
- vector[batchIndex] = bw.get();
- colVector.isNull[batchIndex] = false;
+ /*
+ * Initialize one column's source conversion related arrays.
+ * Assumes initTargetEntry has already been called.
+ */
+ private void initConvertSourceEntry(int logicalColumnIndex, TypeInfo convertSourceTypeInfo) {
+ isConvert[logicalColumnIndex] = true;
+ Category convertSourceCategory = convertSourceTypeInfo.getCategory();
+ if (convertSourceCategory == Category.PRIMITIVE) {
+ PrimitiveTypeInfo convertSourcePrimitiveTypeInfo = (PrimitiveTypeInfo) convertSourceTypeInfo;
+ convertSourcePrimitiveObjectInspectors[logicalColumnIndex] =
+ PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+ convertSourcePrimitiveTypeInfo);
+
+ // These need to be based on the target.
+ PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex];
+ switch (targetPrimitiveCategory) {
+ case DATE:
+ convertTargetWritables[logicalColumnIndex] = new DateWritable();
+ break;
+ case STRING:
+ convertTargetWritables[logicalColumnIndex] = new Text();
+ break;
+ default:
+ // No additional data type specific setting.
+ break;
}
}
}
- private class ShortAssigner extends AbstractLongAssigner {
+ /*
+ * Initialize using a StructObjectInspector and a column projection list.
+ */
+ public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns)
+ throws HiveException {
- ShortAssigner(int columnIndex) {
- super(columnIndex);
- }
+ List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+ final int count = fields.size();
+ allocateArrays(count);
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- ShortWritable sw = (ShortWritable) object;
- vector[batchIndex] = sw.get();
- colVector.isNull[batchIndex] = false;
- }
- }
- }
+ for (int i = 0; i < count; i++) {
- private class IntAssigner extends AbstractLongAssigner {
+ int projectionColumnNum = projectedColumns.get(i);
- IntAssigner(int columnIndex) {
- super(columnIndex);
- }
+ StructField field = fields.get(i);
+ ObjectInspector fieldInspector = field.getFieldObjectInspector();
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- IntWritable iw = (IntWritable) object;
- vector[batchIndex] = iw.get();
- colVector.isNull[batchIndex] = false;
- }
+ initTargetEntry(i, projectionColumnNum, typeInfo);
}
}
- private class LongAssigner extends AbstractLongAssigner {
-
- LongAssigner(int columnIndex) {
- super(columnIndex);
- }
+ /*
+ * Initialize using a StructObjectInspector.
+ * No projection -- the column range 0 .. fields.size()-1
+ */
+ public void init(StructObjectInspector structObjectInspector) throws HiveException {
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- LongWritable lw = (LongWritable) object;
- vector[batchIndex] = lw.get();
- colVector.isNull[batchIndex] = false;
- }
- }
- }
+ List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
+ final int count = fields.size();
+ allocateArrays(count);
- private class DateAssigner extends AbstractLongAssigner {
+ for (int i = 0; i < count; i++) {
- DateAssigner(int columnIndex) {
- super(columnIndex);
- }
+ StructField field = fields.get(i);
+ ObjectInspector fieldInspector = field.getFieldObjectInspector();
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString(fieldInspector.getTypeName());
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- DateWritable bw = (DateWritable) object;
- vector[batchIndex] = bw.getDays();
- colVector.isNull[batchIndex] = false;
- }
+ initTargetEntry(i, i, typeInfo);
}
}
- private abstract class AbstractTimestampAssigner extends Assigner {
+ /*
+ * Initialize using target data type names.
+ * No projection -- the column range 0 .. typeNames.size()-1
+ */
+ public void init(List<String> typeNames) throws HiveException {
- protected TimestampColumnVector colVector;
+ final int count = typeNames.size();
+ allocateArrays(count);
- AbstractTimestampAssigner(int columnIndex) {
- super(columnIndex);
- }
+ for (int i = 0; i < count; i++) {
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (TimestampColumnVector) batch.cols[columnIndex];
- }
+ TypeInfo typeInfo =
+ TypeInfoUtils.getTypeInfoFromTypeString(typeNames.get(i));
- @Override
- void forgetColumnVector() {
- colVector = null;
+ initTargetEntry(i, i, typeInfo);
}
}
- private class TimestampAssigner extends AbstractTimestampAssigner {
+ /**
+ * Initialize for conversion from the provided (source) data types to the target data types
+ * desired in the VectorizedRowBatch.
+ *
+ * No projection -- the column range 0 .. count-1
+ *
+ * where count is the minimum of the target data type array size, included array size,
+ * and source data type array size.
+ *
+ * @param sourceTypeInfos
+ * @param targetTypeInfos
+ * @param columnsToIncludeTruncated
+ * Flag array indicating which columns are to be included.
+ * "Truncated" because all false entries on the end of the array have been
+ * eliminated.
+ * @return the minimum count described above; that is, the number of columns
+ * that will be processed by assign.
+ */
+ public int initConversion(TypeInfo[] sourceTypeInfos, TypeInfo[] targetTypeInfos,
+ boolean[] columnsToIncludeTruncated) {
+
+ int targetColumnCount;
+ if (columnsToIncludeTruncated == null) {
+ targetColumnCount = targetTypeInfos.length;
+ } else {
+ targetColumnCount = Math.min(targetTypeInfos.length, columnsToIncludeTruncated.length);
+ }
+
+ int sourceColumnCount = Math.min(sourceTypeInfos.length, targetColumnCount);
+
+ allocateArrays(sourceColumnCount);
+ allocateConvertArrays(sourceColumnCount);
+
+ for (int i = 0; i < sourceColumnCount; i++) {
+
+ if (columnsToIncludeTruncated != null && !columnsToIncludeTruncated[i]) {
+
+ // Field not included in query.
- TimestampAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
} else {
- colVector.set(batchIndex, ((TimestampWritable) object).getTimestamp());
- colVector.isNull[batchIndex] = false;
- }
- }
- }
+ TypeInfo targetTypeInfo = targetTypeInfos[i];
- private class IntervalYearMonthAssigner extends AbstractLongAssigner {
+ if (targetTypeInfo.getCategory() != ObjectInspector.Category.PRIMITIVE) {
- IntervalYearMonthAssigner(int columnIndex) {
- super(columnIndex);
- }
+ // For now, we don't have an assigner for complex types...
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- HiveIntervalYearMonthWritable iymw = (HiveIntervalYearMonthWritable) object;
- HiveIntervalYearMonth iym = iymw.getHiveIntervalYearMonth();
- vector[batchIndex] = iym.getTotalMonths();
- colVector.isNull[batchIndex] = false;
- }
- }
- }
+ } else {
+ TypeInfo sourceTypeInfo = sourceTypeInfos[i];
- private abstract class AbstractIntervalDayTimeAssigner extends Assigner {
+ if (!sourceTypeInfo.equals(targetTypeInfo)) {
- protected IntervalDayTimeColumnVector colVector;
+ if (VectorPartitionConversion.isImplicitVectorColumnConversion(
+ sourceTypeInfo, targetTypeInfo)) {
- AbstractIntervalDayTimeAssigner(int columnIndex) {
- super(columnIndex);
- }
+ // Do implicit conversion: accept the source type as-is and put it in the
+ // ColumnVector, which has the same ColumnVector type as the target type.
+ initTargetEntry(i, i, sourceTypeInfo);
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (IntervalDayTimeColumnVector) batch.cols[columnIndex];
- }
+ } else {
- @Override
- void forgetColumnVector() {
- colVector = null;
- }
- }
+ // Do formal conversion...
+ initTargetEntry(i, i, targetTypeInfo);
+ initConvertSourceEntry(i, sourceTypeInfo);
- private class IntervalDayTimeAssigner extends AbstractIntervalDayTimeAssigner {
+ }
+ } else {
- IntervalDayTimeAssigner(int columnIndex) {
- super(columnIndex);
- }
+ // No conversion.
+ initTargetEntry(i, i, targetTypeInfo);
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- HiveIntervalDayTimeWritable idtw = (HiveIntervalDayTimeWritable) object;
- HiveIntervalDayTime idt = idtw.getHiveIntervalDayTime();
- colVector.set(batchIndex, idt);
- colVector.isNull[batchIndex] = false;
+ }
+ }
}
}
- }
-
- private abstract class AbstractDoubleAssigner extends Assigner {
-
- protected DoubleColumnVector colVector;
- protected double[] vector;
-
- AbstractDoubleAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (DoubleColumnVector) batch.cols[columnIndex];
- vector = colVector.vector;
- }
-
- @Override
- void forgetColumnVector() {
- colVector = null;
- vector = null;
- }
- }
-
- private class FloatAssigner extends AbstractDoubleAssigner {
-
- FloatAssigner(int columnIndex) {
- super(columnIndex);
- }
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- FloatWritable fw = (FloatWritable) object;
- vector[batchIndex] = fw.get();
- colVector.isNull[batchIndex] = false;
- }
- }
+ return sourceColumnCount;
}
- private class DoubleAssigner extends AbstractDoubleAssigner {
-
- DoubleAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- DoubleWritable dw = (DoubleWritable) object;
- vector[batchIndex] = dw.get();
- colVector.isNull[batchIndex] = false;
+ /**
+ * Assign a row's column object to the ColumnVector at batchIndex in the VectorizedRowBatch.
+ *
+ * @param batch
+ * @param batchIndex
+ * @param logicalColumnIndex
+ * @param object The row column object whose type is the target data type.
+ */
+ public void assignRowColumn(VectorizedRowBatch batch, int batchIndex, int logicalColumnIndex,
+ Object object) {
+ final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
+ if (object == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ Category targetCategory = targetCategories[logicalColumnIndex];
+ if (targetCategory == null) {
+ /*
+ * This is a column that we don't want (i.e. not included) -- we are done.
+ */
+ return;
+ }
+ switch (targetCategory) {
+ case PRIMITIVE:
+ {
+ PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex];
+ switch (targetPrimitiveCategory) {
+ case VOID:
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ case BOOLEAN:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ (((BooleanWritable) object).get() ? 1 : 0);
+ break;
+ case BYTE:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((ByteWritable) object).get();
+ break;
+ case SHORT:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((ShortWritable) object).get();
+ break;
+ case INT:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((IntWritable) object).get();
+ break;
+ case LONG:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((LongWritable) object).get();
+ break;
+ case TIMESTAMP:
+ ((TimestampColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, ((TimestampWritable) object).getTimestamp());
+ break;
+ case DATE:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((DateWritable) object).getDays();
+ break;
+ case FLOAT:
+ ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((FloatWritable) object).get();
+ break;
+ case DOUBLE:
+ ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((DoubleWritable) object).get();
+ break;
+ case BINARY:
+ {
+ BytesWritable bw = (BytesWritable) object;
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bw.getBytes(), 0, bw.getLength());
+ }
+ break;
+ case STRING:
+ {
+ Text tw = (Text) object;
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, tw.getBytes(), 0, tw.getLength());
+ }
+ break;
+ case VARCHAR:
+ {
+ // UNDONE: Performance problem with conversion to String, then bytes...
+
+ // We store VARCHAR type stripped of pads.
+ HiveVarchar hiveVarchar;
+ if (object instanceof HiveVarchar) {
+ hiveVarchar = (HiveVarchar) object;
+ } else {
+ hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
+ }
+
+ // TODO: HIVE-13624 Do we need maxLength checking?
+
+ byte[] bytes = hiveVarchar.getValue().getBytes();
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bytes, 0, bytes.length);
+ }
+ break;
+ case CHAR:
+ {
+ // UNDONE: Performance problem with conversion to String, then bytes...
+
+ // We store CHAR type stripped of pads.
+ HiveChar hiveChar;
+ if (object instanceof HiveChar) {
+ hiveChar = (HiveChar) object;
+ } else {
+ hiveChar = ((HiveCharWritable) object).getHiveChar();
+ }
+
+ // TODO: HIVE-13624 Do we need maxLength checking?
+
+ // We store CHAR in vector row batch with padding stripped.
+ byte[] bytes = hiveChar.getStrippedValue().getBytes();
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bytes, 0, bytes.length);
+ }
+ break;
+ case DECIMAL:
+ if (object instanceof HiveDecimal) {
+ ((DecimalColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, (HiveDecimal) object);
+ } else {
+ ((DecimalColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, (HiveDecimalWritable) object);
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ ((HiveIntervalYearMonthWritable) object).getHiveIntervalYearMonth().getTotalMonths();
+ break;
+ case INTERVAL_DAY_TIME:
+ ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, ((HiveIntervalDayTimeWritable) object).getHiveIntervalDayTime());
+ break;
+ default:
+ throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() +
+ " not supported");
+ }
}
- }
- }
-
- private abstract class AbstractBytesAssigner extends Assigner {
-
- protected BytesColumnVector colVector;
-
- AbstractBytesAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (BytesColumnVector) batch.cols[columnIndex];
- }
-
- @Override
- void forgetColumnVector() {
- colVector = null;
- }
- }
-
- private class BinaryAssigner extends AbstractBytesAssigner {
-
- BinaryAssigner(int columnIndex) {
- super(columnIndex);
+ break;
+ default:
+ throw new RuntimeException("Category " + targetCategory.name() + " not supported");
}
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- BytesWritable bw = (BytesWritable) object;
- colVector.setVal(batchIndex, bw.getBytes(), 0, bw.getLength());
- colVector.isNull[batchIndex] = false;
- }
- }
+ /*
+ * We always set the null flag to false when there is a value.
+ */
+ batch.cols[projectionColumnNum].isNull[batchIndex] = false;
}
- private class StringAssigner extends AbstractBytesAssigner {
-
- StringAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- Text tw = (Text) object;
- colVector.setVal(batchIndex, tw.getBytes(), 0, tw.getLength());
- colVector.isNull[batchIndex] = false;
+ /**
+ * Convert a row's column object and then assign it to the ColumnVector at batchIndex
+ * in the VectorizedRowBatch.
+ *
+ * Public so VectorDeserializeRow can use this method to convert a row's column object.
+ *
+ * @param batch
+ * @param batchIndex
+ * @param logicalColumnIndex
+ * @param object The row column object whose type is the VectorAssignRow.initConversion
+ * source data type.
+ *
+ */
+ public void assignConvertRowColumn(VectorizedRowBatch batch, int batchIndex,
+ int logicalColumnIndex, Object object) {
+ Preconditions.checkState(isConvert[logicalColumnIndex]);
+ final int projectionColumnNum = projectionColumnNums[logicalColumnIndex];
+ if (object == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ try {
+ Category targetCategory = targetCategories[logicalColumnIndex];
+ if (targetCategory == null) {
+ /*
+ * This is a column that we don't want (i.e. not included) -- we are done.
+ */
+ return;
}
- }
- }
-
- private class VarCharAssigner extends AbstractBytesAssigner {
-
- VarCharAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- // We store VARCHAR type stripped of pads.
- HiveVarchar hiveVarchar;
- if (object instanceof HiveVarchar) {
- hiveVarchar = (HiveVarchar) object;
- } else {
- hiveVarchar = ((HiveVarcharWritable) object).getHiveVarchar();
+ switch (targetCategory) {
+ case PRIMITIVE:
+ PrimitiveCategory targetPrimitiveCategory = targetPrimitiveCategories[logicalColumnIndex];
+ switch (targetPrimitiveCategory) {
+ case VOID:
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ case BOOLEAN:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ (PrimitiveObjectInspectorUtils.getBoolean(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]) ? 1 : 0);
+ break;
+ case BYTE:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getByte(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case SHORT:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getShort(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case INT:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getInt(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case LONG:
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getLong(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case TIMESTAMP:
+ {
+ Timestamp timestamp =
+ PrimitiveObjectInspectorUtils.getTimestamp(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (timestamp == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ ((TimestampColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, timestamp);
+ }
+ break;
+ case DATE:
+ {
+ Date date = PrimitiveObjectInspectorUtils.getDate(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (date == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ DateWritable dateWritable = (DateWritable) convertTargetWritables[logicalColumnIndex];
+ dateWritable.set(date);
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ dateWritable.getDays();
+ }
+ break;
+ case FLOAT:
+ ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getFloat(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case DOUBLE:
+ ((DoubleColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ PrimitiveObjectInspectorUtils.getDouble(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ break;
+ case BINARY:
+ {
+ BytesWritable bytesWritable =
+ PrimitiveObjectInspectorUtils.getBinary(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (bytesWritable == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bytesWritable.getBytes(), 0, bytesWritable.getLength());
+ }
+ break;
+ case STRING:
+ {
+ String string = PrimitiveObjectInspectorUtils.getString(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (string == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ Text text = (Text) convertTargetWritables[logicalColumnIndex];
+ text.set(string);
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, text.getBytes(), 0, text.getLength());
+ }
+ break;
+ case VARCHAR:
+ {
+ // UNDONE: Performance problem with conversion to String, then bytes...
+
+ HiveVarchar hiveVarchar =
+ PrimitiveObjectInspectorUtils.getHiveVarchar(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (hiveVarchar == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+
+ // TODO: Do we need maxLength checking?
+
+ byte[] bytes = hiveVarchar.getValue().getBytes();
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bytes, 0, bytes.length);
+ }
+ break;
+ case CHAR:
+ {
+ // UNDONE: Performance problem with conversion to String, then bytes...
+
+ HiveChar hiveChar =
+ PrimitiveObjectInspectorUtils.getHiveChar(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (hiveChar == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ // We store CHAR in vector row batch with padding stripped.
+
+ // TODO: Do we need maxLength checking?
+
+ byte[] bytes = hiveChar.getStrippedValue().getBytes();
+ ((BytesColumnVector) batch.cols[projectionColumnNum]).setVal(
+ batchIndex, bytes, 0, bytes.length);
+ }
+ break;
+ case DECIMAL:
+ {
+ HiveDecimal hiveDecimal =
+ PrimitiveObjectInspectorUtils.getHiveDecimal(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (hiveDecimal == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ ((DecimalColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, hiveDecimal);
+ }
+ break;
+ case INTERVAL_YEAR_MONTH:
+ {
+ HiveIntervalYearMonth intervalYearMonth =
+ PrimitiveObjectInspectorUtils.getHiveIntervalYearMonth(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (intervalYearMonth == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ ((LongColumnVector) batch.cols[projectionColumnNum]).vector[batchIndex] =
+ intervalYearMonth.getTotalMonths();
+ }
+ break;
+ case INTERVAL_DAY_TIME:
+ {
+ HiveIntervalDayTime intervalDayTime =
+ PrimitiveObjectInspectorUtils.getHiveIntervalDayTime(
+ object, convertSourcePrimitiveObjectInspectors[logicalColumnIndex]);
+ if (intervalDayTime == null) {
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
+ }
+ ((IntervalDayTimeColumnVector) batch.cols[projectionColumnNum]).set(
+ batchIndex, intervalDayTime);
+ }
+ break;
+ default:
+ throw new RuntimeException("Primitive category " + targetPrimitiveCategory.name() +
+ " not supported");
}
- byte[] bytes = hiveVarchar.getValue().getBytes();
- colVector.setVal(batchIndex, bytes, 0, bytes.length);
- colVector.isNull[batchIndex] = false;
+ break;
+ default:
+ throw new RuntimeException("Category " + targetCategory.name() + " not supported");
}
- }
- }
-
- private class CharAssigner extends AbstractBytesAssigner {
+ } catch (NumberFormatException e) {
- CharAssigner(int columnIndex) {
- super(columnIndex);
+ // Some of the conversion methods throw this exception on numeric parsing errors.
+ VectorizedBatchUtil.setNullColIsNullValue(batch.cols[projectionColumnNum], batchIndex);
+ return;
}
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
- } else {
- // We store CHAR type stripped of pads.
- HiveChar hiveChar;
- if (object instanceof HiveChar) {
- hiveChar = (HiveChar) object;
- } else {
- hiveChar = ((HiveCharWritable) object).getHiveChar();
- }
-
- // We store CHAR in vector row batch with padding stripped.
- byte[] bytes = hiveChar.getStrippedValue().getBytes();
- colVector.setVal(batchIndex, bytes, 0, bytes.length);
- colVector.isNull[batchIndex] = false;
- }
- }
+ // We always set the null flag to false when there is a value.
+ batch.cols[projectionColumnNum].isNull[batchIndex] = false;
}
- private class DecimalAssigner extends Assigner {
-
- protected DecimalColumnVector colVector;
-
- DecimalAssigner(int columnIndex) {
- super(columnIndex);
- }
-
- @Override
- void setColumnVector(VectorizedRowBatch batch) {
- colVector = (DecimalColumnVector) batch.cols[columnIndex];
- }
-
- @Override
- void forgetColumnVector() {
- colVector = null;
- }
-
- @Override
- void assign(int batchIndex, Object object) {
- if (object == null) {
- VectorizedBatchUtil.setNullColIsNullValue(colVector, batchIndex);
+ /*
+ * Assign a row from an array of objects.
+ */
+ public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects) {
+ final int count = isConvert.length;
+ for (int i = 0; i < count; i++) {
+ if (isConvert[i]) {
+ assignConvertRowColumn(batch, batchIndex, i, objects[i]);
} else {
- if (object instanceof HiveDecimal) {
- colVector.set(batchIndex, (HiveDecimal) object);
- } else {
- colVector.set(batchIndex, (HiveDecimalWritable) object);
- }
- colVector.isNull[batchIndex] = false;
+ assignRowColumn(batch, batchIndex, i, objects[i]);
}
}
}
- private Assigner createAssigner(PrimitiveTypeInfo primitiveTypeInfo, int columnIndex) throws HiveException {
- PrimitiveCategory primitiveCategory = primitiveTypeInfo.getPrimitiveCategory();
- Assigner assigner;
- switch (primitiveCategory) {
- case VOID:
- assigner = new VoidAssigner(columnIndex);
- break;
- case BOOLEAN:
- assigner = new BooleanAssigner(columnIndex);
- break;
- case BYTE:
- assigner = new ByteAssigner(columnIndex);
- break;
- case SHORT:
- assigner = new ShortAssigner(columnIndex);
- break;
- case INT:
- assigner = new IntAssigner(columnIndex);
- break;
- case LONG:
- assigner = new LongAssigner(columnIndex);
- break;
- case TIMESTAMP:
- assigner = new TimestampAssigner(columnIndex);
- break;
- case DATE:
- assigner = new DateAssigner(columnIndex);
- break;
- case FLOAT:
- assigner = new FloatAssigner(columnIndex);
- break;
- case DOUBLE:
- assigner = new DoubleAssigner(columnIndex);
- break;
- case BINARY:
- assigner = new BinaryAssigner(columnIndex);
- break;
- case STRING:
- assigner = new StringAssigner(columnIndex);
- break;
- case VARCHAR:
- assigner = new VarCharAssigner(columnIndex);
- break;
- case CHAR:
- assigner = new CharAssigner(columnIndex);
- break;
- case DECIMAL:
- assigner = new DecimalAssigner(columnIndex);
- break;
- case INTERVAL_YEAR_MONTH:
- assigner = new IntervalYearMonthAssigner(columnIndex);
- break;
- case INTERVAL_DAY_TIME:
- assigner = new IntervalDayTimeAssigner(columnIndex);
- break;
- default:
- throw new HiveException("No vector row assigner for primitive category " +
- primitiveCategory);
- }
- return assigner;
- }
-
- Assigner[] assigners;
-
- public void init(StructObjectInspector structObjectInspector, List<Integer> projectedColumns) throws HiveException {
-
- List<? extends StructField> fields = structObjectInspector.getAllStructFieldRefs();
- assigners = new Assigner[fields.size()];
-
- int i = 0;
- for (StructField field : fields) {
- int columnIndex = projectedColumns.get(i);
- ObjectInspector fieldInspector = field.getFieldObjectInspector();
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(
- fieldInspector.getTypeName());
- assigners[i] = createAssigner(primitiveTypeInfo, columnIndex);
- i++;
- }
- }
-
- public void init(List<String> typeNames) throws HiveException {
-
- assigners = new Assigner[typeNames.size()];
-
- int i = 0;
- for (String typeName : typeNames) {
- PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) TypeInfoUtils.getTypeInfoFromTypeString(typeName);
- assigners[i] = createAssigner(primitiveTypeInfo, i);
- i++;
- }
- }
+ /*
+ * Assign a row from a list of standard objects up to a count
+ */
+ public void assignRow(VectorizedRowBatch batch, int batchIndex,
+ List<Object> standardObjects, int columnCount) {
- protected void setBatch(VectorizedRowBatch batch) throws HiveException {
- for (int i = 0; i < assigners.length; i++) {
- Assigner assigner = assigners[i];
- int columnIndex = assigner.getColumnIndex();
- if (batch.cols[columnIndex] == null) {
- throw new HiveException("Unexpected null vector column " + columnIndex);
+ for (int i = 0; i < columnCount; i++) {
+ if (isConvert[i]) {
+ assignConvertRowColumn(batch, batchIndex, i, standardObjects.get(i));
+ } else {
+ assignRowColumn(batch, batchIndex, i, standardObjects.get(i));
}
- assigner.setColumnVector(batch);
- }
- }
-
- protected void forgetBatch() {
- for (Assigner assigner : assigners) {
- assigner.forgetColumnVector();
}
}
-
- public void assignRowColumn(int batchIndex, int logicalColumnIndex, Object object) {
- assigners[logicalColumnIndex].assign(batchIndex, object);
- }
-
- public void assignRow(int batchIndex, Object[] objects) {
- int i = 0;
- for (Assigner assigner : assigners) {
- assigner.assign(batchIndex, objects[i++]);
- }
- }
-
-}
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java
deleted file mode 100644
index a696825..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowDynBatch.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[].
- *
- * The caller provides the hive type names and target column numbers in the order desired to
- * assign from the Writable row Object[].
- *
- * This class is for use when the batch being assigned may change each time before processOp
- * is called.
- */
-public class VectorAssignRowDynBatch extends VectorAssignRow {
-
- public void setBatchOnEntry(VectorizedRowBatch batch) throws HiveException {
- setBatch(batch);
- }
-
- public void forgetBatchOnExit() {
- forgetBatch();
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/d5285d8e/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java
deleted file mode 100644
index 8c7c2ad..0000000
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRowSameBatch.java
+++ /dev/null
@@ -1,36 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.exec.vector;
-
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-
-/**
- * This class assigns specified columns of a VectorizedRowBatch row from a Writable row Object[].
- *
- * The caller provides the hive type names and target column numbers in the order desired to
- * assign from the Writable row Object[].
- *
- * This class is for use when the batch being assigned is always the same.
- */
-public class VectorAssignRowSameBatch extends VectorAssignRow {
-
- public void setOneBatch(VectorizedRowBatch batch) throws HiveException {
- setBatch(batch);
- }
-}
\ No newline at end of file