Posted to commits@hive.apache.org by ga...@apache.org on 2014/10/22 19:07:00 UTC
svn commit: r1633652 - in /hive/trunk/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/io/orc/ test/queries/clientpositive/
test/results/clientpositive/
Author: gates
Date: Wed Oct 22 17:07:00 2014
New Revision: 1633652
URL: http://svn.apache.org/r1633652
Log:
HIVE-8474 Vectorized reads of transactional tables fail when not all columns are selected (Alan Gates, reviewed by Ashutosh Chauhan)
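
Context for the fix: when a query projects only some columns, the ORC reader
leaves the unselected entries of VectorizedRowBatch.cols null. The shared
addRowToBatch path walked every struct field and wrote into
batch.cols[off + i] unconditionally, so a vectorized read of an ACID table
under a partial projection dereferenced a null column vector. A minimal
sketch of the failing shape (a simplification, assuming surrounding reader
state; not the verbatim pre-patch code):

    // Pre-patch shape: every field is written, even when the column was
    // never materialized by the projected read.
    for (int i = 0; i < fieldRefs.size(); i++) {
      LongColumnVector lcv = (LongColumnVector) batch.cols[off + i]; // may be null
      lcv.vector[rowIndex] = 0L; // NullPointerException when not projected
    }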
Added:
hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_partition.q
hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_project.q
hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_project.q.out
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1633652&r1=1633651&r2=1633652&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Wed Oct 22 17:07:00 2014
@@ -23,6 +23,8 @@ import java.sql.Timestamp;
import java.util.LinkedList;
import java.util.List;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
@@ -50,6 +52,7 @@ import org.apache.hadoop.io.LongWritable
import org.apache.hadoop.io.Text;
public class VectorizedBatchUtil {
+ private static final Log LOG = LogFactory.getLog(VectorizedBatchUtil.class);
/**
* Sets the IsNull value for ColumnVector at specified index
@@ -232,169 +235,237 @@ public class VectorizedBatchUtil {
final int off = colOffset;
// Iterate thru the cols and load the batch
for (int i = 0; i < fieldRefs.size(); i++) {
- Object fieldData = oi.getStructFieldData(row, fieldRefs.get(i));
- ObjectInspector foi = fieldRefs.get(i).getFieldObjectInspector();
-
- // Vectorization only supports PRIMITIVE data types. Assert the same
- assert (foi.getCategory() == Category.PRIMITIVE);
+ setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, off);
+ }
+ }
- // Get writable object
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
- Object writableCol = poi.getPrimitiveWritableObject(fieldData);
-
- // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
- // float/double. String types have no default value for null.
- switch (poi.getPrimitiveCategory()) {
- case BOOLEAN: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ /**
+ * Iterates thru all the columns in a given row and populates the batch,
+ * skipping unprojected columns and partition columns.
+ *
+ * @param row Deserialized row object
+ * @param oi Object inspector for that row
+ * @param rowIndex index at which the row should be added to the batch
+ * @param batch Vectorized batch to which the row is added at rowIndex
+ * @param context context object for this vectorized batch
+ * @param buffer scratch buffer backing string, char, varchar, and binary values
+ * @throws HiveException
+ */
+ public static void acidAddRowToBatch(Object row,
+ StructObjectInspector oi,
+ int rowIndex,
+ VectorizedRowBatch batch,
+ VectorizedRowBatchCtx context,
+ DataOutputBuffer buffer) throws HiveException {
+ List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
+ // Iterate thru the cols and load the batch
+ for (int i = 0; i < fieldRefs.size(); i++) {
+ if (batch.cols[i] == null) {
+ // This means the column was not included in the projection from the underlying read
+ continue;
+ }
+ if (context.isPartitionCol(i)) {
+ // The value will have already been set before we're called, so don't overwrite it
+ continue;
}
- break;
- case BYTE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ setVector(row, oi, fieldRefs, batch, buffer, rowIndex, i, 0);
+ }
+ }
+
+ private static void setVector(Object row,
+ StructObjectInspector oi,
+ List<? extends StructField> fieldRefs,
+ VectorizedRowBatch batch,
+ DataOutputBuffer buffer,
+ int rowIndex,
+ int colIndex,
+ int offset) throws HiveException {
+
+ Object fieldData = oi.getStructFieldData(row, fieldRefs.get(colIndex));
+ ObjectInspector foi = fieldRefs.get(colIndex).getFieldObjectInspector();
+
+ // Vectorization only supports PRIMITIVE data types. Assert the same
+ assert (foi.getCategory() == Category.PRIMITIVE);
+
+ // Get writable object
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
+ Object writableCol = poi.getPrimitiveWritableObject(fieldData);
+
+ // NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
+ // float/double. String types have no default value for null.
+ switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case SHORT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ }
+ break;
+ case BYTE: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case INT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ }
+ break;
+ case SHORT: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case LONG: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ }
+ break;
+ case INT: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case DATE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ }
+ break;
+ case LONG: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case FLOAT: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
- dcv.isNull[rowIndex] = false;
- } else {
- dcv.vector[rowIndex] = Double.NaN;
- setNullColIsNullValue(dcv, rowIndex);
- }
+ }
+ break;
+ case DATE: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case DOUBLE: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
- dcv.isNull[rowIndex] = false;
- } else {
- dcv.vector[rowIndex] = Double.NaN;
- setNullColIsNullValue(dcv, rowIndex);
- }
+ }
+ break;
+ case FLOAT: {
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
+ dcv.isNull[rowIndex] = false;
+ } else {
+ dcv.vector[rowIndex] = Double.NaN;
+ setNullColIsNullValue(dcv, rowIndex);
}
- break;
- case TIMESTAMP: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
- lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
- lcv.isNull[rowIndex] = false;
- } else {
- lcv.vector[rowIndex] = 1;
- setNullColIsNullValue(lcv, rowIndex);
- }
+ }
+ break;
+ case DOUBLE: {
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
+ dcv.isNull[rowIndex] = false;
+ } else {
+ dcv.vector[rowIndex] = Double.NaN;
+ setNullColIsNullValue(dcv, rowIndex);
}
- break;
- case BINARY: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- bcv.isNull[rowIndex] = false;
- BytesWritable bw = (BytesWritable) writableCol;
- byte[] bytes = bw.getBytes();
- int start = buffer.getLength();
- int length = bytes.length;
- try {
- buffer.write(bytes, 0, length);
- } catch (IOException ioe) {
- throw new IllegalStateException("bad write", ioe);
- }
- bcv.setRef(rowIndex, buffer.getData(), start, length);
- } else {
- setNullColIsNullValue(bcv, rowIndex);
- }
+ }
+ break;
+ case TIMESTAMP: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+ lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ setNullColIsNullValue(lcv, rowIndex);
}
- break;
- case STRING: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
- if (writableCol != null) {
+ }
+ break;
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
bcv.isNull[rowIndex] = false;
- Text colText = (Text) writableCol;
+ BytesWritable bw = (BytesWritable) writableCol;
+ byte[] bytes = bw.getBytes();
int start = buffer.getLength();
- int length = colText.getLength();
+ int length = bytes.length;
try {
- buffer.write(colText.getBytes(), 0, length);
+ buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
}
bcv.setRef(rowIndex, buffer.getData(), start, length);
- } else {
- setNullColIsNullValue(bcv, rowIndex);
- }
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
}
- break;
- case CHAR: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+ }
+ break;
+ case STRING: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ Text colText = (Text) writableCol;
+ int start = buffer.getLength();
+ int length = colText.getLength();
+ try {
+ buffer.write(colText.getBytes(), 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
+ case CHAR: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
+ byte[] bytes = colHiveChar.getStrippedValue().getBytes();
+
+ // We assume the CHAR maximum length was enforced when the object was created.
+ int length = bytes.length;
+
+ int start = buffer.getLength();
+ try {
+ // In vector mode, we store CHAR as unpadded.
+ buffer.write(bytes, 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
+ case VARCHAR: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[offset + colIndex];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
- HiveChar colHiveChar = ((HiveCharWritable) writableCol).getHiveChar();
- byte[] bytes = colHiveChar.getStrippedValue().getBytes();
-
- // We assume the CHAR maximum length was enforced when the object was created.
+ HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
+ byte[] bytes = colHiveVarchar.getValue().getBytes();
+
+ // We assume the VARCHAR maximum length was enforced when the object was created.
int length = bytes.length;
int start = buffer.getLength();
try {
- // In vector mode, we store CHAR as unpadded.
buffer.write(bytes, 0, length);
} catch (IOException ioe) {
throw new IllegalStateException("bad write", ioe);
@@ -405,45 +476,21 @@ public class VectorizedBatchUtil {
}
}
break;
- case VARCHAR: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- bcv.isNull[rowIndex] = false;
- HiveVarchar colHiveVarchar = ((HiveVarcharWritable) writableCol).getHiveVarchar();
- byte[] bytes = colHiveVarchar.getValue().getBytes();
-
- // We assume the VARCHAR maximum length was enforced when the object was created.
- int length = bytes.length;
-
- int start = buffer.getLength();
- try {
- buffer.write(bytes, 0, length);
- } catch (IOException ioe) {
- throw new IllegalStateException("bad write", ioe);
- }
- bcv.setRef(rowIndex, buffer.getData(), start, length);
- } else {
- setNullColIsNullValue(bcv, rowIndex);
- }
- }
- break;
- case DECIMAL:
- DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
- if (writableCol != null) {
- dcv.isNull[rowIndex] = false;
- HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
- dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
- (short) wobj.getScale());
- } else {
- setNullColIsNullValue(dcv, rowIndex);
- }
- break;
- default:
- throw new HiveException("Vectorization is not supported for datatype:"
- + poi.getPrimitiveCategory());
- }
+ case DECIMAL:
+ DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[offset + colIndex];
+ if (writableCol != null) {
+ dcv.isNull[rowIndex] = false;
+ HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
+ dcv.vector[rowIndex].update(wobj.getHiveDecimal().unscaledValue(),
+ (short) wobj.getScale());
+ } else {
+ setNullColIsNullValue(dcv, rowIndex);
+ }
+ break;
+ default:
+ throw new HiveException("Vectorization is not supported for datatype:" +
+ poi.getPrimitiveCategory());
}
}
-
}
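
Taken together, the VectorizedBatchUtil changes are a refactor plus a guard:
the per-type switch that addRowToBatch used to inline is extracted into the
private setVector helper, and the new acidAddRowToBatch entry point reuses it
while skipping columns a projected read left null as well as partition
columns whose repeating values were filled in before any rows were copied.
The shape of the extracted helper, condensed to a single case (a paraphrase
of the diff above, not the full switch):

    switch (poi.getPrimitiveCategory()) {
      case LONG: {
        LongColumnVector lcv = (LongColumnVector) batch.cols[offset + colIndex];
        if (writableCol != null) {
          lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
          lcv.isNull[rowIndex] = false;
        } else {
          lcv.vector[rowIndex] = 1;             // documented filler for null integers
          setNullColIsNullValue(lcv, rowIndex); // mark the entry as null
        }
        break;
      }
      // BOOLEAN/BYTE/SHORT/INT/DATE follow the same LongColumnVector pattern;
      // FLOAT/DOUBLE use DoubleColumnVector with NaN as the null filler;
      // STRING/CHAR/VARCHAR/BINARY copy bytes into the shared DataOutputBuffer
      // and point the BytesColumnVector at them via setRef; DECIMAL updates
      // the DecimalColumnVector entry in place.
    }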
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1633652&r1=1633651&r2=1633652&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Wed Oct 22 17:07:00 2014
@@ -22,10 +22,12 @@ import java.sql.Date;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
+import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -84,7 +86,11 @@ public class VectorizedRowBatchCtx {
private Map<String, Object> partitionValues;
//partition types
- private Map<String, PrimitiveCategory> partitionTypes;
+ private Map<String, PrimitiveCategory> partitionTypes;
+
+ // partition column positions, for use by classes that need to know whether a given column is a
+ // partition column
+ private Set<Integer> partitionCols;
// Column projection list - List of column indexes to include. This
// list does not contain partition columns
@@ -203,12 +209,13 @@ public class VectorizedRowBatchCtx {
// Check to see if this split is part of a partition of a table
String pcols = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMNS);
+ String[] partKeys = null;
if (pcols != null && pcols.length() > 0) {
// Partitions exist for this table. Get the partition object inspector and
// raw row object inspector (row with out partition col)
LinkedHashMap<String, String> partSpec = part.getPartSpec();
- String[] partKeys = pcols.trim().split("/");
+ partKeys = pcols.trim().split("/");
String pcolTypes = partProps.getProperty(hive_metastoreConstants.META_TABLE_PARTITION_COLUMN_TYPES);
String[] partKeyTypes = pcolTypes.trim().split(":");
@@ -262,6 +269,15 @@ public class VectorizedRowBatchCtx {
.asList(new StructObjectInspector[] {partRawRowObjectInspector, partObjectInspector}));
rowOI = rowObjectInspector;
rawRowOI = partRawRowObjectInspector;
+
+ // We have to do this after we've set rowOI, as getColIndexBasedOnColName uses it
+ partitionCols = new HashSet<Integer>();
+ if (pcols != null && pcols.length() > 0) {
+ for (int i = 0; i < partKeys.length; i++) {
+ partitionCols.add(getColIndexBasedOnColName(partKeys[i]));
+ }
+ }
+
} else {
// No partitions for this table, hence row OI equals raw row OI
@@ -586,6 +602,16 @@ public class VectorizedRowBatchCtx {
}
}
+ /**
+ * Determine whether a given column is a partition column
+ * @param colnum column number in
+ * {@link org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch}s created by this context.
+ * @return true if it is a partition column, false otherwise
+ */
+ public final boolean isPartitionCol(int colnum) {
+ return (partitionCols == null) ? false : partitionCols.contains(colnum);
+ }
+
private void addScratchColumnsToBatch(VectorizedRowBatch vrb) throws HiveException {
if (columnTypeMap != null && !columnTypeMap.isEmpty()) {
int origNumCols = vrb.numCols;
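
Two details in the VectorizedRowBatchCtx hunks are easy to miss in the diff:
partitionCols is only built on the partitioned-table path, so the null guard
in isPartitionCol is what makes every column of an unpartitioned table answer
false, and the set can only be populated after rowOI is assigned because
getColIndexBasedOnColName resolves names against it. A consumer sketch
(batch and ctx construction elided; copyRowField is an assumed helper, not
Hive API):

    // A row copier can now ask the context which positions to leave alone
    // instead of re-deriving partition positions from the partition spec.
    for (int col = 0; col < batch.numCols; col++) {
      if (batch.cols[col] == null || ctx.isPartitionCol(col)) {
        continue; // unprojected, or a partition value already set for the batch
      }
      copyRowField(row, col, batch); // assumed per-column copy helper
    }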
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java?rev=1633652&r1=1633651&r2=1633652&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/VectorizedOrcAcidRowReader.java Wed Oct 22 17:07:00 2014
@@ -48,7 +48,6 @@ class VectorizedOrcAcidRowReader
private final OrcStruct value;
private final VectorizedRowBatchCtx rowBatchCtx;
private final ObjectInspector objectInspector;
- private boolean needToSetPartition = true;
private final DataOutputBuffer buffer = new DataOutputBuffer();
VectorizedOrcAcidRowReader(AcidInputFormat.RowReader<OrcStruct> inner,
@@ -83,23 +82,20 @@ class VectorizedOrcAcidRowReader
if (!innerReader.next(key, value)) {
return false;
}
- if (needToSetPartition) {
- try {
- rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
- } catch (HiveException e) {
- throw new IOException("Problem adding partition column", e);
- }
- needToSetPartition = false;
+ try {
+ rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch);
+ } catch (HiveException e) {
+ throw new IOException("Problem adding partition column", e);
}
try {
- VectorizedBatchUtil.addRowToBatch(value,
+ VectorizedBatchUtil.acidAddRowToBatch(value,
(StructObjectInspector) objectInspector,
- vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+ vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
innerReader.next(key, value)) {
- VectorizedBatchUtil.addRowToBatch(value,
+ VectorizedBatchUtil.acidAddRowToBatch(value,
(StructObjectInspector) objectInspector,
- vectorizedRowBatch.size++, vectorizedRowBatch, buffer);
+ vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
}
} catch (HiveException he) {
throw new IOException("error iterating", he);
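
The reader change drops the one-shot needToSetPartition flag: partition
values are now applied at the top of every next() call, and the switch from
addRowToBatch to acidAddRowToBatch means the per-row copy skips those
positions instead of clobbering them. The patched flow, condensed (a
paraphrase of the diff above with exception wrapping elided):

    if (!innerReader.next(key, value)) {
      return false;
    }
    rowBatchCtx.addPartitionColsToBatch(vectorizedRowBatch); // every batch now
    VectorizedBatchUtil.acidAddRowToBatch(value,
        (StructObjectInspector) objectInspector,
        vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
    while (vectorizedRowBatch.size < vectorizedRowBatch.selected.length &&
        innerReader.next(key, value)) {
      VectorizedBatchUtil.acidAddRowToBatch(value,
          (StructObjectInspector) objectInspector,
          vectorizedRowBatch.size++, vectorizedRowBatch, rowBatchCtx, buffer);
    }
    return true;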
Added: hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_partition.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_partition.q?rev=1633652&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_partition.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_partition.q Wed Oct 22 17:07:00 2014
@@ -0,0 +1,10 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select * from acid_vectorized_part order by a, b;
Added: hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_project.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_project.q?rev=1633652&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_project.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/acid_vectorization_project.q Wed Oct 22 17:07:00 2014
@@ -0,0 +1,11 @@
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.enforce.bucketing=true;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true');
+insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10;
+set hive.vectorized.execution.enabled=true;
+select a,b from acid_vectorized order by a;
+select a,c from acid_vectorized order by a;
+select b,c from acid_vectorized order by b;
Added: hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out?rev=1633652&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_partition.q.out Wed Oct 22 17:07:00 2014
@@ -0,0 +1,60 @@
+PREHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized_part
+POSTHOOK: query: CREATE TABLE acid_vectorized_part(a INT, b STRING) partitioned by (ds string) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized_part
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'today') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=today
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=today).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: query: insert into table acid_vectorized_part partition (ds = 'tomorrow') select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized_part@ds=tomorrow
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized_part PARTITION(ds=tomorrow).b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: select * from acid_vectorized_part order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized_part
+PREHOOK: Input: default@acid_vectorized_part@ds=today
+PREHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+POSTHOOK: query: select * from acid_vectorized_part order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized_part
+POSTHOOK: Input: default@acid_vectorized_part@ds=today
+POSTHOOK: Input: default@acid_vectorized_part@ds=tomorrow
+#### A masked pattern was here ####
+-1073279343 oj1YrV5Wa today
+-1073279343 oj1YrV5Wa tomorrow
+-1073051226 A34p7oRr2WvUJNf tomorrow
+-1073051226 A34p7oRr2WvUJNf today
+-1072910839 0iqrc5 tomorrow
+-1072910839 0iqrc5 today
+-1072081801 dPkN74F7 today
+-1072081801 dPkN74F7 tomorrow
+-1072076362 2uLyD28144vklju213J1mr today
+-1072076362 2uLyD28144vklju213J1mr tomorrow
+-1071480828 aw724t8c5558x2xneC624 tomorrow
+-1071480828 aw724t8c5558x2xneC624 today
+-1071363017 Anj0oF today
+-1071363017 Anj0oF tomorrow
+-1070883071 0ruyd6Y50JpdGRf6HqD tomorrow
+-1070883071 0ruyd6Y50JpdGRf6HqD today
+-1070551679 iUR3Q today
+-1070551679 iUR3Q tomorrow
+-1069736047 k17Am8uPHWk02cEf1jet tomorrow
+-1069736047 k17Am8uPHWk02cEf1jet today
Added: hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_project.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_project.q.out?rev=1633652&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_project.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/acid_vectorization_project.q.out Wed Oct 22 17:07:00 2014
@@ -0,0 +1,73 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING, c float) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1, cfloat from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.c SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cfloat, type:float, comment:null), ]
+PREHOOK: query: select a,b from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,b from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343 oj1YrV5Wa
+-1073051226 A34p7oRr2WvUJNf
+-1072910839 0iqrc5
+-1072081801 dPkN74F7
+-1072076362 2uLyD28144vklju213J1mr
+-1071480828 aw724t8c5558x2xneC624
+-1071363017 Anj0oF
+-1070883071 0ruyd6Y50JpdGRf6HqD
+-1070551679 iUR3Q
+-1069736047 k17Am8uPHWk02cEf1jet
+PREHOOK: query: select a,c from acid_vectorized order by a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a,c from acid_vectorized order by a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343 11.0
+-1073051226 NULL
+-1072910839 11.0
+-1072081801 NULL
+-1072076362 NULL
+-1071480828 -51.0
+-1071363017 8.0
+-1070883071 NULL
+-1070551679 NULL
+-1069736047 11.0
+PREHOOK: query: select b,c from acid_vectorized order by b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select b,c from acid_vectorized order by b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+0iqrc5 11.0
+0ruyd6Y50JpdGRf6HqD NULL
+2uLyD28144vklju213J1mr NULL
+A34p7oRr2WvUJNf NULL
+Anj0oF 8.0
+aw724t8c5558x2xneC624 -51.0
+dPkN74F7 NULL
+iUR3Q NULL
+k17Am8uPHWk02cEf1jet 11.0
+oj1YrV5Wa 11.0