Posted to commits@hive.apache.org by om...@apache.org on 2013/05/30 17:06:54 UTC
svn commit: r1487884 - in /hive/branches/vectorization:
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/test/org/apache/hadoop/hive/ql/exec/vector/
ql/src/test/org/apache/hadoop/hive/ql/io/orc/ se...
Author: omalley
Date: Thu May 30 15:06:54 2013
New Revision: 1487884
URL: http://svn.apache.org/r1487884
Log:
HIVE-4160 Vectorized reader support for Byte, Boolean and Timestamp (Sarvesh
Sakalanaga via omalley)
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Thu May 30 15:06:54 2013
@@ -18,16 +18,20 @@
package org.apache.hadoop.hive.ql.exec.vector;
+import java.sql.Timestamp;
import java.util.List;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -88,6 +92,28 @@ public class VectorizedBatchUtil {
// NOTE: The default value for null fields in vectorization is 1 for int types, NaN for
// float/double. String types have no default value for null.
switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ SetNullColIsNullValue(lcv, rowIndex);
+ }
+ }
+ break;
+ case BYTE: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+ if (writableCol != null) {
+ lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ SetNullColIsNullValue(lcv, rowIndex);
+ }
+ }
+ break;
case SHORT: {
LongColumnVector lcv = (LongColumnVector) batch.cols[i];
if (writableCol != null) {
@@ -143,6 +169,18 @@ public class VectorizedBatchUtil {
}
}
break;
+ case TIMESTAMP: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[i];
+ if (writableCol != null) {
+ Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+ lcv.vector[rowIndex] = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
+ lcv.isNull[rowIndex] = false;
+ } else {
+ lcv.vector[rowIndex] = 1;
+ SetNullColIsNullValue(lcv, rowIndex);
+ }
+ }
+ break;
case STRING: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[i];
if (writableCol != null) {
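
For reference, the TIMESTAMP case above packs a java.sql.Timestamp into a
single long as nanoseconds since the epoch: getTime() already carries the
whole milliseconds, so only the sub-millisecond remainder of getNanos() is
added back. A minimal, self-contained sketch of that encoding (class name
and sample value are illustrative, not part of the patch):

    import java.sql.Timestamp;

    public class TimestampEncodeSketch {
      public static void main(String[] args) {
        Timestamp t = Timestamp.valueOf("2013-05-30 15:06:54.123456789");
        // getTime(): whole milliseconds since the epoch (includes the .123);
        // getNanos() % 1000000: the sub-millisecond remainder (456789 here).
        long asNanos = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
        System.out.println(asNanos);
      }
    }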
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Thu May 30 15:06:54 2013
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec.vector;
import java.nio.ByteBuffer;
+import java.sql.Timestamp;
import java.util.List;
import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -28,7 +29,9 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazy.LazyLong;
+import org.apache.hadoop.hive.serde2.lazy.LazyTimestamp;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
@@ -121,6 +124,18 @@ public class VectorizedColumnarSerDe ext
}
switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[k];
+ // In vectorization true is stored as 1 and false as 0
+ boolean b = lcv.vector[rowIndex] == 1;
+ if (b) {
+ serializeVectorStream.write(LazyUtils.trueBytes, 0, LazyUtils.trueBytes.length);
+ } else {
+ serializeVectorStream.write(LazyUtils.falseBytes, 0, LazyUtils.falseBytes.length);
+ }
+ }
+ break;
+ case BYTE:
case SHORT:
case INT:
case LONG:
@@ -141,6 +156,16 @@ public class VectorizedColumnarSerDe ext
serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
.getNeedsEscape());
break;
+ case TIMESTAMP:
+ LongColumnVector tcv = (LongColumnVector) batch.cols[k];
+ long timeInNanoSec = tcv.vector[rowIndex];
+ Timestamp t = new Timestamp(0);
+ t.setTime((timeInNanoSec)/1000000);
+ t.setNanos((int)((t.getNanos()) + (timeInNanoSec % 1000000)));
+ TimestampWritable tw = new TimestampWritable();
+ tw.set(t);
+ LazyTimestamp.writeUTF8(serializeVectorStream, tw);
+ break;
default:
throw new UnsupportedOperationException(
"Vectorizaton is not supported for datatype:"
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Thu May 30 15:06:54 2013
@@ -197,11 +197,14 @@ public class VectorizedRowBatchCtx {
case PRIMITIVE: {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
// Vectorization currently only supports the following data types:
- // SHORT, INT, LONG, FLOAT, DOUBLE, STRING
+ // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING and TIMESTAMP
switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN:
+ case BYTE:
case SHORT:
case INT:
case LONG:
+ case TIMESTAMP:
result.cols[j] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
case FLOAT:
@@ -232,9 +235,6 @@ public class VectorizedRowBatchCtx {
return result;
}
-
-
-
/**
* Adds the row to the batch after deserializing the row
*
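
As the switch above shows, BOOLEAN, BYTE, SHORT, INT, LONG and TIMESTAMP all
share LongColumnVector; only the interpretation of the stored long differs
(0/1 for booleans, nanoseconds for timestamps). A minimal sketch of
allocating such a batch by hand, assuming a hypothetical three-column layout
of boolean, timestamp and string:

    import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
    import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

    public class BatchAllocSketch {
      public static void main(String[] args) {
        VectorizedRowBatch batch = new VectorizedRowBatch(3);
        batch.cols[0] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[1] = new LongColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
        batch.cols[2] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
      }
    }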
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Thu May 30 15:06:54 2013
@@ -20,8 +20,10 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
class BitFieldReader {
- private RunLengthByteReader input;
+ private final RunLengthByteReader input;
private final int bitSize;
private int current;
private int bitsLeft;
@@ -60,6 +62,23 @@ class BitFieldReader {
return result & mask;
}
+ void nextVector(LongColumnVector previous, long previousLen)
+ throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (!previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int types in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+ if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+ previous.isRepeating = false;
+ }
+ }
+ }
+
void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
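
nextVector() above follows a pattern used throughout this patch: copy every
non-null slot from the stream, substitute the int-type null default of 1 for
null slots, and clear isRepeating as soon as two adjacent values differ. A
self-contained sketch of the same pattern on plain arrays (all names here
are illustrative):

    public class RepeatDetectSketch {
      /** Fills 'vector' and returns true if every slot ends up identical. */
      static boolean fillAndDetectRepeat(long[] vector, boolean[] isNull,
          long[] stream, int len) {
        boolean isRepeating = true;
        int next = 0;
        for (int i = 0; i < len; i++) {
          // Null slots get the vectorization default for int types: 1
          vector[i] = isNull[i] ? 1 : stream[next++];
          if (isRepeating && i > 0 && vector[i - 1] != vector[i]) {
            isRepeating = false;
          }
        }
        return isRepeating;
      }

      public static void main(String[] args) {
        long[] vector = new long[4];
        boolean[] isNull = {false, true, false, false};
        // Reads 1, fills 1 for the null, reads 1, 1 -> still repeating
        System.out.println(
            fillAndDetectRepeat(vector, isNull, new long[]{1, 1, 1}, 4));
      }
    }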
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu May 30 15:06:54 2013
@@ -240,8 +240,19 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws IOException {
- throw new UnsupportedOperationException(
- "NextVector is not supported operation on Boolean type");
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
}
}
@@ -284,8 +295,19 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws IOException {
- throw new UnsupportedOperationException(
- "NextVector is not supported operation for Byte type");
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ // Read value entries based on isNull entries
+ reader.nextVector(result, batchSize);
+ return result;
}
@Override
@@ -708,6 +730,7 @@ class RecordReaderImpl implements Record
private static class TimestampTreeReader extends TreeReader{
private RunLengthIntegerReader data;
private RunLengthIntegerReader nanos;
+ private final LongColumnVector nanoVector = new LongColumnVector();
TimestampTreeReader(int columnId) {
super(columnId);
@@ -758,8 +781,47 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws IOException {
- throw new UnsupportedOperationException(
- "NextVector is not supported operation for TimeStamp type");
+ LongColumnVector result = null;
+ if (previousVector == null) {
+ result = new LongColumnVector();
+ } else {
+ result = (LongColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ data.nextVector(result, batchSize);
+ nanoVector.isNull = result.isNull;
+ nanos.nextVector(nanoVector, batchSize);
+
+ if (!(result.isRepeating && nanoVector.isRepeating)) {
+
+ // Non-repeating values present in the vector. Iterate through the vector and populate the time
+ for (int i = 0; i < batchSize; i++) {
+ if (!result.isNull[i]) {
+ result.vector[i] = (result.vector[i] + WriterImpl.BASE_TIMESTAMP)
+ * WriterImpl.MILLIS_PER_SECOND;
+ nanoVector.vector[i] = parseNanos(nanoVector.vector[i]);
+ // fix the rounding when we divided by 1000.
+ if (result.vector[i] >= 0) {
+ result.vector[i] += nanoVector.vector[i] / 1000000;
+ } else {
+ result.vector[i] -= nanoVector.vector[i] / 1000000;
+ }
+ // Convert millis into nanos and add the nano vector value to it
+ result.vector[i] = (result.vector[i] * 1000000)
+ + nanoVector.vector[i];
+ }
+ }
+ } else {
+ // Entire vector has repeating values; apply the same conversion as above
+ if (!result.isNull[0]) {
+ result.vector[0] = (result.vector[0] + WriterImpl.BASE_TIMESTAMP)
+ * WriterImpl.MILLIS_PER_SECOND;
+ nanoVector.vector[0] = parseNanos(nanoVector.vector[0]);
+ if (result.vector[0] >= 0) {
+ result.vector[0] += nanoVector.vector[0] / 1000000;
+ } else {
+ result.vector[0] -= nanoVector.vector[0] / 1000000;
+ }
+ result.vector[0] = (result.vector[0] * 1000000) + nanoVector.vector[0];
+ }
+ }
+ return result;
}
private static int parseNanos(long serialized) {
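
For reference, the non-repeating branch above assembles the final value in
three steps: rebase the seconds stream and scale it to milliseconds, recover
the millisecond part that the writer lost when it divided by 1000, and
express the result in nanoseconds. A sketch of that arithmetic with
hypothetical, already-decoded stream values:

    public class TimestampCombineSketch {
      public static void main(String[] args) {
        // Hypothetical: seconds already rebased by WriterImpl.BASE_TIMESTAMP
        // and scaled by MILLIS_PER_SECOND; nanos already expanded by parseNanos()
        long millis = 1369926414000L;
        long nanos = 123456789L;

        millis += nanos / 1000000;                  // recover the lost milliseconds
                                                    // (subtracted for negative times)
        long combined = (millis * 1000000) + nanos; // whole value in nanoseconds
        System.out.println(combined);
      }
    }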
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Thu May 30 15:06:54 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.io.orc
import java.io.EOFException;
import java.io.IOException;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+
/**
* A reader that reads a sequence of bytes. A control byte is read before
* each run with positive values 0 to 127 meaning 3 to 130 repetitions. If the
@@ -82,6 +84,23 @@ class RunLengthByteReader {
return result;
}
+ void nextVector(LongColumnVector previous, long previousLen)
+ throws IOException {
+ previous.isRepeating = true;
+ for (int i = 0; i < previousLen; i++) {
+ if (!previous.isNull[i]) {
+ previous.vector[i] = next();
+ } else {
+ // The default value of null for int types in vectorized
+ // processing is 1, so set that if the value is null
+ previous.vector[i] = 1;
+ }
+ if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+ previous.isRepeating = false;
+ }
+ }
+ }
+
void seek(PositionProvider index) throws IOException {
input.seek(index);
int consumed = (int) index.getNext();
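
The class comment above is truncated in this excerpt; for background, in
ORC's byte run-length scheme a control byte of 0 to 127 announces
control + 3 repetitions of the next byte, and a negative control byte
announces that many literal bytes (the negative case is an assumption here,
since the sentence is cut off above). A minimal sketch of a decoder for that
scheme:

    import java.io.ByteArrayInputStream;
    import java.io.IOException;
    import java.io.InputStream;

    public class ByteRleDecodeSketch {
      static byte[] decode(InputStream in, int count) throws IOException {
        byte[] out = new byte[count];
        int written = 0;
        while (written < count) {
          int control = in.read();
          if (control >= 128) {
            control -= 256;                      // reinterpret as a signed byte
          }
          if (control >= 0) {
            byte value = (byte) in.read();       // one byte, repeated control+3 times
            for (int i = 0; i < control + 3; i++) {
              out[written++] = value;
            }
          } else {
            for (int i = 0; i < -control; i++) { // -control literal bytes follow
              out[written++] = (byte) in.read();
            }
          }
        }
        return out;
      }

      public static void main(String[] args) throws IOException {
        // Control 2 -> five copies of 7; control -2 (0xFE) -> two literals
        byte[] encoded = {0x02, 0x07, (byte) 0xFE, 0x01, 0x02};
        for (byte b : decode(new ByteArrayInputStream(encoded), 7)) {
          System.out.print(b + " ");             // prints: 7 7 7 7 7 1 2
        }
      }
    }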
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorizedRowBatchCtx.java Thu May 30 15:06:54 2013
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.exec.v
import java.io.File;
import java.io.IOException;
+import java.sql.Timestamp;
+import java.util.Calendar;
import java.util.List;
import java.util.Properties;
@@ -36,13 +38,16 @@ import org.apache.hadoop.hive.serde2.Ser
import org.apache.hadoop.hive.serde2.columnar.BytesRefArrayWritable;
import org.apache.hadoop.hive.serde2.columnar.BytesRefWritable;
import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
@@ -82,10 +87,10 @@ public class TestVectorizedRowBatchCtx {
// Set the configuration parameters
tbl.setProperty(serdeConstants.SERIALIZATION_FORMAT, "6");
tbl.setProperty("columns",
- "ashort,aint,along,adouble,afloat,astring");
+ "ashort,aint,along,adouble,afloat,astring,abyte,aboolean,atimestamp");
tbl.setProperty("columns.types",
- "smallint:int:bigint:double:float:string");
- colCount = 6;
+ "smallint:int:bigint:double:float:string:tinyint:boolean:timestamp");
+ colCount = 9;
tbl.setProperty(serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
try {
@@ -109,7 +114,8 @@ public class TestVectorizedRowBatchCtx {
BytesRefArrayWritable bytes = new BytesRefArrayWritable(colCount);
BytesRefWritable cu;
- if (i % 3 != 0) {
+ if (i % 3 != 0) {
cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
bytes.set(0, cu);
@@ -132,6 +138,20 @@ public class TestVectorizedRowBatchCtx {
cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
("Test string").getBytes("UTF-8").length);
bytes.set(5, cu);
+
+ cu = new BytesRefWritable((1 + "").getBytes("UTF-8"), 0,
+ (1 + "").getBytes("UTF-8").length);
+ bytes.set(6, cu);
+
+ cu = new BytesRefWritable(("true").getBytes("UTF-8"), 0,
+ ("true").getBytes("UTF-8").length);
+ bytes.set(7, cu);
+
+ Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
+ cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
+ t.toString().getBytes("UTF-8").length);
+ bytes.set(8, cu);
+
} else {
cu = new BytesRefWritable((i + "").getBytes("UTF-8"), 0, (i + "").getBytes("UTF-8").length);
bytes.set(0, cu);
@@ -151,6 +171,19 @@ public class TestVectorizedRowBatchCtx {
cu = new BytesRefWritable(("Test string").getBytes("UTF-8"), 0,
("Test string").getBytes("UTF-8").length);
bytes.set(5, cu);
+
+ cu = new BytesRefWritable(new byte[0], 0, 0);
+ bytes.set(6, cu);
+
+ cu = new BytesRefWritable(new byte[0], 0, 0);
+ bytes.set(7, cu);
+
+ Timestamp t = new Timestamp(Calendar.getInstance().getTime().getTime());
+ cu = new BytesRefWritable(t.toString().getBytes("UTF-8"), 0,
+ t.toString().getBytes("UTF-8").length);
+ bytes.set(8, cu);
}
writer.append(bytes);
}
@@ -166,7 +199,7 @@ public class TestVectorizedRowBatchCtx {
.getObjectInspector();
List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();
- Assert.assertEquals("Field size should be 6", colCount, fieldRefs.size());
+ Assert.assertEquals("Field size should be 9", colCount, fieldRefs.size());
// Create the context
VectorizedRowBatchCtx ctx = new VectorizedRowBatchCtx(oi, oi, serDe, null);
@@ -213,6 +246,17 @@ public class TestVectorizedRowBatchCtx {
Object writableCol = poi.getPrimitiveWritableObject(fieldData);
if (writableCol != null) {
switch (poi.getPrimitiveCategory()) {
+ case BOOLEAN: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[j];
+ Assert.assertEquals(true, lcv.vector[i] == (((BooleanWritable) writableCol).get() ? 1
+ : 0));
+ }
+ break;
+ case BYTE: {
+ LongColumnVector lcv = (LongColumnVector) batch.cols[j];
+ Assert.assertEquals(true, lcv.vector[i] == (long) ((ByteWritable) writableCol).get());
+ }
+ break;
case SHORT: {
LongColumnVector lcv = (LongColumnVector) batch.cols[j];
Assert.assertEquals(true, lcv.vector[i] == ((ShortWritable) writableCol).get());
@@ -247,6 +291,13 @@ public class TestVectorizedRowBatchCtx {
Assert.assertEquals(true, a.equals(b));
}
break;
+ case TIMESTAMP: {
+ LongColumnVector tcv = (LongColumnVector) batch.cols[j];
+ Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
+ long timeInNanoSec = (t.getTime() * 1000000) + (t.getNanos() % 1000000);
+ Assert.assertEquals(true, tcv.vector[i] == timeInNanoSec);
+ }
+ break;
default:
Assert.assertEquals("Unknown type", false);
}
@@ -275,18 +326,18 @@ public class TestVectorizedRowBatchCtx {
@Test
public void TestCtx() throws Exception {
- InitSerde();
- WriteRCFile(this.fs, this.testFilePath, this.conf);
- VectorizedRowBatch batch = GetRowBatch();
- ValidateRowBatch(batch);
-
- // Test VectorizedColumnarSerDe
- VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe();
- vcs.initialize(this.conf, tbl);
- Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe
- .getObjectInspector());
- BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[])((ObjectWritable)w).get();
- vcs.deserializeVector(refArray, 10, batch);
- ValidateRowBatch(batch);
+ InitSerde();
+ WriteRCFile(this.fs, this.testFilePath, this.conf);
+ VectorizedRowBatch batch = GetRowBatch();
+ ValidateRowBatch(batch);
+
+ // Test VectorizedColumnarSerDe
+ VectorizedColumnarSerDe vcs = new VectorizedColumnarSerDe();
+ vcs.initialize(this.conf, tbl);
+ Writable w = vcs.serializeVector(batch, (StructObjectInspector) serDe
+ .getObjectInspector());
+ BytesRefArrayWritable[] refArray = (BytesRefArrayWritable[]) ((ObjectWritable) w).get();
+ vcs.deserializeVector(refArray, 10, batch);
+ ValidateRowBatch(batch);
}
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java Thu May 30 15:06:54 2013
@@ -19,6 +19,8 @@
package org.apache.hadoop.hive.ql.io.orc;
import java.io.File;
+import java.sql.Timestamp;
+import java.util.Calendar;
import java.util.Random;
import junit.framework.Assert;
@@ -29,8 +31,8 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
import org.junit.Before;
import org.junit.Test;
@@ -58,18 +60,24 @@ public class TestVectorizedORCReader {
}
static class MyRecord {
+ private final Boolean bo;
+ private final Byte by;
private final Integer i;
private final Long l;
private final Short s;
private final Double d;
private final String k;
+ private final Timestamp t;
- MyRecord(Integer i, Long l, Short s, Double d, String k) {
+ MyRecord(Boolean bo, Byte by, Integer i, Long l, Short s, Double d, String k, Timestamp t) {
+ this.bo = bo;
+ this.by = by;
this.i = i;
this.l = l;
this.s = s;
this.d = d;
this.k = k;
+ this.t = t;
}
}
@@ -96,11 +104,11 @@ public class TestVectorizedORCReader {
"Heaven,", "we", "were", "all", "going", "direct", "the", "other",
"way"};
for (int i = 0; i < 21000; ++i) {
- if ((i % 3) != 0) {
- writer.addRow(new MyRecord(i, (long) 200, (short) (300 + i), (double) (400 + i),
- words[r1.nextInt(words.length)]));
+ if ((i % 7) != 0) {
+ writer.addRow(new MyRecord(((i % 3) == 0), (byte)(i % 5), i, (long) 200, (short) (300 + i), (double) (400 + i),
+ words[r1.nextInt(words.length)], new Timestamp(Calendar.getInstance().getTime().getTime())));
} else {
- writer.addRow(new MyRecord(i, (long) 200, null, null, null));
+ writer.addRow(new MyRecord(null, null, i, (long) 200, null, null, null, null));
}
}
writer.close();
@@ -122,8 +130,21 @@ public class TestVectorizedORCReader {
for (int i = 0; i < batch.size; i++) {
row = (OrcStruct) rr.next((Object) row);
for (int j = 0; j < batch.cols.length; j++) {
- Object a = ((Writable) row.getFieldValue(j));
+ Object a = (row.getFieldValue(j));
Object b = batch.cols[j].getWritableObject(i);
+ // Boolean values are stored as 1's and 0's, so convert and compare
+ if (a instanceof BooleanWritable) {
+ Long temp = (long) (((BooleanWritable) a).get() ? 1 : 0);
+ Assert.assertEquals(true, temp.toString().equals(b.toString()));
+ continue;
+ }
+ // Timestamps are stored as long, so convert and compare
+ if (a instanceof Timestamp) {
+ Timestamp t = ((Timestamp) a);
+ Long timeInNanoSec = (t.getTime() * 1000000) + t.getNanos();
+ Assert.assertEquals(true, timeInNanoSec.toString().equals(b.toString()));
+ continue;
+ }
if (null == a) {
Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
} else {
@@ -134,17 +155,23 @@ public class TestVectorizedORCReader {
// Check repeating
Assert.assertEquals(false, batch.cols[0].isRepeating);
- Assert.assertEquals(true, batch.cols[1].isRepeating);
+ Assert.assertEquals(false, batch.cols[1].isRepeating);
Assert.assertEquals(false, batch.cols[2].isRepeating);
- Assert.assertEquals(false, batch.cols[3].isRepeating);
+ Assert.assertEquals(true, batch.cols[3].isRepeating);
Assert.assertEquals(false, batch.cols[4].isRepeating);
+ Assert.assertEquals(false, batch.cols[5].isRepeating);
+ Assert.assertEquals(false, batch.cols[6].isRepeating);
+ Assert.assertEquals(false, batch.cols[7].isRepeating);
// Check non null
- Assert.assertEquals(true, batch.cols[0].noNulls);
- Assert.assertEquals(true, batch.cols[1].noNulls);
- Assert.assertEquals(false, batch.cols[2].noNulls);
- Assert.assertEquals(false, batch.cols[3].noNulls);
+ Assert.assertEquals(false, batch.cols[0].noNulls);
+ Assert.assertEquals(false, batch.cols[1].noNulls);
+ Assert.assertEquals(true, batch.cols[2].noNulls);
+ Assert.assertEquals(true, batch.cols[3].noNulls);
Assert.assertEquals(false, batch.cols[4].noNulls);
+ Assert.assertEquals(false, batch.cols[5].noNulls);
+ Assert.assertEquals(false, batch.cols[6].noNulls);
+ Assert.assertEquals(false, batch.cols[7].noNulls);
}
Assert.assertEquals(false, rr.hasNext());
}
Modified: hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java?rev=1487884&r1=1487883&r2=1487884&view=diff
==============================================================================
--- hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java (original)
+++ hive/branches/vectorization/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java Thu May 30 15:06:54 2013
@@ -127,8 +127,8 @@ public final class LazyUtils {
}
}
- private static byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'};
- private static byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'};
+ public static byte[] trueBytes = {(byte) 't', 'r', 'u', 'e'};
+ public static byte[] falseBytes = {(byte) 'f', 'a', 'l', 's', 'e'};
/**
* Write the bytes with special characters escaped.