You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/16 12:16:10 UTC
svn commit: r1483290 - in /hive/branches/vectorization/ql/src:
java/org/apache/hadoop/hive/ql/exec/vector/
java/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/
Author: hashutosh
Date: Thu May 16 10:16:09 2013
New Revision: 1483290
URL: http://svn.apache.org/r1483290
Log:
HIVE-4552 : Vectorized RecordReader for ORC does not set the ColumnVector.IsRepeating correctly (Sarvesh Sakalanaga via Ashutosh Chauhan)
Modified:
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java Thu May 16 10:16:09 2013
@@ -208,7 +208,8 @@ public class BytesColumnVector extends C
}
Writable result = null;
if (!isNull[index] && vector[index] != null) {
- result = new Text(vector[index]);
+ result = new Text();
+ ((Text) result).append(vector[index], start[index], length[index]);
} else {
result = NullWritable.get();
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu May 16 10:16:09 2013
@@ -930,34 +930,42 @@ class RecordReaderImpl implements Record
// Read present/isNull stream
super.nextVector(result, batchSize);
- byte[] dictionaryBytes = dictionaryBuffer.get();
+ if (dictionaryBuffer != null) {
+ byte[] dictionaryBytes = dictionaryBuffer.get();
- // Read string offsets
- scratchlcv.isNull = result.isNull;
- reader.nextVector(scratchlcv, batchSize);
- if (!scratchlcv.isRepeating) {
-
- // The vector has non-repeating strings. Iterate thru the batch
- // and set strings one by one
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
- length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
- result.setRef(i, dictionaryBytes, offset, length);
- } else {
- // If the value is null then set offset and length to zero (null string)
- result.setRef(i, dictionaryBytes, 0, 0);
+ // Read string offsets
+ scratchlcv.isNull = result.isNull;
+ reader.nextVector(scratchlcv, batchSize);
+ if (!scratchlcv.isRepeating) {
+
+ // The vector has non-repeating strings. Iterate thru the batch
+ // and set strings one by one
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
+ length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
+ result.setRef(i, dictionaryBytes, offset, length);
+ } else {
+ // If the value is null then set offset and length to zero (null string)
+ result.setRef(i, dictionaryBytes, 0, 0);
+ }
}
+ } else {
+ // If the value is repeating then just set the first value in the
+ // vector and set the isRepeating flag to true. No need to iterate thru and
+ // set all the elements to the same value
+ offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
+ length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
+ result.setRef(0, dictionaryBytes, offset, length);
}
+ result.isRepeating = scratchlcv.isRepeating;
} else {
- // If the value is repeating then just set the first value in the
- // vector and set the isRepeating flag to true. No need to iterate thru and
- // set all the elements to the same value
- offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
- length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
- result.setRef(0, dictionaryBytes, offset, length);
+ // Entire stripe contains null strings.
+ result.isRepeating = true;
+ result.noNulls = false;
+ result.isNull[0] = true;
+ result.setRef(0, "".getBytes(), 0, 0);
}
- result.isRepeating = scratchlcv.isRepeating;
return result;
}
Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Thu May 16 10:16:09 2013
@@ -101,7 +101,7 @@ class RunLengthIntegerReader {
// processing is 1, so set that if the value is null
previous.vector[i] = 1;
}
- if (previous.isRepeating && (delta != 0 || !repeat)) {
+ if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
previous.isRepeating = false;
}
}
Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java Thu May 16 10:16:09 2013
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Writable;
import org.junit.Before;
import org.junit.Test;
@@ -124,7 +125,7 @@ public class TestVectorizedORCReader {
Object a = ((Writable) row.getFieldValue(j));
Object b = batch.cols[j].getWritableObject(i);
if (null == a) {
- Assert.assertEquals(true, (b == null));
+ Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
} else {
Assert.assertEquals(true, b.toString().equals(a.toString()));
}