You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/05/16 12:16:10 UTC
svn commit: r1483290 - in /hive/branches/vectorization/ql/src: java/org/apache/hadoop/hive/ql/exec/vector/ java/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/

Author: hashutosh
Date: Thu May 16 10:16:09 2013
New Revision: 1483290

URL: http://svn.apache.org/r1483290
Log:
HIVE-4552 : Vectorized RecordReader for ORC does not set the ColumnVector.isRepeating correctly (Sarvesh Sakalanaga via Ashutosh Chauhan)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
    hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java Thu May 16 10:16:09 2013
@@ -208,7 +208,8 @@ public class BytesColumnVector extends C
     }
     Writable result = null;
     if (!isNull[index] && vector[index] != null) {
-      result = new Text(vector[index]);
+      result = new Text();
+      ((Text) result).append(vector[index], start[index], length[index]);
     } else {
       result = NullWritable.get();
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Thu May 16 10:16:09 2013
@@ -930,34 +930,42 @@ class RecordReaderImpl implements Record
       // Read present/isNull stream
       super.nextVector(result, batchSize);
 
-      byte[] dictionaryBytes = dictionaryBuffer.get();
+      if (dictionaryBuffer != null) {
+        byte[] dictionaryBytes = dictionaryBuffer.get();
 
-      // Read string offsets
-      scratchlcv.isNull = result.isNull;
-      reader.nextVector(scratchlcv, batchSize);
-      if (!scratchlcv.isRepeating) {
-
-        // The vector has non-repeating strings. Iterate thru the batch
-        // and set strings one by one
-        for (int i = 0; i < batchSize; i++) {
-          if (!scratchlcv.isNull[i]) {
-            offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
-            length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
-            result.setRef(i, dictionaryBytes, offset, length);
-          } else {
-            // If the value is null then set offset and length to zero (null string)
-            result.setRef(i, dictionaryBytes, 0, 0);
+        // Read string offsets
+        scratchlcv.isNull = result.isNull;
+        reader.nextVector(scratchlcv, batchSize);
+        if (!scratchlcv.isRepeating) {
+
+          // The vector has non-repeating strings. Iterate thru the batch
+          // and set strings one by one
+          for (int i = 0; i < batchSize; i++) {
+            if (!scratchlcv.isNull[i]) {
+              offset = dictionaryOffsets[(int) scratchlcv.vector[i]];
+              length = getDictionaryEntryLength((int) scratchlcv.vector[i], offset);
+              result.setRef(i, dictionaryBytes, offset, length);
+            } else {
+              // If the value is null then set offset and length to zero (null string)
+              result.setRef(i, dictionaryBytes, 0, 0);
+            }
           }
+        } else {
+          // If the value is repeating then just set the first value in the
+          // vector and set the isRepeating flag to true. No need to iterate thru and
+          // set all the elements to the same value
+          offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
+          length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
+          result.setRef(0, dictionaryBytes, offset, length);
         }
+        result.isRepeating = scratchlcv.isRepeating;
       } else {
-        // If the value is repeating then just set the first value in the
-        // vector and set the isRepeating flag to true. No need to iterate thru and
-        // set all the elements to the same value
-        offset = dictionaryOffsets[(int) scratchlcv.vector[0]];
-        length = getDictionaryEntryLength((int) scratchlcv.vector[0], offset);
-        result.setRef(0, dictionaryBytes, offset, length);
+        // Entire stripe contains null strings.
+        result.isRepeating = true;
+        result.noNulls = false;
+        result.isNull[0] = true;
+        result.setRef(0, "".getBytes(), 0, 0);
       }
-      result.isRepeating = scratchlcv.isRepeating;
       return result;
     }
 

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Thu May 16 10:16:09 2013
@@ -101,7 +101,7 @@ class RunLengthIntegerReader {
         // processing is 1, so set that if the value is null
         previous.vector[i] = 1;
       }
-      if (previous.isRepeating && (delta != 0 || !repeat)) {
+      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
         previous.isRepeating = false;
       }
     }

Modified: hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java?rev=1483290&r1=1483289&r2=1483290&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java (original)
+++ hive/branches/vectorization/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestVectorizedORCReader.java Thu May 16 10:16:09 2013
@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
 import org.junit.Before;
 import org.junit.Test;
@@ -124,7 +125,7 @@ public class TestVectorizedORCReader {
           Object a = ((Writable) row.getFieldValue(j));
           Object b = batch.cols[j].getWritableObject(i);
           if (null == a) {
-            Assert.assertEquals(true, (b == null));
+            Assert.assertEquals(true, (b == null || (b instanceof NullWritable)));
           } else {
             Assert.assertEquals(true, b.toString().equals(a.toString()));
           }