You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/23 20:58:29 UTC

svn commit: r1506213 - in /hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc: BitFieldReader.java RunLengthByteReader.java RunLengthIntegerReader.java

Author: hashutosh
Date: Tue Jul 23 18:58:29 2013
New Revision: 1506213

URL: http://svn.apache.org/r1506213
Log:
HIVE-4909 : Vectorized ORC reader does not set isRepeating flag correctly when 1’s are present in the input stream (Sarvesh Sakalanaga via Ashutosh Chauhan)

Modified:
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
    hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java?rev=1506213&r1=1506212&r2=1506213&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/BitFieldReader.java Tue Jul 23 18:58:29 2013
@@ -64,6 +64,7 @@ class BitFieldReader {
 
   void nextVector(LongColumnVector previous, long previousLen)
       throws IOException {
+
     previous.isRepeating = true;
     for (int i = 0; i < previousLen; i++) {
       if (!previous.isNull[i]) {
@@ -73,7 +74,13 @@ class BitFieldReader {
         // processing is 1, so set that if the value is null
         previous.vector[i] = 1;
       }
-      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that non null value can also be 1, we need to check for isNull also
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
         previous.isRepeating = false;
       }
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java?rev=1506213&r1=1506212&r2=1506213&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthByteReader.java Tue Jul 23 18:58:29 2013
@@ -95,7 +95,13 @@ class RunLengthByteReader {
         // processing is 1, so set that if the value is null
         previous.vector[i] = 1;
       }
-      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that non null value can also be 1, we need to check for isNull also
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && ((previous.vector[i - 1] != previous.vector[i]) || (previous.isNull[i - 1] != previous.isNull[i]))) {
         previous.isRepeating = false;
       }
     }

Modified: hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java
URL: http://svn.apache.org/viewvc/hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java?rev=1506213&r1=1506212&r2=1506213&view=diff
==============================================================================
--- hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java (original)
+++ hive/branches/vectorization/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RunLengthIntegerReader.java Tue Jul 23 18:58:29 2013
@@ -101,7 +101,13 @@ class RunLengthIntegerReader {
         // processing is 1, so set that if the value is null
         previous.vector[i] = 1;
       }
-      if (previous.isRepeating && i > 0 && (previous.vector[i-1] != previous.vector[i])) {
+
+      // The default value for nulls in Vectorization for int types is 1
+      // and given that non null value can also be 1, we need to check for isNull also
+      // when determining the isRepeating flag.
+      if (previous.isRepeating
+          && i > 0
+          && (previous.vector[i - 1] != previous.vector[i] || previous.isNull[i - 1] != previous.isNull[i])) {
         previous.isRepeating = false;
       }
     }