You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain text copy.
Posted to commits@hive.apache.org by pr...@apache.org on 2016/04/05 02:33:14 UTC

hive git commit: HIVE-13330: ORC vectorized string dictionary reader does not differentiate null vs empty string dictionary (Prasanth Jayachandran reviewed by Matt McCline)

Repository: hive
Updated Branches:
  refs/heads/master f3358b036 -> b04665948


HIVE-13330: ORC vectorized string dictionary reader does not differentiate null vs empty string dictionary (Prasanth Jayachandran reviewed by Matt McCline)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b0466594
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b0466594
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b0466594

Branch: refs/heads/master
Commit: b04665948acbd6b1a793a287987984f4dfb19631
Parents: f3358b0
Author: Prasanth Jayachandran <pr...@apache.org>
Authored: Mon Apr 4 19:33:01 2016 -0500
Committer: Prasanth Jayachandran <pr...@apache.org>
Committed: Mon Apr 4 19:33:01 2016 -0500

----------------------------------------------------------------------
 .../hive/ql/io/orc/TreeReaderFactory.java       | 20 +++++--
 .../vector_orc_string_reader_empty_dict.q       | 20 +++++++
 .../vector_orc_string_reader_empty_dict.q.out   | 62 ++++++++++++++++++++
 3 files changed, 97 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b0466594/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
index d74a854..8bb32ea 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/TreeReaderFactory.java
@@ -1683,6 +1683,7 @@ public class TreeReaderFactory {
    * stripe.
    */
   public static class StringDictionaryTreeReader extends TreeReader {
+    private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
     private DynamicByteArray dictionaryBuffer;
     private int[] dictionaryOffsets;
     protected IntegerReader reader;
@@ -1862,11 +1863,20 @@ public class TreeReaderFactory {
         }
         result.isRepeating = scratchlcv.isRepeating;
       } else {
-        // Entire stripe contains null strings.
-        result.isRepeating = true;
-        result.noNulls = false;
-        result.isNull[0] = true;
-        result.setRef(0, "".getBytes(), 0, 0);
+        if (dictionaryOffsets == null) {
+          // Entire stripe contains null strings.
+          result.isRepeating = true;
+          result.noNulls = false;
+          result.isNull[0] = true;
+          result.setRef(0, EMPTY_BYTE_ARRAY, 0, 0);
+        } else {
+          // stripe contains nulls and empty strings
+          for (int i = 0; i < batchSize; i++) {
+            if (!result.isNull[i]) {
+              result.setRef(i, EMPTY_BYTE_ARRAY, 0, 0);
+            }
+          }
+        }
       }
       return result;
     }

http://git-wip-us.apache.org/repos/asf/hive/blob/b0466594/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
new file mode 100644
index 0000000..0e8a743
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/vector_orc_string_reader_empty_dict.q
@@ -0,0 +1,20 @@
+create table orcstr (vcol varchar(20)) stored as orc;
+
+insert overwrite table orcstr select null from src;
+
+SET hive.fetch.task.conversion=none;
+
+SET hive.vectorized.execution.enabled=false;
+select vcol from orcstr limit 1;
+
+SET hive.vectorized.execution.enabled=true;
+select vcol from orcstr limit 1;
+
+insert overwrite table orcstr select "" from src;
+
+SET hive.vectorized.execution.enabled=false;
+select vcol from orcstr limit 1;
+
+SET hive.vectorized.execution.enabled=true;
+select vcol from orcstr limit 1;
+

http://git-wip-us.apache.org/repos/asf/hive/blob/b0466594/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
new file mode 100644
index 0000000..4f00bed
--- /dev/null
+++ b/ql/src/test/results/clientpositive/vector_orc_string_reader_empty_dict.q.out
@@ -0,0 +1,62 @@
+PREHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: create table orcstr (vcol varchar(20)) stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@orcstr
+PREHOOK: query: insert overwrite table orcstr select null from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select null from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+NULL
+PREHOOK: query: insert overwrite table orcstr select "" from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@orcstr
+POSTHOOK: query: insert overwrite table orcstr select "" from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@orcstr
+POSTHOOK: Lineage: orcstr.vcol EXPRESSION []
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+
+PREHOOK: query: select vcol from orcstr limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+POSTHOOK: query: select vcol from orcstr limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@orcstr
+#### A masked pattern was here ####
+