You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2013/03/20 00:39:54 UTC

svn commit: r1458570 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java test/queries/clientpositive/orc_empty_strings.q test/results/clientpositive/orc_empty_strings.q.out

Author: namit
Date: Tue Mar 19 23:39:54 2013
New Revision: 1458570

URL: http://svn.apache.org/r1458570
Log:
HIVE-4154 NPE reading column of empty string from ORC file
(Kevin Wilfong via namit)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q
    hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1458570&r1=1458569&r2=1458570&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Tue Mar 19 23:39:54 2013
@@ -17,6 +17,15 @@
  */
 package org.apache.hadoop.hive.ql.io.orc;
 
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -30,15 +39,6 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 
-import java.io.EOFException;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 class RecordReaderImpl implements RecordReader {
   private final FSDataInputStream file;
   private final long firstRow;
@@ -686,7 +686,13 @@ class RecordReaderImpl implements Record
         } else {
           length = dictionaryBuffer.size() - offset;
         }
-        dictionaryBuffer.setText(result, offset, length);
+        // If the column contains only empty strings, the dictionary size is zero and the
+        // buffer is null; in that case clear the result so it reads as an empty string.
+        if (dictionaryBuffer != null) {
+          dictionaryBuffer.setText(result, offset, length);
+        } else {
+          result.clear();
+        }
       }
       return result;
     }

Added: hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q?rev=1458570&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/orc_empty_strings.q Tue Mar 19 23:39:54 2013
@@ -0,0 +1,16 @@
+CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat';
+
+INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10;
+
+-- Test reading a column which is just empty strings
+
+SELECT * FROM test_orc; 
+
+INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10;
+
+-- Test reading a column which has some empty strings
+
+SELECT * FROM test_orc;

Added: hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out?rev=1458570&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/orc_empty_strings.q.out Tue Mar 19 23:39:54 2013
@@ -0,0 +1,77 @@
+PREHOOK: query: CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE test_orc (key STRING)
+ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.orc.OrcSerde' 
+STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat' 
+OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@test_orc
+PREHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_orc
+POSTHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT '' FROM src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_orc
+POSTHOOK: Lineage: test_orc.key SIMPLE []
+PREHOOK: query: -- Test reading a column which is just empty strings
+
+SELECT * FROM test_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: query: -- Test reading a column which is just empty strings
+
+SELECT * FROM test_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_orc.key SIMPLE []
+
+
+
+
+
+
+
+
+
+
+PREHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@test_orc
+POSTHOOK: query: INSERT OVERWRITE TABLE test_orc SELECT IF (key % 3 = 0, key, '') FROM src limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@test_orc
+POSTHOOK: Lineage: test_orc.key SIMPLE []
+POSTHOOK: Lineage: test_orc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- Test reading a column which has some empty strings
+
+SELECT * FROM test_orc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: query: -- Test reading a column which has some empty strings
+
+SELECT * FROM test_orc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_orc
+#### A masked pattern was here ####
+POSTHOOK: Lineage: test_orc.key SIMPLE []
+POSTHOOK: Lineage: test_orc.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+
+
+
+27
+165
+
+255
+
+
+