You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ga...@apache.org on 2014/10/13 22:43:32 UTC

svn commit: r1631539 - in /hive/branches/branch-0.14/ql/src: java/org/apache/hadoop/hive/ql/io/orc/ test/org/apache/hadoop/hive/ql/io/orc/ test/queries/clientpositive/ test/results/clientpositive/ test/results/clientpositive/tez/

Author: gates
Date: Mon Oct 13 20:43:32 2014
New Revision: 1631539

URL: http://svn.apache.org/r1631539
Log:
HIVE-8332 Reading an ACID table with vectorization on results in NPE (Alan Gates, reviewed by Owen O'Malley)

Added:
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/acid_vectorization.q.out
Modified:
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
    hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
    hive/branches/branch-0.14/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
    hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization.q
    hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization.q.out

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java?rev=1631539&r1=1631538&r2=1631539&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java Mon Oct 13 20:43:32 2014
@@ -1134,7 +1134,7 @@ public class OrcInputFormat  implements 
 
       @Override
       public ObjectInspector getObjectInspector() {
-        return ((StructObjectInspector) reader.getObjectInspector())
+        return ((StructObjectInspector) records.getObjectInspector())
             .getAllStructFieldRefs().get(OrcRecordUpdater.ROW)
             .getFieldObjectInspector();
       }

Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java?rev=1631539&r1=1631538&r2=1631539&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcRawRecordMerger.java Mon Oct 13 20:43:32 2014
@@ -28,6 +28,7 @@ import org.apache.hadoop.hive.common.Val
 import org.apache.hadoop.hive.ql.io.AcidInputFormat;
 import org.apache.hadoop.hive.ql.io.AcidUtils;
 import org.apache.hadoop.hive.ql.io.RecordIdentifier;
+import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
@@ -37,9 +38,10 @@ import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 
 import java.io.IOException;
+import java.util.ArrayDeque;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collections;
+import java.util.Deque;
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
@@ -627,8 +629,16 @@ public class OrcRawRecordMerger implemen
 
     // Parse the configuration parameters
     ArrayList<String> columnNames = new ArrayList<String>();
+    Deque<Integer> virtualColumns = new ArrayDeque<Integer>();
     if (columnNameProperty != null && columnNameProperty.length() > 0) {
-      Collections.addAll(columnNames, columnNameProperty.split(","));
+      String[] colNames = columnNameProperty.split(",");
+      for (int i = 0; i < colNames.length; i++) {
+        if (VirtualColumn.VIRTUAL_COLUMN_NAMES.contains(colNames[i])) {
+          virtualColumns.addLast(i);
+        } else {
+          columnNames.add(colNames[i]);
+        }
+      }
     }
     if (columnTypeProperty == null) {
       // Default type: all string
@@ -644,6 +654,9 @@ public class OrcRawRecordMerger implemen
 
     ArrayList<TypeInfo> fieldTypes =
         TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    while (virtualColumns.size() > 0) {
+      fieldTypes.remove(virtualColumns.removeLast());
+    }
     StructTypeInfo rowType = new StructTypeInfo();
     rowType.setAllStructFieldNames(columnNames);
     rowType.setAllStructFieldTypeInfos(fieldTypes);

Modified: hive/branches/branch-0.14/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java?rev=1631539&r1=1631538&r2=1631539&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java (original)
+++ hive/branches/branch-0.14/ql/src/test/org/apache/hadoop/hive/ql/io/orc/TestInputOutputFormat.java Mon Oct 13 20:43:32 2014
@@ -73,6 +73,7 @@ import org.apache.hadoop.hive.ql.io.sarg
 import org.apache.hadoop.hive.ql.plan.MapWork;
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
@@ -1266,6 +1267,8 @@ public class TestInputOutputFormat {
     }
     conf.set("hive.io.file.readcolumn.ids", columnIds.toString());
     conf.set("partition_columns", "p");
+    conf.set(serdeConstants.LIST_COLUMNS, columnNames.toString());
+    conf.set(serdeConstants.LIST_COLUMN_TYPES, columnTypes.toString());
     MockFileSystem fs = (MockFileSystem) warehouseDir.getFileSystem(conf);
     fs.clear();
 

Modified: hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization.q
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization.q?rev=1631539&r1=1631538&r2=1631539&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization.q (original)
+++ hive/branches/branch-0.14/ql/src/test/queries/clientpositive/acid_vectorization.q Mon Oct 13 20:43:32 2014
@@ -12,3 +12,5 @@ set hive.vectorized.execution.enabled=tr
 update acid_vectorized set b = 'foo' where b = 'bar';
 set hive.vectorized.execution.enabled=true;
 delete from acid_vectorized where b = 'foo';
+set hive.vectorized.execution.enabled=true;
+select a, b from acid_vectorized order by a, b;

Modified: hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization.q.out?rev=1631539&r1=1631538&r2=1631539&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization.q.out (original)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/acid_vectorization.q.out Mon Oct 13 20:43:32 2014
@@ -42,3 +42,21 @@ POSTHOOK: query: delete from acid_vector
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@acid_vectorized
 POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: select a, b from acid_vectorized order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b from acid_vectorized order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343	oj1YrV5Wa
+-1073051226	A34p7oRr2WvUJNf
+-1072910839	0iqrc5
+-1072081801	dPkN74F7
+-1072076362	2uLyD28144vklju213J1mr
+-1071480828	aw724t8c5558x2xneC624
+-1071363017	Anj0oF
+-1070883071	0ruyd6Y50JpdGRf6HqD
+-1070551679	iUR3Q
+-1069736047	k17Am8uPHWk02cEf1jet

Added: hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/acid_vectorization.q.out
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/acid_vectorization.q.out?rev=1631539&view=auto
==============================================================================
--- hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/acid_vectorization.q.out (added)
+++ hive/branches/branch-0.14/ql/src/test/results/clientpositive/tez/acid_vectorization.q.out Mon Oct 13 20:43:32 2014
@@ -0,0 +1,62 @@
+PREHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: CREATE TABLE acid_vectorized(a INT, b STRING) CLUSTERED BY(a) INTO 2 BUCKETS STORED AS ORC TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized select cint, cstring1 from alltypesorc where cint is not null order by cint limit 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+PREHOOK: query: insert into table acid_vectorized values (1, 'bar')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: insert into table acid_vectorized values (1, 'bar')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@acid_vectorized
+POSTHOOK: Lineage: acid_vectorized.a EXPRESSION [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: acid_vectorized.b SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: update acid_vectorized set b = 'foo' where b = 'bar'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: delete from acid_vectorized where b = 'foo'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+PREHOOK: Output: default@acid_vectorized
+POSTHOOK: query: delete from acid_vectorized where b = 'foo'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+POSTHOOK: Output: default@acid_vectorized
+PREHOOK: query: select a, b from acid_vectorized order by a, b
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+POSTHOOK: query: select a, b from acid_vectorized order by a, b
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acid_vectorized
+#### A masked pattern was here ####
+-1073279343	oj1YrV5Wa
+-1073051226	A34p7oRr2WvUJNf
+-1072910839	0iqrc5
+-1072081801	dPkN74F7
+-1072076362	2uLyD28144vklju213J1mr
+-1071480828	aw724t8c5558x2xneC624
+-1071363017	Anj0oF
+-1070883071	0ruyd6Y50JpdGRf6HqD
+-1070551679	iUR3Q
+-1069736047	k17Am8uPHWk02cEf1jet