You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jv...@apache.org on 2011/08/08 21:07:19 UTC

svn commit: r1155059 - in /hive/trunk/ql/src: java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java test/queries/clientpositive/index_auto_empty.q test/results/clientpositive/index_auto_empty.q.out

Author: jvs
Date: Mon Aug  8 19:07:19 2011
New Revision: 1155059

URL: http://svn.apache.org/viewvc?rev=1155059&view=rev
Log:
HIVE-2138. Exception when no splits returned from index.
(Syed Albiz via jvs)


Added:
    hive/trunk/ql/src/test/queries/clientpositive/index_auto_empty.q
    hive/trunk/ql/src/test/results/clientpositive/index_auto_empty.q.out
Modified:
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java?rev=1155059&r1=1155058&r2=1155059&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java Mon Aug  8 19:07:19 2011
@@ -124,6 +124,10 @@ public class HiveIndexedInputFormat exte
       }
 
       Set<String> inputFiles = hiveIndexResult.buckets.keySet();
+      if (inputFiles == null || inputFiles.size() <= 0) {
+        // return empty splits if index results were empty
+        return new InputSplit[0];
+      }
       Iterator<String> iter = inputFiles.iterator();
       while(iter.hasNext()) {
         String path = iter.next();

Added: hive/trunk/ql/src/test/queries/clientpositive/index_auto_empty.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/index_auto_empty.q?rev=1155059&view=auto
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/index_auto_empty.q (added)
+++ hive/trunk/ql/src/test/queries/clientpositive/index_auto_empty.q Mon Aug  8 19:07:19 2011
@@ -0,0 +1,21 @@
+-- Test to ensure that an empty index result is propagated correctly
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE;
+
+-- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD;
+ALTER INDEX temp_index ON temp REBUILD;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=true;
+SET hive.optimize.index.filter.compact.minsize=0;
+
+-- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86;
+EXPLAIN SELECT * FROM temp WHERE key  = 86;
+SELECT * FROM temp WHERE key  = 86;
+
+SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat;
+SET hive.optimize.index.filter=false;
+DROP table temp;

Added: hive/trunk/ql/src/test/results/clientpositive/index_auto_empty.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/index_auto_empty.q.out?rev=1155059&view=auto
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/index_auto_empty.q.out (added)
+++ hive/trunk/ql/src/test/results/clientpositive/index_auto_empty.q.out Mon Aug  8 19:07:19 2011
@@ -0,0 +1,110 @@
+PREHOOK: query: -- Test to ensure that an empty index result is propagated correctly
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: -- Test to ensure that an empty index result is propagated correctly
+
+-- Create temp, and populate it with some values in src.
+CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@temp
+PREHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: -- Build an index on temp.
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+PREHOOK: type: ALTERINDEX_REBUILD
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: query: ALTER INDEX temp_index ON temp REBUILD
+POSTHOOK: type: ALTERINDEX_REBUILD
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@default__temp_temp_index__
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: -- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__temp_temp_index__
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-05_12-02-36_415_5588033903434501946/-mr-10000
+POSTHOOK: query: -- query should not return any values
+SELECT * FROM default__temp_temp_index__ WHERE key = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__temp_temp_index__
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-05_12-02-36_415_5588033903434501946/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: EXPLAIN SELECT * FROM temp WHERE key  = 86
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key  = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        temp 
+          TableScan
+            alias: temp
+            filterExpr:
+                expr: (key = 86)
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (key = 86)
+                  type: boolean
+              Select Operator
+                expressions:
+                      expr: key
+                      type: string
+                      expr: val
+                      type: string
+                outputColumnNames: _col0, _col1
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT * FROM temp WHERE key  = 86
+PREHOOK: type: QUERY
+PREHOOK: Input: default@temp
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-05_12-02-42_594_420605994856244839/-mr-10000
+POSTHOOK: query: SELECT * FROM temp WHERE key  = 86
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-05_12-02-42_594_420605994856244839/-mr-10000
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]
+PREHOOK: query: DROP table temp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@temp
+PREHOOK: Output: default@temp
+POSTHOOK: query: DROP table temp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@temp
+POSTHOOK: Output: default@temp
+POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ]