You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by na...@apache.org on 2009/11/02 20:05:05 UTC

svn commit: r832059 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/io/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/

Author: namit
Date: Mon Nov  2 19:05:05 2009
New Revision: 832059

URL: http://svn.apache.org/viewvc?rev=832059&view=rev
Log:
HIVE-796. RCFile results missing columns from UNION ALL
(He Yongqiang via namit)


Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_union.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=832059&r1=832058&r2=832059&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Mon Nov  2 19:05:05 2009
@@ -228,6 +228,9 @@
     HIVE-902. Fix cli.sh to work with hadoop versions less than 20.
     (Carl Steinbach via zshao)
 
+    HIVE-796. RCFile results missing columns from UNION ALL
+    (He Yongqiang via namit)
+
 Release 0.4.0 -  Unreleased
 
   INCOMPATIBLE CHANGES

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java?rev=832059&r1=832058&r2=832059&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java Mon Nov  2 19:05:05 2009
@@ -197,6 +197,35 @@
    * 
    */
   public static void setReadColumnIDs(Configuration conf, ArrayList<Integer> ids) {
+    // Serialize the id list and store it, overwriting any value already stored.
+    setReadColumnIDConf(conf, toReadColumnIDString(ids));
+  }
+
+  /**
+   * Appends read columns' ids (start from zero) to any ids already configured
+   * for RCFile's Reader. A null or empty list keeps the configured ids as-is.
+   */
+  public static void appendReadColumnIDs(Configuration conf, ArrayList<Integer> ids) {
+    String id = toReadColumnIDString(ids);
+    String old = conf.get(READ_COLUMN_IDS_CONF_STR, null);
+    String newConfStr = id;
+    if (id == null || id.length() == 0)
+      newConfStr = old; // nothing to add; avoids writing the literal "null"
+    else if (old != null && old.length() > 0)
+      newConfStr = id + StringUtils.COMMA_STR + old; // skip join on empty old
+    setReadColumnIDConf(conf, newConfStr);
+  }
+  
+  private static void setReadColumnIDConf(Configuration conf, String id) {
+    // A null or zero-length id string is stored as the empty string.
+    String value = "";
+    if (id != null && id.length() > 0) {
+      value = id;
+    }
+    conf.set(READ_COLUMN_IDS_CONF_STR, value);
+  }
+
+  private static String toReadColumnIDString(ArrayList<Integer> ids) {
     String id = null;
     if (ids != null) {
       for (int i = 0; i < ids.size(); i++) {
@@ -207,13 +236,7 @@
         }
       }
     }
-
-    if (id == null || id.length() <= 0) {
-      conf.set(READ_COLUMN_IDS_CONF_STR, "");
-      return;
-    }
-
-    conf.set(READ_COLUMN_IDS_CONF_STR, id);
+    return id;
   }
 
   /**

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=832059&r1=832058&r2=832059&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Mon Nov  2 19:05:05 2009
@@ -311,7 +311,7 @@
         TableScanOperator tableScan = (TableScanOperator) op;
         ArrayList<Integer> list = tableScan.getNeededColumnIDs();
         if (list != null)
-          HiveFileFormatUtils.setReadColumnIDs(jobConf, list);
+          HiveFileFormatUtils.appendReadColumnIDs(jobConf, list);
         else
           HiveFileFormatUtils.setFullyReadColumns(jobConf);
       }

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_union.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_union.q?rev=832059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_union.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/rcfile_union.q Mon Nov  2 19:05:05 2009
@@ -0,0 +1,15 @@
+DROP TABLE rcfile_unionTable;
+CREATE table rcfile_unionTable (b STRING, c STRING)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+STORED AS RCFILE;
+
+FROM src
+INSERT OVERWRITE TABLE rcfile_unionTable SELECT src.key, src.value LIMIT 10;
+
+SELECT * FROM (
+SELECT b AS cola FROM rcfile_unionTable
+UNION ALL
+SELECT c AS cola FROM rcfile_unionTable) s;
+
+DROP TABLE rcfile_unionTable;
\ No newline at end of file

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out?rev=832059&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/rcfile_union.q.out Mon Nov  2 19:05:05 2009
@@ -0,0 +1,64 @@
+PREHOOK: query: DROP TABLE rcfile_unionTable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE rcfile_unionTable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE table rcfile_unionTable (b STRING, c STRING)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+STORED AS RCFILE
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE table rcfile_unionTable (b STRING, c STRING)
+ROW FORMAT SERDE
+  'org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe'
+STORED AS RCFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@rcfile_unionTable
+PREHOOK: query: FROM src
+INSERT OVERWRITE TABLE rcfile_unionTable SELECT src.key, src.value LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@rcfile_uniontable
+POSTHOOK: query: FROM src
+INSERT OVERWRITE TABLE rcfile_unionTable SELECT src.key, src.value LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@rcfile_uniontable
+PREHOOK: query: SELECT * FROM (
+SELECT b AS cola FROM rcfile_unionTable
+UNION ALL
+SELECT c AS cola FROM rcfile_unionTable) s
+PREHOOK: type: QUERY
+PREHOOK: Input: default@rcfile_uniontable
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1325887249/10000
+POSTHOOK: query: SELECT * FROM (
+SELECT b AS cola FROM rcfile_unionTable
+UNION ALL
+SELECT c AS cola FROM rcfile_unionTable) s
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@rcfile_uniontable
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/tmp/1325887249/10000
+val_238
+238
+val_86
+86
+val_311
+311
+val_27
+27
+val_165
+165
+val_409
+409
+val_255
+255
+val_278
+278
+val_98
+98
+val_484
+484
+PREHOOK: query: DROP TABLE rcfile_unionTable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE rcfile_unionTable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@rcfile_uniontable