Posted to commits@hive.apache.org by zs...@apache.org on 2009/01/20 21:27:15 UTC

svn commit: r736092 - in /hadoop/hive/trunk: CHANGES.txt data/files/null.txt ql/src/test/queries/clientpositive/input21.q ql/src/test/results/clientpositive/input21.q.out serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java

Author: zshao
Date: Tue Jan 20 12:27:15 2009
New Revision: 736092

URL: http://svn.apache.org/viewvc?rev=736092&view=rev
Log:
HIVE-235. Fixed DynamicSerDe to work with null values with Thrift Protocols that can have missing fields for null values. (zshao)
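
For context, the failure mode described in the log is roughly this: with a protocol that writes nothing at all for a null field, deserializing into a reused row object leaves the field's previous value in place unless the deserializer resets it. A minimal, self-contained sketch of that effect (plain Java; StaleFieldDemo and its map-based "record" are illustrative stand-ins, not the actual DynamicSerDe or Thrift classes):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

// Toy model of the bug: a "serialized record" is just a map of fieldId -> value,
// and null fields are never written, mirroring protocols that can have missing
// fields for null values.
public class StaleFieldDemo {

  // Buggy deserializer: only fields present in the stream are touched, so a
  // reused row keeps stale values for fields that were null this time around.
  static void deserializeInto(List<Object> reuse, Map<Integer, Object> record) {
    for (Map.Entry<Integer, Object> e : record.entrySet()) {
      reuse.set(e.getKey(), e.getValue());
    }
  }

  public static void main(String[] args) {
    List<Object> row = new ArrayList<Object>(Arrays.asList(null, null, null));

    Map<Integer, Object> first = new LinkedHashMap<Integer, Object>();
    first.put(0, "1.0");
    first.put(1, "1");
    first.put(2, "same");
    deserializeInto(row, first);
    System.out.println(row);   // [1.0, 1, same]

    Map<Integer, Object> second = new LinkedHashMap<Integer, Object>();
    second.put(2, "same");     // fields 0 and 1 were null, so the writer skipped them
    deserializeInto(row, second);
    System.out.println(row);   // still [1.0, 1, same] -- should be [null, null, same]
  }
}

The patch below addresses exactly this by recording which field ids were actually read and clearing every slot that was not.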

Added:
    hadoop/hive/trunk/data/files/null.txt
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/input21.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/input21.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=736092&r1=736091&r2=736092&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Jan 20 12:27:15 2009
@@ -80,8 +80,11 @@
 
   BUG FIXES
 
-    HIVE-222. Fixed Group by on a combination of disitinct and non distinct aggregates.
-    (Ashish Thusoo via zshao)
+    HIVE-235. Fixed DynamicSerDe to work with null values with Thrift
+    Protocols that can have missing fields for null values. (zshao)
+
+    HIVE-222. Fixed Group by on a combination of disitinct and non distinct
+    aggregates. (Ashish Thusoo via zshao)
 
     HIVE-161. Fixed xpath x.y when x is a null list. (zshao)
 

Added: hadoop/hive/trunk/data/files/null.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/data/files/null.txt?rev=736092&view=auto
==============================================================================
--- hadoop/hive/trunk/data/files/null.txt (added)
+++ hadoop/hive/trunk/data/files/null.txt Tue Jan 20 12:27:15 2009
@@ -0,0 +1,10 @@
+1.01same0
+1.01same1
+1.01same2
+1.01same3
+1.01same4
+\N1same5
+\N\Nsame6
+1.0\Nsame7
+1.01same8
+1.01same9
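
The fields in null.txt appear to be separated by Hive's default Ctrl-A (\001) field delimiter, which does not render in this plain-text view, and \N is the default textual marker for NULL. Purely as an illustration, a throwaway generator like the hypothetical WriteNullTxt below would produce an equivalent file under those assumptions:

import java.io.PrintWriter;

// Hypothetical generator for a file equivalent to data/files/null.txt,
// assuming Hive's default text-table layout: Ctrl-A (\001) between fields
// and the literal token \N for NULL.
public class WriteNullTxt {
  public static void main(String[] args) throws Exception {
    final char SEP = '\001';         // default Hive field delimiter
    final String NULL_TOKEN = "\\N"; // default Hive null marker
    PrintWriter out = new PrintWriter("null.txt", "UTF-8");
    for (int i = 0; i < 10; i++) {
      String a = (i == 5 || i == 6) ? NULL_TOKEN : "1.0";
      String b = (i == 6 || i == 7) ? NULL_TOKEN : "1";
      out.println(a + SEP + b + SEP + "same" + SEP + i);
    }
    out.close();
  }
}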

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/input21.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/input21.q?rev=736092&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/input21.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/input21.q Tue Jan 20 12:27:15 2009
@@ -0,0 +1,10 @@
+DROP TABLE src_null;
+
+CREATE TABLE src_null(a STRING, b STRING, c STRING, d STRING) STORED AS TEXTFILE;
+LOAD DATA LOCAL INPATH '../data/files/null.txt' INTO TABLE src_null;
+
+EXPLAIN SELECT * FROM src_null DISTRIBUTE BY c SORT BY d;
+
+SELECT * FROM src_null DISTRIBUTE BY c SORT BY d;
+
+DROP TABLE src_null;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/input21.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/input21.q.out?rev=736092&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/input21.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/input21.q.out Tue Jan 20 12:27:15 2009
@@ -0,0 +1,63 @@
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src_null)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_DISTRIBUTEBY c) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC d))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src_null 
+            Select Operator
+              expressions:
+                    expr: a
+                    type: string
+                    expr: b
+                    type: string
+                    expr: c
+                    type: string
+                    expr: d
+                    type: string
+              Reduce Output Operator
+                key expressions:
+                      expr: 3
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: 2
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: 0
+                      type: string
+                      expr: 1
+                      type: string
+                      expr: 2
+                      type: string
+                      expr: 3
+                      type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+1.0	1	same	0
+1.0	1	same	1
+1.0	1	same	2
+1.0	1	same	3
+1.0	1	same	4
+NULL	1	same	5
+NULL	NULL	same	6
+1.0	NULL	same	7
+1.0	1	same	8
+1.0	1	same	9

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java?rev=736092&r1=736091&r2=736092&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/dynamic_type/DynamicSerDeFieldList.java Tue Jan 20 12:27:15 2009
@@ -119,17 +119,18 @@
    */
   protected boolean isRealThrift = false;
 
+  protected boolean[] fieldsPresent; 
   public Object deserialize(Object reuse, TProtocol iprot)  throws SerDeException, TException, IllegalAccessException {
     ArrayList<Object> struct = null;
 
     if (reuse == null) {
       struct = new ArrayList<Object>(this.getNumFields());
-      for(int i=0; i<this.getNumFields(); i++) {
+      for(int i=0; i<ordered_types.length; i++) {
         struct.add(null);
       }
     } else {
       struct = (ArrayList<Object>) reuse;
-      assert(struct.size() == this.getNumFields());
+      assert(struct.size() == ordered_types.length);
     }
 
     boolean fastSkips = iprot instanceof org.apache.hadoop.hive.serde2.thrift.SkippableTProtocol;
@@ -137,6 +138,11 @@
     // may need to strip away the STOP marker when in thrift mode
     boolean stopSeen = false;
 
+    if (fieldsPresent == null) {
+      fieldsPresent = new boolean[ordered_types.length];
+    }
+    Arrays.fill(fieldsPresent, false);
+
     // Read the fields.
     for(int i = 0; i < this.getNumFields(); i++) {
       DynamicSerDeTypeBase mt = null;
@@ -189,7 +195,15 @@
       if(thrift_mode) {
         iprot.readFieldEnd();
       }
+      fieldsPresent[orderedId] = true;
     }
+    
+    for(int i = 0; i < ordered_types.length; i++) {
+      if (!fieldsPresent[i]) {
+        struct.set(i, null);
+      }
+    }
+    
     if(thrift_mode && !stopSeen) {
       // strip off the STOP marker, which may be left if all the fields were in the serialization
       iprot.readFieldBegin();
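
Read in isolation, the pattern this hunk adds is: keep a reusable boolean mask of which field ids actually appeared in the stream, and after the read loop set every slot that was never seen back to null. The simplified, hypothetical reader below shows just that bookkeeping (PresenceTrackingReader and FieldReader are illustrative stand-ins; the real method above also handles thrift_mode, skippable protocols, and the STOP marker):

import java.util.Arrays;
import java.util.List;

// Simplified sketch of the presence tracking added in this patch. FieldReader
// is a stand-in for the protocol/field machinery; only the null-handling
// bookkeeping is shown.
public class PresenceTrackingReader {
  private boolean[] fieldsPresent;   // lazily allocated, reused across rows

  public void readRow(List<Object> struct, FieldReader in, int numFields) {
    if (fieldsPresent == null) {
      fieldsPresent = new boolean[numFields];
    }
    Arrays.fill(fieldsPresent, false);

    int fieldId;
    while ((fieldId = in.nextFieldId()) >= 0) {   // -1 means no more fields
      struct.set(fieldId, in.readValue());
      fieldsPresent[fieldId] = true;
    }

    // Any field that never showed up in the stream was null on the writer
    // side, so clear whatever the reused struct still holds in that slot.
    for (int i = 0; i < numFields; i++) {
      if (!fieldsPresent[i]) {
        struct.set(i, null);
      }
    }
  }

  /** Hypothetical minimal field source. */
  public interface FieldReader {
    int nextFieldId();    // next field id, or -1 at end of record
    Object readValue();   // value for the field just announced
  }
}

Reusing the fieldsPresent array across rows, as the patch does, avoids allocating a fresh mask for every record.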