You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by am...@apache.org on 2014/09/09 11:05:27 UTC

svn commit: r1623717 - in /hive/trunk: ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/

Author: amareshwari
Date: Tue Sep  9 09:05:26 2014
New Revision: 1623717

URL: http://svn.apache.org/r1623717
Log:
HIVE-2390 : Expand support for union types (Suma Shivaprasad via amareshwari)

Added:
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
Modified:
    hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q
    hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java

Modified: hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q Tue Sep  9 09:05:26 2014
@@ -30,3 +30,7 @@ CREATE TABLE dest1(a map<string,string>)
 INSERT OVERWRITE TABLE dest1 SELECT src_thrift.mstringstring FROM src_thrift DISTRIBUTE BY 1;
 SELECT * from dest1;
 
+CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE;
+INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2;
+SELECT * from destBin ORDER BY a;
+DROP TABLE destBin;

Modified: hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out Tue Sep  9 09:05:26 2014
@@ -219,3 +219,536 @@ NULL
 {"key_7":"value_7"}
 {"key_8":"value_8"}
 {"key_9":"value_9"}
+PREHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@destBin
+POSTHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@destBin
+PREHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket2
+PREHOOK: Output: default@destbin
+POSTHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket2
+POSTHOOK: Output: default@destbin
+POSTHOOK: Lineage: destbin.a EXPRESSION [(srcbucket2)srcbucket2.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: SELECT * from destBin ORDER BY a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@destbin
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * from destBin ORDER BY a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@destbin
+#### A masked pattern was here ####
+{0:0}
+{0:0}
+{0:0}
+{0:10}
+{0:11}
+{0:12}
+{0:12}
+{0:15}
+{0:15}
+{0:17}
+{0:18}
+{0:18}
+{0:19}
+{0:20}
+{0:24}
+{0:24}
+{0:26}
+{0:26}
+{0:27}
+{0:28}
+{0:2}
+{0:30}
+{0:33}
+{0:34}
+{0:35}
+{0:35}
+{0:35}
+{0:37}
+{0:37}
+{0:400}
+{0:401}
+{0:401}
+{0:401}
+{0:401}
+{0:401}
+{0:402}
+{0:403}
+{0:403}
+{0:403}
+{0:404}
+{0:404}
+{0:406}
+{0:406}
+{0:406}
+{0:406}
+{0:407}
+{0:409}
+{0:409}
+{0:409}
+{0:411}
+{0:413}
+{0:413}
+{0:414}
+{0:414}
+{0:417}
+{0:417}
+{0:417}
+{0:418}
+{0:419}
+{0:41}
+{0:421}
+{0:424}
+{0:424}
+{0:427}
+{0:429}
+{0:429}
+{0:42}
+{0:42}
+{0:430}
+{0:430}
+{0:430}
+{0:431}
+{0:431}
+{0:431}
+{0:432}
+{0:435}
+{0:436}
+{0:437}
+{0:438}
+{0:438}
+{0:438}
+{0:439}
+{0:439}
+{0:43}
+{0:443}
+{0:444}
+{0:446}
+{0:448}
+{0:449}
+{0:44}
+{0:452}
+{0:453}
+{0:454}
+{0:454}
+{0:454}
+{0:455}
+{0:457}
+{0:458}
+{0:458}
+{0:459}
+{0:459}
+{0:460}
+{0:462}
+{0:462}
+{0:463}
+{0:463}
+{0:466}
+{0:466}
+{0:466}
+{0:467}
+{0:468}
+{0:468}
+{0:468}
+{0:468}
+{0:469}
+{0:469}
+{0:469}
+{0:469}
+{0:469}
+{0:470}
+{0:472}
+{0:475}
+{0:477}
+{0:478}
+{0:478}
+{0:479}
+{0:47}
+{0:480}
+{0:480}
+{0:480}
+{0:481}
+{0:482}
+{0:483}
+{0:484}
+{0:485}
+{0:487}
+{0:489}
+{0:489}
+{0:489}
+{0:489}
+{0:490}
+{0:491}
+{0:492}
+{0:492}
+{0:493}
+{0:494}
+{0:495}
+{0:496}
+{0:497}
+{0:498}
+{0:498}
+{0:498}
+{0:4}
+{0:51}
+{0:51}
+{0:53}
+{0:54}
+{0:57}
+{0:58}
+{0:58}
+{0:5}
+{0:5}
+{0:5}
+{0:64}
+{0:65}
+{0:66}
+{0:67}
+{0:67}
+{0:69}
+{0:70}
+{0:70}
+{0:70}
+{0:72}
+{0:72}
+{0:74}
+{0:76}
+{0:76}
+{0:77}
+{0:78}
+{0:80}
+{0:82}
+{0:83}
+{0:83}
+{0:84}
+{0:84}
+{0:85}
+{0:86}
+{0:87}
+{0:8}
+{0:90}
+{0:90}
+{0:90}
+{0:92}
+{0:95}
+{0:95}
+{0:96}
+{0:97}
+{0:97}
+{0:98}
+{0:98}
+{0:9}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+PREHOOK: query: DROP TABLE destBin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@destbin
+PREHOOK: Output: default@destbin
+POSTHOOK: query: DROP TABLE destBin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@destbin
+POSTHOOK: Output: default@destbin

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Tue Sep  9 09:05:26 2014
@@ -23,6 +23,7 @@ import java.util.List;
 import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
 import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
 import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryUnionObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -106,6 +107,8 @@ public final class LazyBinaryFactory {
       return new LazyBinaryArray((LazyBinaryListObjectInspector) oi);
     case STRUCT:
       return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi);
+    case UNION:
+      return new LazyBinaryUnion((LazyBinaryUnionObjectInspector) oi);
     }
 
     throw new RuntimeException("Hive LazyBinarySerDe Internal error.");

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Sep  9 09:05:26 2014
@@ -43,8 +43,8 @@ import org.apache.hadoop.hive.serde2.laz
 import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -281,6 +281,13 @@ public class LazyBinarySerDe extends Abs
     }
   }
 
+  private static void serializeUnion(RandomAccessOutput byteStream, Object obj,
+    UnionObjectInspector uoi, BooleanRef warnedOnceNullMapKey) throws SerDeException {
+    byte tag = uoi.getTag(obj);
+    byteStream.write(tag);
+    serialize(byteStream, uoi.getField(obj), uoi.getObjectInspectors().get(tag), false, warnedOnceNullMapKey);
+  }
+
   private static void serializeText(
       RandomAccessOutput byteStream, Text t, boolean skipLengthPrefix) {
     /* write byte size of the string which is a vint */
@@ -544,24 +551,31 @@ public class LazyBinarySerDe extends Abs
       }
       return;
     }
-    case STRUCT: {
+    case STRUCT:
+    case UNION:{
       int byteSizeStart = 0;
-      int structStart = 0;
+      int typeStart = 0;
       if (!skipLengthPrefix) {
         // 1/ reserve spaces for the byte size of the struct
         // which is a integer and takes four bytes
         byteSizeStart = byteStream.getLength();
         byteStream.reserve(4);
-        structStart = byteStream.getLength();
+        typeStart = byteStream.getLength();
+      }
+
+      if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory()) ) {
+        // 2/ serialize the struct
+        serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
+      } else {
+        // 2/ serialize the union
+        serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
       }
-      // 2/ serialize the struct
-      serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
 
       if (!skipLengthPrefix) {
         // 3/ update the byte size of the struct
-        int structEnd = byteStream.getLength();
-        int structSize = structEnd - structStart;
-        writeSizeAtOffset(byteStream, byteSizeStart, structSize);
+        int typeEnd = byteStream.getLength();
+        int typeSize = typeEnd - typeStart;
+        writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
       }
       return;
     }

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java?rev=1623717&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java Tue Sep  9 09:05:26 2014
@@ -0,0 +1,196 @@
+/**
+   * Licensed to the Apache Software Foundation (ASF) under one
+   * or more contributor license agreements.  See the NOTICE file
+   * distributed with this work for additional information
+   * regarding copyright ownership.  The ASF licenses this file
+   * to you under the Apache License, Version 2.0 (the
+   * "License"); you may not use this file except in compliance
+   * with the License.  You may obtain a copy of the License at
+   *
+   *     http://www.apache.org/licenses/LICENSE-2.0
+   *
+   * Unless required by applicable law or agreed to in writing, software
+   * distributed under the License is distributed on an "AS IS" BASIS,
+   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   * See the License for the specific language governing permissions and
+   * limitations under the License.
+   */
+  package org.apache.hadoop.hive.serde2.lazybinary;
+
+  import java.util.ArrayList;
+  import java.util.Arrays;
+  import java.util.List;
+
+  import org.apache.commons.logging.Log;
+  import org.apache.commons.logging.LogFactory;
+  import org.apache.hadoop.hive.serde2.SerDeStatsStruct;
+  import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+  import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryUnionObjectInspector;
+  import org.apache.hadoop.hive.serde2.objectinspector.*;
+
+/**
+ * LazyBinaryUnion is serialized as follows:
+ * bytes[] -> start |-TAG-|---------FIELD---------| end
+ *
+ * Section TAG is one byte, holding the tag of the field that is set in the
+ * union. FIELD is a LazyBinaryObject holding the value of that field.
+ *
+ */
+  public class LazyBinaryUnion extends
+          LazyBinaryNonPrimitive<LazyBinaryUnionObjectInspector> implements SerDeStatsStruct {
+
+    private static Log LOG = LogFactory.getLog(LazyBinaryUnion.class.getName());
+
+    /**
+     * Whether the data is already parsed or not.
+     */
+    boolean parsed;
+
+    /**
+     * Size of serialized data
+     */
+    long serializedSize;
+
+    /**
+     * The field of the union which contains the value.
+     */
+    LazyBinaryObject field;
+
+    boolean fieldInited;
+
+    /**
+     * The start positions and lengths of union fields. Only valid when the data
+     * is parsed.
+     */
+    int fieldStart;
+    int fieldLength;
+
+    byte tag;
+
+    final LazyBinaryUtils.VInt vInt = new LazyBinaryUtils.VInt();
+
+    /**
+     * Construct a LazyBinaryUnion object with an ObjectInspector.
+     */
+    protected LazyBinaryUnion(LazyBinaryUnionObjectInspector oi) {
+      super(oi);
+    }
+
+    @Override
+    public void init(ByteArrayRef bytes, int start, int length) {
+      super.init(bytes, start, length);
+      parsed = false;
+      serializedSize = length;
+      fieldInited = false;
+      field = null;
+      cachedObject = null;
+    }
+
+    LazyBinaryUtils.RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+    boolean missingFieldWarned = false;
+    boolean extraFieldWarned = false;
+
+    /**
+     * Parse the byte[]: read the tag, create the lazy object for the tagged
+     * field, and fill fieldStart and fieldLength.
+     */
+    private void parse() {
+      LazyBinaryUnionObjectInspector uoi = (LazyBinaryUnionObjectInspector) oi;
+
+      /**
+       * Please note that tag is followed by field
+       */
+      int unionByteEnd = start + length;
+      byte[] byteArr = this.bytes.getData();
+
+      //Tag of union field is the first byte to be parsed
+      final int tagEnd = start + 1;
+      tag = byteArr[start];
+      field = LazyBinaryFactory.createLazyBinaryObject(uoi.getObjectInspectors().get(tag));
+      //Check the union field's length and offset
+      LazyBinaryUtils.checkObjectByteInfo(uoi.getObjectInspectors().get(tag), byteArr, tagEnd, recordInfo, vInt);
+      fieldStart = tagEnd + recordInfo.elementOffset;
+      // The tag byte is excluded: fieldLength covers only the field's data
+      fieldLength = recordInfo.elementSize;
+
+      // Extra bytes at the end?
+      if (!extraFieldWarned &&  (fieldStart + fieldLength) < unionByteEnd) {
+        extraFieldWarned = true;
+        LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
+                         + "problems.");
+      }
+
+      // Missing fields?
+      if (!missingFieldWarned && (fieldStart + fieldLength) > unionByteEnd) {
+        missingFieldWarned = true;
+        LOG.info("Missing fields! Expected 1 fields but "
+                         + "only got " + field + "! Ignoring similar problems.");
+      }
+
+      parsed = true;
+    }
+
+    /**
+     * Get the set field out of the union.
+     *
+     * If the field is a primitive field, return the actual object. Otherwise
+     * return the LazyObject. This is because PrimitiveObjectInspector does not
+     * have control over the object used by the user - the user simply uses
+     * the Object directly instead of going through Object
+     * PrimitiveObjectInspector.get(Object).
+     * @return The field as a LazyObject
+     */
+    public Object getField() {
+      if (!parsed) {
+        parse();
+      }
+      if(cachedObject == null) {
+        return uncheckedGetField();
+      }
+      return cachedObject;
+    }
+
+    /**
+     * Get the field out of the union without checking parsed. This is called
+     * by getField once the union has been parsed.
+     *
+     * Initializes the field lazily on first use and caches the resulting
+     * object in cachedObject.
+     * @return The value of the field
+     */
+    private Object uncheckedGetField() {
+      // Initialize the lazy field only once; later calls reuse the
+      // already-initialized object.
+      if (!fieldInited) {
+        fieldInited = true;
+        field.init(bytes, fieldStart, fieldLength);
+      }
+      cachedObject = field.getObject();
+      return field.getObject();
+    }
+
+    Object cachedObject;
+
+    @Override
+    public Object getObject() {
+      return this;
+    }
+
+    public long getRawDataSerializedSize() {
+      return serializedSize;
+    }
+
+  /**
+   * Get the set field's tag
+   *
+   *
+   * @return The tag of the field set in the union
+   */
+  public byte getTag() {
+    if (!parsed) {
+      parse();
+    }
+    return tag;
+  }
+  }
+

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java Tue Sep  9 09:05:26 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.WritableUtils;
 
@@ -226,6 +227,7 @@ public final class LazyBinaryUtils {
     case LIST:
     case MAP:
     case STRUCT:
+    case UNION:
       recordInfo.elementOffset = 4;
       recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
       break;
@@ -474,6 +476,20 @@ public final class LazyBinaryUtils {
             fieldObjectInspectors);
         break;
       }
+      case UNION: {
+        UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
+        final List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
+        List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
+          fieldTypeInfos.size());
+        for (int i = 0; i < fieldTypeInfos.size(); i++) {
+          fieldObjectInspectors
+            .add(getLazyBinaryObjectInspectorFromTypeInfo(fieldTypeInfos
+                                                            .get(i)));
+        }
+        result = LazyBinaryObjectInspectorFactory
+            .getLazyBinaryUnionObjectInspector(fieldObjectInspectors);
+        break;
+      }
       default: {
         result = null;
       }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java Tue Sep  9 09:05:26 2014
@@ -40,6 +40,9 @@ public final class LazyBinaryObjectInspe
   static ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector> cachedLazyBinaryStructObjectInspector =
       new ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector>();
 
+  static ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector> cachedLazyBinaryUnionObjectInspector =
+          new ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector>();
+
   public static LazyBinaryStructObjectInspector getLazyBinaryStructObjectInspector(
       List<String> structFieldNames,
       List<ObjectInspector> structFieldObjectInspectors) {
@@ -66,6 +69,20 @@ public final class LazyBinaryObjectInspe
     return result;
   }
 
+  public static LazyBinaryUnionObjectInspector getLazyBinaryUnionObjectInspector(
+          List<ObjectInspector> unionFieldObjectInspectors) {
+    ArrayList<Object> signature = new ArrayList<Object>(1);
+    signature.add(unionFieldObjectInspectors);
+
+    LazyBinaryUnionObjectInspector result = cachedLazyBinaryUnionObjectInspector
+            .get(signature);
+    if (result == null) {
+      result = new LazyBinaryUnionObjectInspector(unionFieldObjectInspectors);
+      cachedLazyBinaryUnionObjectInspector.put(signature, result);
+    }
+    return result;
+  }
+
   static ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector> cachedLazyBinaryListObjectInspector =
       new ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector>();
 

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java?rev=1623717&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java Tue Sep  9 09:05:26 2014
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUnion;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
+
+import java.util.List;
+
+/**
+ * ObjectInspector for LazyBinaryUnion.
+ *
+ * @see org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUnion
+ */
+public class LazyBinaryUnionObjectInspector extends
+    StandardUnionObjectInspector {
+
+  protected LazyBinaryUnionObjectInspector() {
+    super();
+  }
+  protected LazyBinaryUnionObjectInspector(List<ObjectInspector> unionFieldObjectInspectors) {
+    super(unionFieldObjectInspectors);
+  }
+
+  /**
+   * Return the tag of the object.
+   */
+  public byte getTag(Object o) {
+    if (o == null) {
+      return -1;
+    }
+    LazyBinaryUnion lazyBinaryUnion = (LazyBinaryUnion) o;
+    return lazyBinaryUnion.getTag();
+  }
+
+  /**
+   * Return the field based on the tag value associated with the Object.
+   */
+  public Object getField(Object o) {
+    if (o == null) {
+      return null;
+    }
+    LazyBinaryUnion lazyBinaryUnion = (LazyBinaryUnion) o;
+    return lazyBinaryUnion.getField();
+  }
+}