You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by am...@apache.org on 2014/09/09 11:05:27 UTC
svn commit: r1623717 - in /hive/trunk: ql/src/test/queries/clientpositive/
ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/
Author: amareshwari
Date: Tue Sep 9 09:05:26 2014
New Revision: 1623717
URL: http://svn.apache.org/r1623717
Log:
HIVE-2390 : Expand support for union types (Suma Shivaprasad via amareshwari)
Added:
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
Modified:
hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q
hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
Modified: hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/input_lazyserde.q Tue Sep 9 09:05:26 2014
@@ -30,3 +30,7 @@ CREATE TABLE dest1(a map<string,string>)
INSERT OVERWRITE TABLE dest1 SELECT src_thrift.mstringstring FROM src_thrift DISTRIBUTE BY 1;
SELECT * from dest1;
+CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE;
+INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2;
+SELECT * from destBin ORDER BY a;
+DROP TABLE destBin;
Modified: hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/input_lazyserde.q.out Tue Sep 9 09:05:26 2014
@@ -219,3 +219,536 @@ NULL
{"key_7":"value_7"}
{"key_8":"value_8"}
{"key_9":"value_9"}
+PREHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@destBin
+POSTHOOK: query: CREATE TABLE destBin(a UNIONTYPE<int, double, array<string>, struct<col1:int,col2:string>>) ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe' STORED AS SEQUENCEFILE
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@destBin
+PREHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket2
+PREHOOK: Output: default@destbin
+POSTHOOK: query: INSERT OVERWRITE TABLE destBin SELECT create_union( CASE WHEN key < 100 THEN 0 WHEN key < 200 THEN 1 WHEN key < 300 THEN 2 WHEN key < 400 THEN 3 ELSE 0 END, key, 2.0, array("one","two"), struct(5,"five")) FROM srcbucket2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket2
+POSTHOOK: Output: default@destbin
+POSTHOOK: Lineage: destbin.a EXPRESSION [(srcbucket2)srcbucket2.FieldSchema(name:key, type:int, comment:null), ]
+PREHOOK: query: SELECT * from destBin ORDER BY a
+PREHOOK: type: QUERY
+PREHOOK: Input: default@destbin
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * from destBin ORDER BY a
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@destbin
+#### A masked pattern was here ####
+{0:0}
+{0:0}
+{0:0}
+{0:10}
+{0:11}
+{0:12}
+{0:12}
+{0:15}
+{0:15}
+{0:17}
+{0:18}
+{0:18}
+{0:19}
+{0:20}
+{0:24}
+{0:24}
+{0:26}
+{0:26}
+{0:27}
+{0:28}
+{0:2}
+{0:30}
+{0:33}
+{0:34}
+{0:35}
+{0:35}
+{0:35}
+{0:37}
+{0:37}
+{0:400}
+{0:401}
+{0:401}
+{0:401}
+{0:401}
+{0:401}
+{0:402}
+{0:403}
+{0:403}
+{0:403}
+{0:404}
+{0:404}
+{0:406}
+{0:406}
+{0:406}
+{0:406}
+{0:407}
+{0:409}
+{0:409}
+{0:409}
+{0:411}
+{0:413}
+{0:413}
+{0:414}
+{0:414}
+{0:417}
+{0:417}
+{0:417}
+{0:418}
+{0:419}
+{0:41}
+{0:421}
+{0:424}
+{0:424}
+{0:427}
+{0:429}
+{0:429}
+{0:42}
+{0:42}
+{0:430}
+{0:430}
+{0:430}
+{0:431}
+{0:431}
+{0:431}
+{0:432}
+{0:435}
+{0:436}
+{0:437}
+{0:438}
+{0:438}
+{0:438}
+{0:439}
+{0:439}
+{0:43}
+{0:443}
+{0:444}
+{0:446}
+{0:448}
+{0:449}
+{0:44}
+{0:452}
+{0:453}
+{0:454}
+{0:454}
+{0:454}
+{0:455}
+{0:457}
+{0:458}
+{0:458}
+{0:459}
+{0:459}
+{0:460}
+{0:462}
+{0:462}
+{0:463}
+{0:463}
+{0:466}
+{0:466}
+{0:466}
+{0:467}
+{0:468}
+{0:468}
+{0:468}
+{0:468}
+{0:469}
+{0:469}
+{0:469}
+{0:469}
+{0:469}
+{0:470}
+{0:472}
+{0:475}
+{0:477}
+{0:478}
+{0:478}
+{0:479}
+{0:47}
+{0:480}
+{0:480}
+{0:480}
+{0:481}
+{0:482}
+{0:483}
+{0:484}
+{0:485}
+{0:487}
+{0:489}
+{0:489}
+{0:489}
+{0:489}
+{0:490}
+{0:491}
+{0:492}
+{0:492}
+{0:493}
+{0:494}
+{0:495}
+{0:496}
+{0:497}
+{0:498}
+{0:498}
+{0:498}
+{0:4}
+{0:51}
+{0:51}
+{0:53}
+{0:54}
+{0:57}
+{0:58}
+{0:58}
+{0:5}
+{0:5}
+{0:5}
+{0:64}
+{0:65}
+{0:66}
+{0:67}
+{0:67}
+{0:69}
+{0:70}
+{0:70}
+{0:70}
+{0:72}
+{0:72}
+{0:74}
+{0:76}
+{0:76}
+{0:77}
+{0:78}
+{0:80}
+{0:82}
+{0:83}
+{0:83}
+{0:84}
+{0:84}
+{0:85}
+{0:86}
+{0:87}
+{0:8}
+{0:90}
+{0:90}
+{0:90}
+{0:92}
+{0:95}
+{0:95}
+{0:96}
+{0:97}
+{0:97}
+{0:98}
+{0:98}
+{0:9}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{1:2.0}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{2:["one","two"]}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+{3:{"col1":5,"col2":"five"}}
+PREHOOK: query: DROP TABLE destBin
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@destbin
+PREHOOK: Output: default@destbin
+POSTHOOK: query: DROP TABLE destBin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@destbin
+POSTHOOK: Output: default@destbin
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Tue Sep 9 09:05:26 2014
@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryUnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
@@ -106,6 +107,8 @@ public final class LazyBinaryFactory {
return new LazyBinaryArray((LazyBinaryListObjectInspector) oi);
case STRUCT:
return new LazyBinaryStruct((LazyBinaryStructObjectInspector) oi);
+ case UNION:
+ return new LazyBinaryUnion((LazyBinaryUnionObjectInspector) oi);
}
throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Tue Sep 9 09:05:26 2014
@@ -43,8 +43,8 @@ import org.apache.hadoop.hive.serde2.laz
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
@@ -281,6 +281,13 @@ public class LazyBinarySerDe extends Abs
}
}
+  /**
+   * Serializes a union value as its tag followed by the field that tag selects.
+   *
+   * @param byteStream output the tag byte and the field bytes are appended to
+   * @param obj the union object to serialize
+   * @param uoi inspector used to read the tag and the set field out of obj
+   * @param warnedOnceNullMapKey passed through unchanged to the nested serialize call
+   * @throws SerDeException if serializing the selected field fails
+   */
+  private static void serializeUnion(RandomAccessOutput byteStream, Object obj,
+      UnionObjectInspector uoi, BooleanRef warnedOnceNullMapKey) throws SerDeException {
+    final byte unionTag = uoi.getTag(obj);
+    byteStream.write(unionTag);
+
+    // The tag doubles as the index of the inspector describing the set field.
+    final Object unionValue = uoi.getField(obj);
+    final ObjectInspector valueInspector = uoi.getObjectInspectors().get(unionTag);
+    serialize(byteStream, unionValue, valueInspector, false, warnedOnceNullMapKey);
+  }
+
private static void serializeText(
RandomAccessOutput byteStream, Text t, boolean skipLengthPrefix) {
/* write byte size of the string which is a vint */
@@ -544,24 +551,31 @@ public class LazyBinarySerDe extends Abs
}
return;
}
- case STRUCT: {
+ case STRUCT:
+ case UNION:{
int byteSizeStart = 0;
- int structStart = 0;
+ int typeStart = 0;
if (!skipLengthPrefix) {
// 1/ reserve spaces for the byte size of the struct
// which is a integer and takes four bytes
byteSizeStart = byteStream.getLength();
byteStream.reserve(4);
- structStart = byteStream.getLength();
+ typeStart = byteStream.getLength();
+ }
+
+ if (ObjectInspector.Category.STRUCT.equals(objInspector.getCategory()) ) {
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
+ } else {
+ // 2/ serialize the union
+ serializeUnion(byteStream, obj, (UnionObjectInspector) objInspector, warnedOnceNullMapKey);
}
- // 2/ serialize the struct
- serializeStruct(byteStream, obj, (StructObjectInspector) objInspector, warnedOnceNullMapKey);
if (!skipLengthPrefix) {
// 3/ update the byte size of the struct
- int structEnd = byteStream.getLength();
- int structSize = structEnd - structStart;
- writeSizeAtOffset(byteStream, byteSizeStart, structSize);
+ int typeEnd = byteStream.getLength();
+ int typeSize = typeEnd - typeStart;
+ writeSizeAtOffset(byteStream, byteSizeStart, typeSize);
}
return;
}
Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java?rev=1623717&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java Tue Sep 9 09:05:26 2014
@@ -0,0 +1,196 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+ package org.apache.hadoop.hive.serde2.lazybinary;
+
+ import java.util.ArrayList;
+ import java.util.Arrays;
+ import java.util.List;
+
+ import org.apache.commons.logging.Log;
+ import org.apache.commons.logging.LogFactory;
+ import org.apache.hadoop.hive.serde2.SerDeStatsStruct;
+ import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+ import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryUnionObjectInspector;
+ import org.apache.hadoop.hive.serde2.objectinspector.*;
+
+/**
+ * LazyBinaryUnion is a lazily parsed value of a union type.
+ *
+ * <p>It is serialized as a one-byte TAG immediately followed by the FIELD
+ * that the tag selects:
+ *
+ * <pre>
+ *   start                          end
+ *   |-----|-----------------------|
+ *     TAG           FIELD
+ * </pre>
+ *
+ * TAG is one byte, the tag of the union field that is set; it is also used as
+ * the index into the union inspector's list of field ObjectInspectors.
+ * FIELD is the LazyBinaryObject encoding of the set field's value.
+ */
+public class LazyBinaryUnion extends
+    LazyBinaryNonPrimitive<LazyBinaryUnionObjectInspector> implements SerDeStatsStruct {
+
+  private static final Log LOG = LogFactory.getLog(LazyBinaryUnion.class.getName());
+
+  /**
+   * Whether the data is already parsed or not.
+   */
+  boolean parsed;
+
+  /**
+   * Size of serialized data, as given to init.
+   */
+  long serializedSize;
+
+  /**
+   * The field of the union which contains the value. Created by parse().
+   */
+  LazyBinaryObject field;
+
+  /**
+   * Whether field has already been pointed at its bytes.
+   */
+  boolean fieldInited;
+
+  /**
+   * The start position and length of the set union field. Only valid when the
+   * data is parsed.
+   */
+  int fieldStart;
+  int fieldLength;
+
+  /**
+   * Tag of the set union field. Only valid when the data is parsed.
+   */
+  byte tag;
+
+  /**
+   * Object most recently obtained from field; cleared by init.
+   */
+  Object cachedObject;
+
+  final LazyBinaryUtils.VInt vInt = new LazyBinaryUtils.VInt();
+
+  LazyBinaryUtils.RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+
+  // Each size mismatch is logged once per instance, not once per value.
+  boolean missingFieldWarned = false;
+  boolean extraFieldWarned = false;
+
+  /**
+   * Construct a LazyBinaryUnion object with an ObjectInspector.
+   */
+  protected LazyBinaryUnion(LazyBinaryUnionObjectInspector oi) {
+    super(oi);
+  }
+
+  /**
+   * Points this object at a new serialized union and discards everything
+   * derived from the previous one.
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    super.init(bytes, start, length);
+    parsed = false;
+    serializedSize = length;
+    fieldInited = false;
+    field = null;
+    cachedObject = null;
+  }
+
+  /**
+   * Parse the byte[] and fill tag, field, fieldStart and fieldLength.
+   *
+   * @throws IndexOutOfBoundsException if the tag byte does not select one of
+   *         the union's field ObjectInspectors
+   */
+  private void parse() {
+    LazyBinaryUnionObjectInspector uoi = (LazyBinaryUnionObjectInspector) oi;
+
+    int unionByteEnd = start + length;
+    byte[] byteArr = this.bytes.getData();
+
+    // Tag of union field is the first byte to be parsed; the field follows it.
+    final int tagEnd = start + 1;
+    tag = byteArr[start];
+
+    // Fail with a descriptive message instead of a bare index error from List.get.
+    final int numFields = uoi.getObjectInspectors().size();
+    if (tag < 0 || tag >= numFields) {
+      throw new IndexOutOfBoundsException("Invalid union tag " + tag
+          + "; expected a value in [0, " + numFields + ")");
+    }
+
+    final ObjectInspector fieldOI = uoi.getObjectInspectors().get(tag);
+    field = LazyBinaryFactory.createLazyBinaryObject(fieldOI);
+
+    // Check the union field's length and offset
+    LazyBinaryUtils.checkObjectByteInfo(fieldOI, byteArr, tagEnd, recordInfo, vInt);
+    fieldStart = tagEnd + recordInfo.elementOffset;
+    fieldLength = recordInfo.elementSize;
+
+    final int fieldEnd = fieldStart + fieldLength;
+
+    // Extra bytes at the end?
+    if (!extraFieldWarned && fieldEnd < unionByteEnd) {
+      extraFieldWarned = true;
+      LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
+          + "problems.");
+    }
+
+    // Field runs past the end of the union?
+    if (!missingFieldWarned && fieldEnd > unionByteEnd) {
+      missingFieldWarned = true;
+      LOG.info("Missing bytes! Union field with tag " + tag + " needs "
+          + fieldLength + " bytes starting at offset " + fieldStart
+          + " but the union ends at offset " + unionByteEnd
+          + "! Ignoring similar problems.");
+    }
+
+    parsed = true;
+  }
+
+  /**
+   * Get the set field out of the union.
+   *
+   * Parses the bytes on first use, then returns the object produced by the
+   * set field's getObject(); that object is cached until the next init.
+   *
+   * @return The value of the set field
+   */
+  public Object getField() {
+    if (!parsed) {
+      parse();
+    }
+    if (cachedObject == null) {
+      return uncheckedGetField();
+    }
+    return cachedObject;
+  }
+
+  /**
+   * Get the field out of the union without checking parsed. This is called by
+   * getField only, after parse() has filled field, fieldStart and fieldLength.
+   *
+   * @return The value of the field
+   */
+  private Object uncheckedGetField() {
+    // Point the field at its bytes only once per init.
+    if (!fieldInited) {
+      fieldInited = true;
+      field.init(bytes, fieldStart, fieldLength);
+    }
+    // Ask the field for its object a single time and hand back the cached value.
+    cachedObject = field.getObject();
+    return cachedObject;
+  }
+
+  /**
+   * Returns this LazyBinaryUnion itself.
+   */
+  @Override
+  public Object getObject() {
+    return this;
+  }
+
+  /**
+   * Returns the length that was passed to the most recent init call.
+   */
+  public long getRawDataSerializedSize() {
+    return serializedSize;
+  }
+
+  /**
+   * Get the set field's tag, parsing the bytes first if necessary.
+   *
+   * @return The tag of the field set in the union
+   */
+  public byte getTag() {
+    if (!parsed) {
+      parse();
+    }
+    return tag;
+  }
+}
+
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java Tue Sep 9 09:05:26 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.WritableUtils;
@@ -226,6 +227,7 @@ public final class LazyBinaryUtils {
case LIST:
case MAP:
case STRUCT:
+ case UNION:
recordInfo.elementOffset = 4;
recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
break;
@@ -474,6 +476,20 @@ public final class LazyBinaryUtils {
fieldObjectInspectors);
break;
}
+ case UNION: {
+ UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
+ final List<TypeInfo> fieldTypeInfos = unionTypeInfo.getAllUnionObjectTypeInfos();
+ List<ObjectInspector> fieldObjectInspectors = new ArrayList<ObjectInspector>(
+ fieldTypeInfos.size());
+ for (int i = 0; i < fieldTypeInfos.size(); i++) {
+ fieldObjectInspectors
+ .add(getLazyBinaryObjectInspectorFromTypeInfo(fieldTypeInfos
+ .get(i)));
+ }
+ result = LazyBinaryObjectInspectorFactory
+ .getLazyBinaryUnionObjectInspector(fieldObjectInspectors);
+ break;
+ }
default: {
result = null;
}
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java?rev=1623717&r1=1623716&r2=1623717&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java Tue Sep 9 09:05:26 2014
@@ -40,6 +40,9 @@ public final class LazyBinaryObjectInspe
static ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector> cachedLazyBinaryStructObjectInspector =
new ConcurrentHashMap<ArrayList<Object>, LazyBinaryStructObjectInspector>();
+ static ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector> cachedLazyBinaryUnionObjectInspector =
+ new ConcurrentHashMap<ArrayList<Object>, LazyBinaryUnionObjectInspector>();
+
public static LazyBinaryStructObjectInspector getLazyBinaryStructObjectInspector(
List<String> structFieldNames,
List<ObjectInspector> structFieldObjectInspectors) {
@@ -66,6 +69,20 @@ public final class LazyBinaryObjectInspe
return result;
}
+  /**
+   * Returns the cached LazyBinaryUnionObjectInspector for the given list of
+   * union field ObjectInspectors, creating and caching one on first request.
+   *
+   * @param unionFieldObjectInspectors inspectors of the union's alternatives;
+   *        the list itself is the cache key
+   * @return the inspector stored in the cache for this key
+   */
+  public static LazyBinaryUnionObjectInspector getLazyBinaryUnionObjectInspector(
+      List<ObjectInspector> unionFieldObjectInspectors) {
+    ArrayList<Object> signature = new ArrayList<Object>(1);
+    signature.add(unionFieldObjectInspectors);
+
+    LazyBinaryUnionObjectInspector result = cachedLazyBinaryUnionObjectInspector
+        .get(signature);
+    if (result == null) {
+      result = new LazyBinaryUnionObjectInspector(unionFieldObjectInspectors);
+      // putIfAbsent keeps the first instance published for this signature, so
+      // concurrent callers all end up with the same inspector object.
+      LazyBinaryUnionObjectInspector existing = cachedLazyBinaryUnionObjectInspector
+          .putIfAbsent(signature, result);
+      if (existing != null) {
+        result = existing;
+      }
+    }
+    return result;
+  }
+
static ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector> cachedLazyBinaryListObjectInspector =
new ConcurrentHashMap<ArrayList<Object>, LazyBinaryListObjectInspector>();
Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java?rev=1623717&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java Tue Sep 9 09:05:26 2014
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUnion;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.UnionObject;
+
+import java.util.List;
+
+/**
+ * ObjectInspector for LazyBinaryUnion.
+ *
+ * Both accessors cast the inspected object to LazyBinaryUnion and delegate to
+ * it, so any non-null object handed to this inspector must be a
+ * LazyBinaryUnion.
+ *
+ * @see org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUnion
+ */
+public class LazyBinaryUnionObjectInspector extends
+    StandardUnionObjectInspector {
+
+  protected LazyBinaryUnionObjectInspector() {
+    super();
+  }
+
+  /**
+   * @param unionFieldObjectInspectors inspectors of the union's alternatives,
+   *        handed unchanged to the superclass constructor
+   */
+  protected LazyBinaryUnionObjectInspector(List<ObjectInspector> unionFieldObjectInspectors) {
+    super(unionFieldObjectInspectors);
+  }
+
+  /**
+   * Return the tag of the object.
+   *
+   * @param o a LazyBinaryUnion, or null
+   * @return the union's tag, or -1 when o is null
+   */
+  @Override
+  public byte getTag(Object o) {
+    if (o == null) {
+      return -1;
+    }
+    LazyBinaryUnion lazyBinaryUnion = (LazyBinaryUnion) o;
+    return lazyBinaryUnion.getTag();
+  }
+
+  /**
+   * Return the field based on the tag value associated with the Object.
+   *
+   * @param o a LazyBinaryUnion, or null
+   * @return the value of the union's set field, or null when o is null
+   */
+  @Override
+  public Object getField(Object o) {
+    if (o == null) {
+      return null;
+    }
+    LazyBinaryUnion lazyBinaryUnion = (LazyBinaryUnion) o;
+    return lazyBinaryUnion.getField();
+  }
+}