You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/30 19:09:30 UTC
svn commit: r1508528 - in /hive/trunk: data/files/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/avro/
serde/src/test/org/apache/hadoop/hive/serde2/avro/
Author: hashutosh
Date: Tue Jul 30 17:09:30 2013
New Revision: 1508528
URL: http://svn.apache.org/r1508528
Log:
HIVE-3264 : Add support for binary datatype to AvroSerde (Eli Reisman & Mark Wagner via Ashutosh Chauhan)
Modified:
hive/trunk/data/files/csv.txt
hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q
hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out
hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
Modified: hive/trunk/data/files/csv.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/csv.txt?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/data/files/csv.txt (original)
+++ hive/trunk/data/files/csv.txt Tue Jul 30 17:09:30 2013
@@ -1,18 +1,18 @@
-why hello there,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-another record,98,4,101,9999999,false,99.89,0.00000009,beta,Earth#101,1134:false:wazzup,RED,\N,6:7:8:9:10,54:55:56
-third record,45,5,102,999999999,true,89.99,0.00000000000009,alpha:gamma,Earth#237:Bob#723,102:false:BNL,GREEN,\N,11:12:13,57:58:59
-\N,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,\N,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,\N,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,\N,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,\N,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,\N,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,\N,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,\N,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,\N,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,\N,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,\N,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,\N,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,\N,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,\N,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,\N
+why hello there,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+another record,98,4,101,9999999,false,99.89,0.00000009,beta,Earth#101,1134:false:wazzup,RED,\N,,ef
+third record,45,5,102,999999999,true,89.99,0.00000000000009,alpha:gamma,Earth#237:Bob#723,102:false:BNL,GREEN,\N,,hi
+\N,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,\N,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,\N,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,\N,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,\N,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,\N,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,\N,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,\N,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,\N,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,\N,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,\N,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,\N,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,\N,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,\N,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,\N
Modified: hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q Tue Jul 30 17:09:30 2013
@@ -12,8 +12,8 @@ CREATE TABLE test_serializer(string1 STR
struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
enum1 STRING,
nullableint INT,
- bytes1 ARRAY<TINYINT>,
- fixed1 ARRAY<TINYINT>)
+ bytes1 BINARY,
+ fixed1 BINARY)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
STORED AS TEXTFILE;
Modified: hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out Tue Jul 30 17:09:30 2013
@@ -12,8 +12,8 @@ CREATE TABLE test_serializer(string1 STR
struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
enum1 STRING,
nullableint INT,
- bytes1 ARRAY<TINYINT>,
- fixed1 ARRAY<TINYINT>)
+ bytes1 BINARY,
+ fixed1 BINARY)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
STORED AS TEXTFILE
PREHOOK: type: CREATETABLE
@@ -31,8 +31,8 @@ CREATE TABLE test_serializer(string1 STR
struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
enum1 STRING,
nullableint INT,
- bytes1 ARRAY<TINYINT>,
- fixed1 ARRAY<TINYINT>)
+ bytes1 BINARY,
+ fixed1 BINARY)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
STORED AS TEXTFILE
POSTHOOK: type: CREATETABLE
@@ -124,10 +124,10 @@ POSTHOOK: Input: default@test_serializer
POSTHOOK: Output: default@as_avro
POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ]
POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ]
POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ]
POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ]
-POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ]
POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ]
POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ]
POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ]
@@ -147,10 +147,10 @@ POSTHOOK: Input: default@as_avro
#### A masked pattern was here ####
POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ]
POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ]
POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ]
POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ]
-POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ]
POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ]
POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ]
POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ]
@@ -160,21 +160,21 @@ POSTHOOK: Lineage: as_avro.smallint1 EXP
POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ]
POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct<sint:int,sboolean:boolean,sstring:string>, comment:null), ]
POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ]
-why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL [6,7,8,9,10] [54,55,56]
-third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL [11,12,13] [57,58,59]
-NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string NULL 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 NULL 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} NULL BLUE 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72 [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL [0,1,2,3,4,5] [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL [50,51,53]
-string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 [0,1,2,3,4,5] NULL
+why hello there 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+another record 98 4 101 9999999 false 99.89 9.0E-8 ["beta"] {"Earth":101} {"sint":1134,"sboolean":false,"sstring":"wazzup"} RED NULL ef
+third record 45 5 102 999999999 true 89.99 9.0E-14 ["alpha","gamma"] {"Earth":237,"Bob":723} {"sint":102,"sboolean":false,"sstring":"BNL"} GREEN NULL hi
+NULL 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string NULL 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 NULL 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 NULL 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 NULL true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 NULL 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 true NULL 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 true 42.43 NULL ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 true 42.43 85.23423424 NULL {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] NULL {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} NULL BLUE 72 bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} NULL 72 bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE NULL bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL bc
+string 42 3 100 1412341 true 42.43 85.23423424 ["alpha","beta","gamma"] {"Earth":42,"Bob":31,"Control":86} {"sint":17,"sboolean":true,"sstring":"Abe Linkedin"} BLUE 72 NULL
Modified: hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out Tue Jul 30 17:09:30 2013
@@ -77,5 +77,5 @@ struct1 struct<sint:int,sbo
union1 uniontype<float,boolean,string> from deserializer
enum1 string from deserializer
nullableint int from deserializer
-bytes1 array<tinyint> from deserializer
-fixed1 array<tinyint> from deserializer
+bytes1 binary from deserializer
+fixed1 binary from deserializer
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Tue Jul 30 17:09:30 2013
@@ -17,8 +17,19 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Fixed;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
@@ -32,21 +43,12 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.Writable;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
class AvroDeserializer {
private static final Log LOG = LogFactory.getLog(AvroDeserializer.class);
/**
@@ -62,7 +64,7 @@ class AvroDeserializer {
private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
private final GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
private BinaryDecoder binaryDecoder = null;
- private InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>> gdrCache
+ private final InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>> gdrCache
= new InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>>() {
@Override
protected GenericDatumReader<GenericRecord> makeInstance(ReaderWriterSchemaPair hv) {
@@ -112,13 +114,15 @@ class AvroDeserializer {
*/
public Object deserialize(List<String> columnNames, List<TypeInfo> columnTypes,
Writable writable, Schema readerSchema) throws AvroSerdeException {
- if(!(writable instanceof AvroGenericRecordWritable))
+ if(!(writable instanceof AvroGenericRecordWritable)) {
throw new AvroSerdeException("Expecting a AvroGenericRecordWritable");
+ }
- if(row == null || row.size() != columnNames.size())
+ if(row == null || row.size() != columnNames.size()) {
row = new ArrayList<Object>(columnNames.size());
- else
+ } else {
row.clear();
+ }
AvroGenericRecordWritable recordWritable = (AvroGenericRecordWritable) writable;
GenericRecord r = recordWritable.getRecord();
@@ -127,7 +131,9 @@ class AvroDeserializer {
if(!r.getSchema().equals(readerSchema)) {
LOG.warn("Received different schemas. Have to re-encode: " +
r.getSchema().toString(false));
- if(reEncoder == null) reEncoder = new SchemaReEncoder();
+ if(reEncoder == null) {
+ reEncoder = new SchemaReEncoder();
+ }
r = reEncoder.reencode(r, readerSchema);
}
@@ -156,25 +162,49 @@ class AvroDeserializer {
// Klaxon! Klaxon! Klaxon!
// Avro requires NULLable types to be defined as unions of some type T
// and NULL. This is annoying and we're going to hide it from the user.
- if(AvroSerdeUtils.isNullableType(recordSchema))
+ if(AvroSerdeUtils.isNullableType(recordSchema)) {
return deserializeNullableUnion(datum, recordSchema, columnType);
+ }
- if(columnType == TypeInfoFactory.stringTypeInfo)
- return datum.toString(); // To workaround AvroUTF8
- // This also gets us around the Enum issue since we just take the value
- // and convert it to a string. Yay!
switch(columnType.getCategory()) {
case STRUCT:
return deserializeStruct((GenericData.Record) datum, (StructTypeInfo) columnType);
- case UNION:
+ case UNION:
return deserializeUnion(datum, recordSchema, (UnionTypeInfo) columnType);
case LIST:
return deserializeList(datum, recordSchema, (ListTypeInfo) columnType);
case MAP:
return deserializeMap(datum, recordSchema, (MapTypeInfo) columnType);
+ case PRIMITIVE:
+ return deserializePrimitive(datum, recordSchema, (PrimitiveTypeInfo) columnType);
default:
- return datum; // Simple type.
+ throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory());
+ }
+ }
+
+ private Object deserializePrimitive(Object datum, Schema recordSchema,
+ PrimitiveTypeInfo columnType) throws AvroSerdeException {
+ switch (columnType.getPrimitiveCategory()){
+ case STRING:
+ return datum.toString(); // To workaround AvroUTF8
+ // This also gets us around the Enum issue since we just take the value
+ // and convert it to a string. Yay!
+ case BINARY:
+ if (recordSchema.getType() == Type.FIXED){
+ Fixed fixed = (Fixed) datum;
+ return fixed.bytes();
+ } else if (recordSchema.getType() == Type.BYTES){
+ ByteBuffer bb = (ByteBuffer) datum;
+ bb.rewind();
+ byte[] result = new byte[bb.limit()];
+ bb.get(result);
+ return result;
+ } else {
+ throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType());
+ }
+ default:
+ return datum;
}
}
@@ -186,8 +216,9 @@ class AvroDeserializer {
TypeInfo columnType) throws AvroSerdeException {
int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value
Schema schema = recordSchema.getTypes().get(tag);
- if(schema.getType().equals(Schema.Type.NULL))
+ if(schema.getType().equals(Schema.Type.NULL)) {
return null;
+ }
return worker(datum, schema, SchemaToTypeInfo.generateTypeInfo(schema));
}
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java Tue Jul 30 17:09:30 2013
@@ -18,9 +18,17 @@
package org.apache.hadoop.hive.serde2.avro;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Type;
import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Fixed;
import org.apache.avro.generic.GenericEnumSymbol;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -38,15 +46,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.apache.hadoop.io.Writable;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.apache.avro.Schema.Type.BYTES;
-import static org.apache.avro.Schema.Type.FIXED;
-
class AvroSerializer {
private static final Log LOG = LogFactory.getLog(AvroSerializer.class);
@@ -67,12 +66,14 @@ class AvroSerializer {
GenericData.Record record = new GenericData.Record(schema);
List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
- if(outputFieldRefs.size() != columnNames.size())
+ if(outputFieldRefs.size() != columnNames.size()) {
throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
+ }
int size = schema.getFields().size();
- if(outputFieldRefs.size() != size) // Hive does this check for us, so we should be ok.
+ if(outputFieldRefs.size() != size) {
throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() +", Avro expected " + schema.getFields().size());
+ }
List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
@@ -88,8 +89,9 @@ class AvroSerializer {
record.put(field.name(), val);
}
- if(!GenericData.get().validate(schema, record))
+ if(!GenericData.get().validate(schema, record)) {
throw new SerializeToAvroException(schema, record);
+ }
cache.setRecord(record);
@@ -111,7 +113,7 @@ class AvroSerializer {
switch(typeInfo.getCategory()) {
case PRIMITIVE:
assert fieldOI instanceof PrimitiveObjectInspector;
- return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData);
+ return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema);
case MAP:
assert fieldOI instanceof MapObjectInspector;
assert typeInfo instanceof MapTypeInfo;
@@ -153,7 +155,7 @@ class AvroSerializer {
};
private Object serializeEnum(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData));
+ return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
}
private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
@@ -176,14 +178,24 @@ class AvroSerializer {
return record;
}
- private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
+ private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
switch(fieldOI.getPrimitiveCategory()) {
- case UNKNOWN:
- throw new AvroSerdeException("Received UNKNOWN primitive category.");
- case VOID:
- return null;
- default: // All other primitive types are simple
- return fieldOI.getPrimitiveJavaObject(structFieldData);
+ case BINARY:
+ if (schema.getType() == Type.BYTES){
+ ByteBuffer bb = ByteBuffer.wrap((byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+ return bb.rewind();
+ } else if (schema.getType() == Type.FIXED){
+ Fixed fixed = new GenericData.Fixed(schema, (byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+ return fixed;
+ } else {
+ throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + schema.getType());
+ }
+ case UNKNOWN:
+ throw new AvroSerdeException("Received UNKNOWN primitive category.");
+ case VOID:
+ return null;
+ default: // All other primitive types are simple
+ return fieldOI.getPrimitiveJavaObject(structFieldData);
}
}
@@ -197,53 +209,7 @@ class AvroSerializer {
schema.getTypes().get(tag));
}
- // We treat FIXED and BYTES as arrays of tinyints within Hive. Check
- // if we're dealing with either of these types and thus need to serialize
- // them as their Avro types.
- private boolean isTransformedType(Schema schema) {
- return schema.getType().equals(FIXED) || schema.getType().equals(BYTES);
- }
-
- private Object serializeTransformedType(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- if(LOG.isDebugEnabled()) {
- LOG.debug("Beginning to transform " + typeInfo + " with Avro schema " + schema.toString(false));
- }
- if(schema.getType().equals(FIXED)) return serializedAvroFixed(typeInfo, fieldOI, structFieldData, schema);
- else return serializeAvroBytes(typeInfo, fieldOI, structFieldData, schema);
-
- }
-
- private Object serializeAvroBytes(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- ByteBuffer bb = ByteBuffer.wrap(extraByteArray(fieldOI, structFieldData));
- return bb.rewind();
- }
-
- private Object serializedAvroFixed(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- return new GenericData.Fixed(schema, extraByteArray(fieldOI, structFieldData));
- }
-
- // For transforming to BYTES and FIXED, pull out the byte array Avro will want
- private byte[] extraByteArray(ListObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
- // Grab a book. This is going to be slow.
- int listLength = fieldOI.getListLength(structFieldData);
- byte[] bytes = new byte[listLength];
- assert fieldOI.getListElementObjectInspector() instanceof PrimitiveObjectInspector;
- PrimitiveObjectInspector poi = (PrimitiveObjectInspector)fieldOI.getListElementObjectInspector();
- List<?> list = fieldOI.getList(structFieldData);
-
- for(int i = 0; i < listLength; i++) {
- Object b = poi.getPrimitiveJavaObject(list.get(i));
- if(!(b instanceof Byte))
- throw new AvroSerdeException("Attempting to transform to bytes, element was not byte but " + b.getClass().getCanonicalName());
- bytes[i] = (Byte)b;
- }
- return bytes;
- }
-
private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
- if(isTransformedType(schema))
- return serializeTransformedType(typeInfo, fieldOI, structFieldData, schema);
-
List<?> list = fieldOI.getList(structFieldData);
List<Object> deserialized = new ArrayList<Object>(list.size());
@@ -260,8 +226,9 @@ class AvroSerializer {
private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
// Avro only allows maps with string keys
- if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector()))
+ if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
throw new AvroSerdeException("Avro only supports maps with keys as Strings. Current Map is: " + typeInfo.toString());
+ }
ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java Tue Jul 30 17:09:30 2013
@@ -17,24 +17,26 @@
*/
package org.apache.hadoop.hive.serde2.avro;
-import org.apache.avro.Schema;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-
import static org.apache.avro.Schema.Type.BOOLEAN;
+import static org.apache.avro.Schema.Type.BYTES;
import static org.apache.avro.Schema.Type.DOUBLE;
+import static org.apache.avro.Schema.Type.FIXED;
import static org.apache.avro.Schema.Type.FLOAT;
import static org.apache.avro.Schema.Type.INT;
import static org.apache.avro.Schema.Type.LONG;
import static org.apache.avro.Schema.Type.NULL;
import static org.apache.avro.Schema.Type.STRING;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
/**
* Convert an Avro Schema to a Hive TypeInfo
*/
@@ -47,7 +49,8 @@ class SchemaToTypeInfo {
// long bigint check
// float double check
// double double check
- // bytes
+ // bytes binary check
+ // fixed binary check
// string string check
// tinyint
// smallint
@@ -56,13 +59,15 @@ class SchemaToTypeInfo {
private static final Map<Schema.Type, TypeInfo> primitiveTypeToTypeInfo = initTypeMap();
private static Map<Schema.Type, TypeInfo> initTypeMap() {
Map<Schema.Type, TypeInfo> theMap = new Hashtable<Schema.Type, TypeInfo>();
- theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
- theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
+ theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
theMap.put(BOOLEAN, TypeInfoFactory.getPrimitiveTypeInfo("boolean"));
+ theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
theMap.put(LONG, TypeInfoFactory.getPrimitiveTypeInfo("bigint"));
theMap.put(FLOAT, TypeInfoFactory.getPrimitiveTypeInfo("float"));
theMap.put(DOUBLE, TypeInfoFactory.getPrimitiveTypeInfo("double"));
- theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
+ theMap.put(BYTES, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+ theMap.put(FIXED, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+ theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
return Collections.unmodifiableMap(theMap);
}
@@ -106,22 +111,22 @@ class SchemaToTypeInfo {
private static TypeInfo generateTypeInfoWorker(Schema schema) throws AvroSerdeException {
// Avro requires NULLable types to be defined as unions of some type T
// and NULL. This is annoying and we're going to hide it from the user.
- if(AvroSerdeUtils.isNullableType(schema))
+ if(AvroSerdeUtils.isNullableType(schema)) {
return generateTypeInfo(AvroSerdeUtils.getOtherTypeFromNullableType(schema));
+ }
Schema.Type type = schema.getType();
- if(primitiveTypeToTypeInfo.containsKey(type))
+ if(primitiveTypeToTypeInfo.containsKey(type)) {
return primitiveTypeToTypeInfo.get(type);
+ }
switch(type) {
- case BYTES: return generateBytesTypeInfo(schema);
case RECORD: return generateRecordTypeInfo(schema);
case MAP: return generateMapTypeInfo(schema);
case ARRAY: return generateArrayTypeInfo(schema);
case UNION: return generateUnionTypeInfo(schema);
case ENUM: return generateEnumTypeInfo(schema);
- case FIXED: return generateFixedTypeInfo(schema);
default: throw new AvroSerdeException("Do not yet support: " + schema);
}
}
@@ -183,22 +188,4 @@ class SchemaToTypeInfo {
return TypeInfoFactory.getPrimitiveTypeInfo("string");
}
-
- // Hive doesn't have a Fixed type, so we're going to treat them as arrays of
- // bytes
- // TODO: Make note in documentation that Hive sends these out as signed bytes.
- private static final TypeInfo FIXED_AND_BYTES_EQUIV =
- TypeInfoFactory.getListTypeInfo(TypeInfoFactory.byteTypeInfo);
- private static TypeInfo generateFixedTypeInfo(Schema schema) {
- assert schema.getType().equals(Schema.Type.FIXED);
-
- return FIXED_AND_BYTES_EQUIV;
- }
-
- // Avro considers bytes to be a primitive type, but Hive doesn't. We'll
- // convert them to a list of bytes, just like Fixed. Sigh.
- private static TypeInfo generateBytesTypeInfo(Schema schema) {
- assert schema.getType().equals(Schema.Type.BYTES);
- return FIXED_AND_BYTES_EQUIV;
- }
}
Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java Tue Jul 30 17:09:30 2013
@@ -17,6 +17,18 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.hadoop.hive.serde2.SerDeException;
@@ -30,18 +42,6 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
import org.junit.Test;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
public class TestAvroDeserializer {
private final GenericData GENERIC_DATA = GenericData.get();
@@ -338,12 +338,12 @@ public class TestAvroDeserializer {
ArrayList<Object> row =
(ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
- Object theArrayObject = row.get(0);
- assertTrue(theArrayObject instanceof List);
- List theList = (List)theArrayObject;
+ Object byteObject = row.get(0);
+ assertTrue(byteObject instanceof byte[]);
+ byte[] outBytes = (byte[]) byteObject;
// Verify the raw object that's been created
for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList.get(i));
+ assertEquals(bytes[i], outBytes[i]);
}
// Now go the correct way, through objectinspectors
@@ -352,9 +352,9 @@ public class TestAvroDeserializer {
assertEquals(1, fieldsDataAsList.size());
StructField fieldRef = oi.getStructFieldRef("hash");
- List theList2 = (List)oi.getStructFieldData(row, fieldRef);
- for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList2.get(i));
+ outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+ for(int i = 0; i < outBytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
}
}
@@ -377,8 +377,13 @@ public class TestAvroDeserializer {
ArrayList<Object> row =
(ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
assertEquals(1, row.size());
- Object theArrayObject = row.get(0);
- assertTrue(theArrayObject instanceof List);
+ Object byteObject = row.get(0);
+ assertTrue(byteObject instanceof byte[]);
+ byte[] outBytes = (byte[]) byteObject;
+ // Verify the raw object that's been created
+ for(int i = 0; i < bytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
+ }
// Now go the correct way, through objectinspectors
StandardStructObjectInspector oi = (StandardStructObjectInspector)aoig.getObjectInspector();
@@ -386,9 +391,9 @@ public class TestAvroDeserializer {
assertEquals(1, fieldsDataAsList.size());
StructField fieldRef = oi.getStructFieldRef("bytesField");
- List theList2 = (List)oi.getStructFieldData(row, fieldRef);
- for(int i = 0; i < bytes.length; i++) {
- assertEquals(bytes[i], theList2.get(i));
+ outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+ for(int i = 0; i < outBytes.length; i++) {
+ assertEquals(bytes[i], outBytes[i]);
}
}
@@ -489,9 +494,10 @@ public class TestAvroDeserializer {
ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
StringObjectInspector soi = (StringObjectInspector)fieldObjectInspector;
- if(expected == null)
+ if(expected == null) {
assertNull(soi.getPrimitiveJavaObject(rowElement));
- else
+ } else {
assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
+ }
}
}
Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java Tue Jul 30 17:09:30 2013
@@ -17,10 +17,18 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+
import org.apache.avro.Schema;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -32,13 +40,6 @@ import org.apache.hadoop.hive.serde2.typ
import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
import org.junit.Test;
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
public class TestAvroObjectInspectorGenerator {
private final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
private final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
@@ -353,7 +354,7 @@ public class TestAvroObjectInspectorGene
AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
verifyMap(aoig, "aMap");
}
-
+
/**
* Check a given AvroObjectInspectorGenerator to verify that it matches our test
* schema's expected map.
@@ -476,10 +477,8 @@ public class TestAvroObjectInspectorGene
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertTrue(typeInfo instanceof ListTypeInfo);
- ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
- assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
- assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+ assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+ assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
@Test // Avro considers bytes primitive, Hive doesn't. Make them list of tinyint.
@@ -495,10 +494,8 @@ public class TestAvroObjectInspectorGene
// Column types
assertEquals(1, aoig.getColumnTypes().size());
TypeInfo typeInfo = aoig.getColumnTypes().get(0);
- assertTrue(typeInfo instanceof ListTypeInfo);
- ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
- assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
- assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+ assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+ assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
}
@Test // That Union[T, NULL] is converted to just T.