You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2013/07/30 19:09:30 UTC

svn commit: r1508528 - in /hive/trunk: data/files/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/org/apache/hadoop/hive/serde2/avro/ serde/src/test/org/apache/hadoop/hive/serde2/avro/

Author: hashutosh
Date: Tue Jul 30 17:09:30 2013
New Revision: 1508528

URL: http://svn.apache.org/r1508528
Log:
HIVE-3264 : Add support for binary datatype to AvroSerde (Eli Reisman & Mark Wagner via Ashutosh Chauhan)

Modified:
    hive/trunk/data/files/csv.txt
    hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q
    hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out
    hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
    hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java

Modified: hive/trunk/data/files/csv.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/csv.txt?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/data/files/csv.txt (original)
+++ hive/trunk/data/files/csv.txt Tue Jul 30 17:09:30 2013
@@ -1,18 +1,18 @@
-why hello there,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-another record,98,4,101,9999999,false,99.89,0.00000009,beta,Earth#101,1134:false:wazzup,RED,\N,6:7:8:9:10,54:55:56
-third record,45,5,102,999999999,true,89.99,0.00000000000009,alpha:gamma,Earth#237:Bob#723,102:false:BNL,GREEN,\N,11:12:13,57:58:59
-\N,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,\N,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,\N,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,\N,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,\N,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,\N,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,\N,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,\N,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,\N,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,\N,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,\N,BLUE,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,\N,72,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,\N,0:1:2:3:4:5,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,\N,50:51:53
-string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,0:1:2:3:4:5,\N
+why hello there,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+another record,98,4,101,9999999,false,99.89,0.00000009,beta,Earth#101,1134:false:wazzup,RED,\N,,ef
+third record,45,5,102,999999999,true,89.99,0.00000000000009,alpha:gamma,Earth#237:Bob#723,102:false:BNL,GREEN,\N,,hi
+\N,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,\N,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,\N,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,\N,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,\N,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,\N,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,\N,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,\N,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,\N,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,\N,17:true:Abe Linkedin,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,\N,BLUE,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,\N,72,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,\N,,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,\N,bc
+string,42,3,100,1412341,true,42.43,85.23423424,alpha:beta:gamma,Earth#42:Control#86:Bob#31,17:true:Abe Linkedin,BLUE,72,,\N

Modified: hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/avro_nullable_fields.q Tue Jul 30 17:09:30 2013
@@ -12,8 +12,8 @@ CREATE TABLE test_serializer(string1 STR
                              struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
                              enum1 STRING,
                              nullableint INT,
-                             bytes1 ARRAY<TINYINT>,
-                             fixed1 ARRAY<TINYINT>)
+                             bytes1 BINARY,
+                             fixed1 BINARY)
  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
  STORED AS TEXTFILE;
 

Modified: hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_nullable_fields.q.out Tue Jul 30 17:09:30 2013
@@ -12,8 +12,8 @@ CREATE TABLE test_serializer(string1 STR
                              struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
                              enum1 STRING,
                              nullableint INT,
-                             bytes1 ARRAY<TINYINT>,
-                             fixed1 ARRAY<TINYINT>)
+                             bytes1 BINARY,
+                             fixed1 BINARY)
  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
  STORED AS TEXTFILE
 PREHOOK: type: CREATETABLE
@@ -31,8 +31,8 @@ CREATE TABLE test_serializer(string1 STR
                              struct1 STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>,
                              enum1 STRING,
                              nullableint INT,
-                             bytes1 ARRAY<TINYINT>,
-                             fixed1 ARRAY<TINYINT>)
+                             bytes1 BINARY,
+                             fixed1 BINARY)
  ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' COLLECTION ITEMS TERMINATED BY ':' MAP KEYS TERMINATED BY '#' LINES TERMINATED BY '\n'
  STORED AS TEXTFILE
 POSTHOOK: type: CREATETABLE
@@ -124,10 +124,10 @@ POSTHOOK: Input: default@test_serializer
 POSTHOOK: Output: default@as_avro
 POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ]
 POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ]
 POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ]
 POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ]
-POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ]
 POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ]
 POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ]
 POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ]
@@ -147,10 +147,10 @@ POSTHOOK: Input: default@as_avro
 #### A masked pattern was here ####
 POSTHOOK: Lineage: as_avro.bigint1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bigint1, type:bigint, comment:null), ]
 POSTHOOK: Lineage: as_avro.boolean1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:boolean1, type:boolean, comment:null), ]
-POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.bytes1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:bytes1, type:binary, comment:null), ]
 POSTHOOK: Lineage: as_avro.double1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:double1, type:double, comment:null), ]
 POSTHOOK: Lineage: as_avro.enum1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:enum1, type:string, comment:null), ]
-POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:array<tinyint>, comment:null), ]
+POSTHOOK: Lineage: as_avro.fixed1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:fixed1, type:binary, comment:null), ]
 POSTHOOK: Lineage: as_avro.float1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:float1, type:float, comment:null), ]
 POSTHOOK: Lineage: as_avro.int1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:int1, type:int, comment:null), ]
 POSTHOOK: Lineage: as_avro.list1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:list1, type:array<string>, comment:null), ]
@@ -160,21 +160,21 @@ POSTHOOK: Lineage: as_avro.smallint1 EXP
 POSTHOOK: Lineage: as_avro.string1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:string1, type:string, comment:null), ]
 POSTHOOK: Lineage: as_avro.struct1 SIMPLE [(test_serializer)test_serializer.FieldSchema(name:struct1, type:struct<sint:int,sboolean:boolean,sstring:string>, comment:null), ]
 POSTHOOK: Lineage: as_avro.tinyint1 EXPRESSION [(test_serializer)test_serializer.FieldSchema(name:tinyint1, type:tinyint, comment:null), ]
-why hello there	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-another record	98	4	101	9999999	false	99.89	9.0E-8	["beta"]	{"Earth":101}	{"sint":1134,"sboolean":false,"sstring":"wazzup"}	RED	NULL	[6,7,8,9,10]	[54,55,56]
-third record	45	5	102	999999999	true	89.99	9.0E-14	["alpha","gamma"]	{"Earth":237,"Bob":723}	{"sint":102,"sboolean":false,"sstring":"BNL"}	GREEN	NULL	[11,12,13]	[57,58,59]
-NULL	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	NULL	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	NULL	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	NULL	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	NULL	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	NULL	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	NULL	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	NULL	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	NULL	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	NULL	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	NULL	BLUE	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	NULL	72	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	NULL	[0,1,2,3,4,5]	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	NULL	[50,51,53]
-string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	[0,1,2,3,4,5]	NULL
+why hello there	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+another record	98	4	101	9999999	false	99.89	9.0E-8	["beta"]	{"Earth":101}	{"sint":1134,"sboolean":false,"sstring":"wazzup"}	RED	NULL		ef
+third record	45	5	102	999999999	true	89.99	9.0E-14	["alpha","gamma"]	{"Earth":237,"Bob":723}	{"sint":102,"sboolean":false,"sstring":"BNL"}	GREEN	NULL		hi
+NULL	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	NULL	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	NULL	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	NULL	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	NULL	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	NULL	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	true	NULL	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	true	42.43	NULL	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	true	42.43	85.23423424	NULL	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	NULL	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	NULL	BLUE	72		bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	NULL	72		bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	NULL		bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72	NULL	bc
+string	42	3	100	1412341	true	42.43	85.23423424	["alpha","beta","gamma"]	{"Earth":42,"Bob":31,"Control":86}	{"sint":17,"sboolean":true,"sstring":"Abe Linkedin"}	BLUE	72		NULL

Modified: hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/avro_schema_literal.q.out Tue Jul 30 17:09:30 2013
@@ -77,5 +77,5 @@ struct1             	struct<sint:int,sbo
 union1              	uniontype<float,boolean,string>	from deserializer   
 enum1               	string              	from deserializer   
 nullableint         	int                 	from deserializer   
-bytes1              	array<tinyint>      	from deserializer   
-fixed1              	array<tinyint>      	from deserializer   
+bytes1              	binary              	from deserializer   
+fixed1              	binary              	from deserializer   

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroDeserializer.java Tue Jul 30 17:09:30 2013
@@ -17,8 +17,19 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.avro.Schema;
+import org.apache.avro.Schema.Type;
 import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Fixed;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
 import org.apache.avro.generic.GenericRecord;
@@ -32,21 +43,12 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.apache.hadoop.io.Writable;
 
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
 class AvroDeserializer {
   private static final Log LOG = LogFactory.getLog(AvroDeserializer.class);
   /**
@@ -62,7 +64,7 @@ class AvroDeserializer {
     private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
     private final GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>();
     private BinaryDecoder binaryDecoder = null;
-    private InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>> gdrCache
+    private final InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>> gdrCache
         = new InstanceCache<ReaderWriterSchemaPair, GenericDatumReader<GenericRecord>>() {
             @Override
             protected GenericDatumReader<GenericRecord> makeInstance(ReaderWriterSchemaPair hv) {
@@ -112,13 +114,15 @@ class AvroDeserializer {
    */
   public Object deserialize(List<String> columnNames, List<TypeInfo> columnTypes,
                             Writable writable, Schema readerSchema) throws AvroSerdeException {
-    if(!(writable instanceof AvroGenericRecordWritable))
+    if(!(writable instanceof AvroGenericRecordWritable)) {
       throw new AvroSerdeException("Expecting a AvroGenericRecordWritable");
+    }
 
-    if(row == null || row.size() != columnNames.size())
+    if(row == null || row.size() != columnNames.size()) {
       row = new ArrayList<Object>(columnNames.size());
-    else
+    } else {
       row.clear();
+    }
 
     AvroGenericRecordWritable recordWritable = (AvroGenericRecordWritable) writable;
     GenericRecord r = recordWritable.getRecord();
@@ -127,7 +131,9 @@ class AvroDeserializer {
     if(!r.getSchema().equals(readerSchema)) {
       LOG.warn("Received different schemas.  Have to re-encode: " +
               r.getSchema().toString(false));
-      if(reEncoder == null) reEncoder = new SchemaReEncoder();
+      if(reEncoder == null) {
+        reEncoder = new SchemaReEncoder();
+      }
       r = reEncoder.reencode(r, readerSchema);
     }
 
@@ -156,25 +162,49 @@ class AvroDeserializer {
     // Klaxon! Klaxon! Klaxon!
     // Avro requires NULLable types to be defined as unions of some type T
     // and NULL.  This is annoying and we're going to hide it from the user.
-    if(AvroSerdeUtils.isNullableType(recordSchema))
+    if(AvroSerdeUtils.isNullableType(recordSchema)) {
       return deserializeNullableUnion(datum, recordSchema, columnType);
+    }
 
-    if(columnType == TypeInfoFactory.stringTypeInfo)
-      return datum.toString(); // To workaround AvroUTF8
-      // This also gets us around the Enum issue since we just take the value
-      // and convert it to a string. Yay!
 
     switch(columnType.getCategory()) {
     case STRUCT:
       return deserializeStruct((GenericData.Record) datum, (StructTypeInfo) columnType);
-     case UNION:
+    case UNION:
       return deserializeUnion(datum, recordSchema, (UnionTypeInfo) columnType);
     case LIST:
       return deserializeList(datum, recordSchema, (ListTypeInfo) columnType);
     case MAP:
       return deserializeMap(datum, recordSchema, (MapTypeInfo) columnType);
+    case PRIMITIVE:
+      return deserializePrimitive(datum, recordSchema, (PrimitiveTypeInfo) columnType);
     default:
-      return datum; // Simple type.
+      throw new AvroSerdeException("Unknown TypeInfo: " + columnType.getCategory());
+    }
+  }
+
+  private Object deserializePrimitive(Object datum, Schema recordSchema,
+      PrimitiveTypeInfo columnType) throws AvroSerdeException {
+    switch (columnType.getPrimitiveCategory()){
+    case STRING:
+      return datum.toString(); // To workaround AvroUTF8
+      // This also gets us around the Enum issue since we just take the value
+      // and convert it to a string. Yay!
+    case BINARY:
+      if (recordSchema.getType() == Type.FIXED){
+        Fixed fixed = (Fixed) datum;
+        return fixed.bytes();
+      } else if (recordSchema.getType() == Type.BYTES){
+        ByteBuffer bb = (ByteBuffer) datum;
+        bb.rewind();
+        byte[] result = new byte[bb.limit()];
+        bb.get(result);
+        return result;
+      } else {
+        throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + recordSchema.getType());
+      }
+    default:
+      return datum;
     }
   }
 
@@ -186,8 +216,9 @@ class AvroDeserializer {
                                           TypeInfo columnType) throws AvroSerdeException {
     int tag = GenericData.get().resolveUnion(recordSchema, datum); // Determine index of value
     Schema schema = recordSchema.getTypes().get(tag);
-    if(schema.getType().equals(Schema.Type.NULL))
+    if(schema.getType().equals(Schema.Type.NULL)) {
       return null;
+    }
     return worker(datum, schema, SchemaToTypeInfo.generateTypeInfo(schema));
 
   }

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerializer.java Tue Jul 30 17:09:30 2013
@@ -18,9 +18,17 @@
 package org.apache.hadoop.hive.serde2.avro;
 
 
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.avro.Schema;
 import org.apache.avro.Schema.Field;
+import org.apache.avro.Schema.Type;
 import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericData.Fixed;
 import org.apache.avro.generic.GenericEnumSymbol;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -38,15 +46,6 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.apache.hadoop.io.Writable;
 
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-import static org.apache.avro.Schema.Type.BYTES;
-import static org.apache.avro.Schema.Type.FIXED;
-
 class AvroSerializer {
   private static final Log LOG = LogFactory.getLog(AvroSerializer.class);
 
@@ -67,12 +66,14 @@ class AvroSerializer {
     GenericData.Record record = new GenericData.Record(schema);
 
     List<? extends StructField> outputFieldRefs = soi.getAllStructFieldRefs();
-    if(outputFieldRefs.size() != columnNames.size())
+    if(outputFieldRefs.size() != columnNames.size()) {
       throw new AvroSerdeException("Number of input columns was different than output columns (in = " + columnNames.size() + " vs out = " + outputFieldRefs.size());
+    }
 
     int size = schema.getFields().size();
-    if(outputFieldRefs.size() != size) // Hive does this check for us, so we should be ok.
+    if(outputFieldRefs.size() != size) {
       throw new AvroSerdeException("Hive passed in a different number of fields than the schema expected: (Hive wanted " + outputFieldRefs.size() +", Avro expected " + schema.getFields().size());
+    }
 
     List<? extends StructField> allStructFieldRefs = soi.getAllStructFieldRefs();
     List<Object> structFieldsDataAsList = soi.getStructFieldsDataAsList(o);
@@ -88,8 +89,9 @@ class AvroSerializer {
       record.put(field.name(), val);
     }
 
-    if(!GenericData.get().validate(schema, record))
+    if(!GenericData.get().validate(schema, record)) {
       throw new SerializeToAvroException(schema, record);
+    }
 
     cache.setRecord(record);
 
@@ -111,7 +113,7 @@ class AvroSerializer {
     switch(typeInfo.getCategory()) {
       case PRIMITIVE:
         assert fieldOI instanceof PrimitiveObjectInspector;
-        return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData);
+        return serializePrimitive(typeInfo, (PrimitiveObjectInspector) fieldOI, structFieldData, schema);
       case MAP:
         assert fieldOI instanceof MapObjectInspector;
         assert typeInfo instanceof MapTypeInfo;
@@ -153,7 +155,7 @@ class AvroSerializer {
         };
 
   private Object serializeEnum(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
-    return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData));
+    return enums.retrieve(schema).retrieve(serializePrimitive(typeInfo, fieldOI, structFieldData, schema));
   }
 
   private Object serializeStruct(StructTypeInfo typeInfo, StructObjectInspector ssoi, Object o, Schema schema) throws AvroSerdeException {
@@ -176,14 +178,24 @@ class AvroSerializer {
     return record;
   }
 
-  private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
+  private Object serializePrimitive(TypeInfo typeInfo, PrimitiveObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
     switch(fieldOI.getPrimitiveCategory()) {
-      case UNKNOWN:
-        throw new AvroSerdeException("Received UNKNOWN primitive category.");
-      case VOID:
-        return null;
-      default: // All other primitive types are simple
-        return fieldOI.getPrimitiveJavaObject(structFieldData);
+    case BINARY:
+      if (schema.getType() == Type.BYTES){
+        ByteBuffer bb = ByteBuffer.wrap((byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+        return bb.rewind();
+      } else if (schema.getType() == Type.FIXED){
+        Fixed fixed = new GenericData.Fixed(schema, (byte[])fieldOI.getPrimitiveJavaObject(structFieldData));
+        return fixed;
+      } else {
+        throw new AvroSerdeException("Unexpected Avro schema for Binary TypeInfo: " + schema.getType());
+      }
+    case UNKNOWN:
+      throw new AvroSerdeException("Received UNKNOWN primitive category.");
+    case VOID:
+      return null;
+    default: // All other primitive types are simple
+      return fieldOI.getPrimitiveJavaObject(structFieldData);
     }
   }
 
@@ -197,53 +209,7 @@ class AvroSerializer {
                      schema.getTypes().get(tag));
   }
 
-  // We treat FIXED and BYTES as arrays of tinyints within Hive.  Check
-  // if we're dealing with either of these types and thus need to serialize
-  // them as their Avro types.
-  private boolean isTransformedType(Schema schema) {
-    return schema.getType().equals(FIXED) || schema.getType().equals(BYTES);
-  }
-
-  private Object serializeTransformedType(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
-    if(LOG.isDebugEnabled()) {
-      LOG.debug("Beginning to transform " + typeInfo + " with Avro schema " + schema.toString(false));
-    }
-    if(schema.getType().equals(FIXED)) return serializedAvroFixed(typeInfo, fieldOI, structFieldData, schema);
-    else return serializeAvroBytes(typeInfo, fieldOI, structFieldData, schema);
-
-  }
-
-  private Object serializeAvroBytes(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
-    ByteBuffer bb = ByteBuffer.wrap(extraByteArray(fieldOI, structFieldData));
-    return bb.rewind();
-  }
-
-  private Object serializedAvroFixed(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
-    return new GenericData.Fixed(schema, extraByteArray(fieldOI, structFieldData));
-  }
-
-  // For transforming to BYTES and FIXED, pull out the byte array Avro will want
-  private byte[] extraByteArray(ListObjectInspector fieldOI, Object structFieldData) throws AvroSerdeException {
-    // Grab a book.  This is going to be slow.
-    int listLength = fieldOI.getListLength(structFieldData);
-    byte[] bytes = new byte[listLength];
-    assert fieldOI.getListElementObjectInspector() instanceof PrimitiveObjectInspector;
-    PrimitiveObjectInspector poi = (PrimitiveObjectInspector)fieldOI.getListElementObjectInspector();
-    List<?> list = fieldOI.getList(structFieldData);
-
-    for(int i = 0; i < listLength; i++) {
-      Object b = poi.getPrimitiveJavaObject(list.get(i));
-      if(!(b instanceof Byte))
-        throw new AvroSerdeException("Attempting to transform to bytes, element was not byte but " + b.getClass().getCanonicalName());
-      bytes[i] = (Byte)b;
-    }
-    return bytes;
-  }
-
   private Object serializeList(ListTypeInfo typeInfo, ListObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
-    if(isTransformedType(schema))
-      return serializeTransformedType(typeInfo, fieldOI, structFieldData, schema);
-    
     List<?> list = fieldOI.getList(structFieldData);
     List<Object> deserialized = new ArrayList<Object>(list.size());
 
@@ -260,8 +226,9 @@ class AvroSerializer {
 
   private Object serializeMap(MapTypeInfo typeInfo, MapObjectInspector fieldOI, Object structFieldData, Schema schema) throws AvroSerdeException {
     // Avro only allows maps with string keys
-    if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector()))
+    if(!mapHasStringKey(fieldOI.getMapKeyObjectInspector())) {
       throw new AvroSerdeException("Avro only supports maps with keys as Strings.  Current Map is: " + typeInfo.toString());
+    }
 
     ObjectInspector mapKeyObjectInspector = fieldOI.getMapKeyObjectInspector();
     ObjectInspector mapValueObjectInspector = fieldOI.getMapValueObjectInspector();

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/SchemaToTypeInfo.java Tue Jul 30 17:09:30 2013
@@ -17,24 +17,26 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
-import org.apache.avro.Schema;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-
 import static org.apache.avro.Schema.Type.BOOLEAN;
+import static org.apache.avro.Schema.Type.BYTES;
 import static org.apache.avro.Schema.Type.DOUBLE;
+import static org.apache.avro.Schema.Type.FIXED;
 import static org.apache.avro.Schema.Type.FLOAT;
 import static org.apache.avro.Schema.Type.INT;
 import static org.apache.avro.Schema.Type.LONG;
 import static org.apache.avro.Schema.Type.NULL;
 import static org.apache.avro.Schema.Type.STRING;
 
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+
 /**
  * Convert an Avro Schema to a Hive TypeInfo
  */
@@ -47,7 +49,8 @@ class SchemaToTypeInfo {
   // long             bigint     check
   // float            double     check
   // double           double     check
-  // bytes
+  // bytes            binary     check
+  // fixed            binary     check
   // string           string     check
   //                  tinyint
   //                  smallint
@@ -56,13 +59,15 @@ class SchemaToTypeInfo {
   private static final Map<Schema.Type, TypeInfo> primitiveTypeToTypeInfo = initTypeMap();
   private static Map<Schema.Type, TypeInfo> initTypeMap() {
     Map<Schema.Type, TypeInfo> theMap = new Hashtable<Schema.Type, TypeInfo>();
-    theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
-    theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
+    theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
     theMap.put(BOOLEAN, TypeInfoFactory.getPrimitiveTypeInfo("boolean"));
+    theMap.put(INT, TypeInfoFactory.getPrimitiveTypeInfo("int"));
     theMap.put(LONG, TypeInfoFactory.getPrimitiveTypeInfo("bigint"));
     theMap.put(FLOAT, TypeInfoFactory.getPrimitiveTypeInfo("float"));
     theMap.put(DOUBLE, TypeInfoFactory.getPrimitiveTypeInfo("double"));
-    theMap.put(NULL, TypeInfoFactory.getPrimitiveTypeInfo("void"));
+    theMap.put(BYTES, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+    theMap.put(FIXED, TypeInfoFactory.getPrimitiveTypeInfo("binary"));
+    theMap.put(STRING, TypeInfoFactory.getPrimitiveTypeInfo("string"));
     return Collections.unmodifiableMap(theMap);
   }
 
@@ -106,22 +111,22 @@ class SchemaToTypeInfo {
   private static TypeInfo generateTypeInfoWorker(Schema schema) throws AvroSerdeException {
     // Avro requires NULLable types to be defined as unions of some type T
     // and NULL.  This is annoying and we're going to hide it from the user.
-    if(AvroSerdeUtils.isNullableType(schema))
+    if(AvroSerdeUtils.isNullableType(schema)) {
       return generateTypeInfo(AvroSerdeUtils.getOtherTypeFromNullableType(schema));
+    }
 
     Schema.Type type = schema.getType();
 
-    if(primitiveTypeToTypeInfo.containsKey(type))
+    if(primitiveTypeToTypeInfo.containsKey(type)) {
       return primitiveTypeToTypeInfo.get(type);
+    }
 
     switch(type) {
-      case BYTES:  return generateBytesTypeInfo(schema);
       case RECORD: return generateRecordTypeInfo(schema);
       case MAP:    return generateMapTypeInfo(schema);
       case ARRAY:  return generateArrayTypeInfo(schema);
       case UNION:  return generateUnionTypeInfo(schema);
       case ENUM:   return generateEnumTypeInfo(schema);
-      case FIXED:  return generateFixedTypeInfo(schema);
       default:     throw new AvroSerdeException("Do not yet support: " + schema);
     }
   }
@@ -183,22 +188,4 @@ class SchemaToTypeInfo {
 
     return TypeInfoFactory.getPrimitiveTypeInfo("string");
   }
-
-  // Hive doesn't have a Fixed type, so we're going to treat them as arrays of
-  // bytes
-  // TODO: Make note in documentation that Hive sends these out as signed bytes.
-  private static final TypeInfo FIXED_AND_BYTES_EQUIV =
-          TypeInfoFactory.getListTypeInfo(TypeInfoFactory.byteTypeInfo);
-  private static TypeInfo generateFixedTypeInfo(Schema schema) {
-    assert schema.getType().equals(Schema.Type.FIXED);
-
-    return FIXED_AND_BYTES_EQUIV;
-  }
-
-  // Avro considers bytes to be a primitive type, but Hive doesn't.  We'll
-  // convert them to a list of bytes, just like Fixed.  Sigh.
-  private static TypeInfo generateBytesTypeInfo(Schema schema) {
-    assert schema.getType().equals(Schema.Type.BYTES);
-    return FIXED_AND_BYTES_EQUIV;
-  }
 }

Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroDeserializer.java Tue Jul 30 17:09:30 2013
@@ -17,6 +17,18 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Hashtable;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.avro.Schema;
 import org.apache.avro.generic.GenericData;
 import org.apache.hadoop.hive.serde2.SerDeException;
@@ -30,18 +42,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.util.ArrayList;
-import java.util.Hashtable;
-import java.util.List;
-import java.util.Map;
-import java.util.HashMap;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertNull;
-import static org.junit.Assert.assertTrue;
-
 public class TestAvroDeserializer {
   private final GenericData GENERIC_DATA = GenericData.get();
 
@@ -338,12 +338,12 @@ public class TestAvroDeserializer {
     ArrayList<Object> row =
             (ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
     assertEquals(1, row.size());
-    Object theArrayObject = row.get(0);
-    assertTrue(theArrayObject instanceof List);
-    List theList = (List)theArrayObject;
+    Object byteObject = row.get(0);
+    assertTrue(byteObject instanceof byte[]);
+    byte[] outBytes = (byte[]) byteObject;
     // Verify the raw object that's been created
     for(int i = 0; i < bytes.length; i++) {
-      assertEquals(bytes[i], theList.get(i));
+      assertEquals(bytes[i], outBytes[i]);
     }
 
     // Now go the correct way, through objectinspectors
@@ -352,9 +352,9 @@ public class TestAvroDeserializer {
     assertEquals(1, fieldsDataAsList.size());
     StructField fieldRef = oi.getStructFieldRef("hash");
 
-    List theList2 = (List)oi.getStructFieldData(row, fieldRef);
-    for(int i = 0; i < bytes.length; i++) {
-      assertEquals(bytes[i], theList2.get(i));
+    outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+    for(int i = 0; i < outBytes.length; i++) {
+      assertEquals(bytes[i], outBytes[i]);
     }
   }
 
@@ -377,8 +377,13 @@ public class TestAvroDeserializer {
     ArrayList<Object> row =
             (ArrayList<Object>)de.deserialize(aoig.getColumnNames(), aoig.getColumnTypes(), garw, s);
     assertEquals(1, row.size());
-    Object theArrayObject = row.get(0);
-    assertTrue(theArrayObject instanceof List);
+    Object byteObject = row.get(0);
+    assertTrue(byteObject instanceof byte[]);
+    byte[] outBytes = (byte[]) byteObject;
+    // Verify the raw object that's been created
+    for(int i = 0; i < bytes.length; i++) {
+      assertEquals(bytes[i], outBytes[i]);
+    }
 
     // Now go the correct way, through objectinspectors
     StandardStructObjectInspector oi = (StandardStructObjectInspector)aoig.getObjectInspector();
@@ -386,9 +391,9 @@ public class TestAvroDeserializer {
     assertEquals(1, fieldsDataAsList.size());
     StructField fieldRef = oi.getStructFieldRef("bytesField");
 
-    List theList2 = (List)oi.getStructFieldData(row, fieldRef);
-    for(int i = 0; i < bytes.length; i++) {
-      assertEquals(bytes[i], theList2.get(i));
+    outBytes = (byte[]) oi.getStructFieldData(row, fieldRef);
+    for(int i = 0; i < outBytes.length; i++) {
+      assertEquals(bytes[i], outBytes[i]);
     }
   }
 
@@ -489,9 +494,10 @@ public class TestAvroDeserializer {
     ObjectInspector fieldObjectInspector = fieldRef.getFieldObjectInspector();
     StringObjectInspector soi = (StringObjectInspector)fieldObjectInspector;
 
-    if(expected == null)
+    if(expected == null) {
       assertNull(soi.getPrimitiveJavaObject(rowElement));
-    else
+    } else {
       assertEquals(expected, soi.getPrimitiveJavaObject(rowElement));
+    }
   }
 }

Modified: hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java?rev=1508528&r1=1508527&r2=1508528&view=diff
==============================================================================
--- hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java (original)
+++ hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/avro/TestAvroObjectInspectorGenerator.java Tue Jul 30 17:09:30 2013
@@ -17,10 +17,18 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+
+import java.util.ArrayList;
+import java.util.List;
+
 import org.apache.avro.Schema;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -32,13 +40,6 @@ import org.apache.hadoop.hive.serde2.typ
 import org.apache.hadoop.hive.serde2.typeinfo.UnionTypeInfo;
 import org.junit.Test;
 
-import java.util.ArrayList;
-import java.util.List;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-import static org.junit.Assert.fail;
-
 public class TestAvroObjectInspectorGenerator {
   private final TypeInfo STRING = TypeInfoFactory.getPrimitiveTypeInfo("string");
   private final TypeInfo INT = TypeInfoFactory.getPrimitiveTypeInfo("int");
@@ -353,7 +354,7 @@ public class TestAvroObjectInspectorGene
     AvroObjectInspectorGenerator aoig = new AvroObjectInspectorGenerator(s);
     verifyMap(aoig, "aMap");
   }
- 
+
   /**
    * Check a given AvroObjectInspectorGenerator to verify that it matches our test
    * schema's expected map.
@@ -476,10 +477,8 @@ public class TestAvroObjectInspectorGene
     // Column types
     assertEquals(1, aoig.getColumnTypes().size());
     TypeInfo typeInfo = aoig.getColumnTypes().get(0);
-    assertTrue(typeInfo instanceof ListTypeInfo);
-    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
-    assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
-    assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+    assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+    assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
   }
 
   @Test // Avro considers bytes primitive, Hive doesn't. Make them list of tinyint.
@@ -495,10 +494,8 @@ public class TestAvroObjectInspectorGene
     // Column types
     assertEquals(1, aoig.getColumnTypes().size());
     TypeInfo typeInfo = aoig.getColumnTypes().get(0);
-    assertTrue(typeInfo instanceof ListTypeInfo);
-    ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
-    assertTrue(listTypeInfo.getListElementTypeInfo() instanceof PrimitiveTypeInfo);
-    assertEquals("tinyint", listTypeInfo.getListElementTypeInfo().getTypeName());
+    assertTrue(typeInfo instanceof PrimitiveTypeInfo);
+    assertEquals(((PrimitiveTypeInfo) typeInfo).getPrimitiveCategory(), PrimitiveCategory.BINARY);
   }
 
   @Test // That Union[T, NULL] is converted to just T.