You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/12/09 20:00:15 UTC

svn commit: r1644151 - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/ ql/src/test/queries/clientpositive/ q...

Author: brock
Date: Tue Dec  9 19:00:15 2014
New Revision: 1644151

URL: http://svn.apache.org/r1644151
Log:
HIVE-7073 - Implement Binary in ParquetSerDe (Ferdinand Xu via Brock)

Modified:
    hive/trunk/data/files/parquet_types.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
    hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
    hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out

Modified: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (original)
+++ hive/trunk/data/files/parquet_types.txt Tue Dec  9 19:00:15 2014
@@ -1,21 +1,22 @@
-100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a   |a  |k1:v1|101,200|10,abc
-101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab  |ab |k2:v2|102,200|10,def
-102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi
-103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl
-104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno
-105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr
-106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu
-107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx
-108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza
-109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd
-110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg
-111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij
-112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm
-113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop
-114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs
-115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs
-116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy
-117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab
-118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde
-119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh
-120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a   |a  |B4F3CAFDBEDD|k1:v1|101,200|10,abc
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab  |ab |68692CCAC0BDE7|k2:v2|102,200|10,def
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|B4F3CAFDBEDD|k3:v3|103,200|10,ghi
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|68692CCAC0BDE7|k4:v4|104,200|10,jkl
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|B4F3CAFDBEDD|k5:v5|105,200|10,mno
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|68692CCAC0BDE7|k6:v6|106,200|10,pqr
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|B4F3CAFDBEDD|k7:v7|107,200|10,stu
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|68692CCAC0BDE7|k8:v8|108,200|10,vwx
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|B4F3CAFDBEDD|68656C6C6F|k9:v9|109,200|10,yza
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|68692CCAC0BDE7|k10:v10|110,200|10,bcd
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|B4F3CAFDBEDD|k11:v11|111,200|10,efg
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|68692CCAC0BDE7|k12:v12|112,200|10,hij
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|B4F3CAFDBEDD|k13:v13|113,200|10,klm
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|68692CCAC0BDE7|k14:v14|114,200|10,nop
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|B4F3CAFDBEDD|k15:v15|115,200|10,qrs
+115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|68692CCAC0BDE7|k16:v16|116,200|10,qrs
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|B4F3CAFDBEDD|k17:v17|117,200|10,wxy
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|68692CCAC0BDE7|k18:v18|118,200|10,zab
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|B4F3CAFDBEDD|k19:v19|119,200|10,cde
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|68692CCAC0BDE7|k20:v20|120,200|10,fgh
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|B4F3CAFDBEDD|k21:v21|121,200|10,ijk
+121|1|2|1.1|6.3|lmn|2032-10-10 22:22:22.222222222|bcdef|abcde||k22:v22|122,200|10,lmn
\ No newline at end of file

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveSchemaConverter.java Tue Dec  9 19:00:15 2014
@@ -75,8 +75,7 @@ public class HiveSchemaConverter {
       } else if (typeInfo.equals(TypeInfoFactory.booleanTypeInfo)) {
         return new PrimitiveType(repetition, PrimitiveTypeName.BOOLEAN, name);
       } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)) {
-        // TODO : binaryTypeInfo is a byte array. Need to map it
-        throw new UnsupportedOperationException("Binary type not implemented");
+        return new PrimitiveType(repetition, PrimitiveTypeName.BINARY, name);
       } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
         return new PrimitiveType(repetition, PrimitiveTypeName.INT96, name);
       } else if (typeInfo.equals(TypeInfoFactory.voidTypeInfo)) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java Tue Dec  9 19:00:15 2014
@@ -102,7 +102,9 @@ public class ArrayWritableObjectInspecto
       return ParquetPrimitiveInspectorFactory.parquetShortInspector;
     } else if (typeInfo.equals(TypeInfoFactory.timestampTypeInfo)) {
       return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector;
-    } else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
+    } else if (typeInfo.equals(TypeInfoFactory.binaryTypeInfo)){
+      return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+    }else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
       throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
     } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
       return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ParquetHiveSerDe.java Tue Dec  9 19:00:15 2014
@@ -37,8 +37,7 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
-import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
@@ -49,8 +48,11 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
-import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -131,6 +133,7 @@ public class ParquetHiveSerDe extends Ab
     } else {
       columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
     }
+
     if (columnNames.size() != columnTypes.size()) {
       throw new IllegalArgumentException("ParquetHiveSerde initialization failed. Number of column " +
         "name and column type differs. columnNames = " + columnNames + ", columnTypes = " +
@@ -298,7 +301,9 @@ public class ParquetHiveSerDe extends Ab
       return new BytesWritable(Binary.fromString(strippedValue).getBytes());
     case VARCHAR:
       String value = ((HiveVarcharObjectInspector) inspector).getPrimitiveJavaObject(obj).getValue();
-        return new BytesWritable(Binary.fromString(value).getBytes());
+      return new BytesWritable(Binary.fromString(value).getBytes());
+    case BINARY:
+      return new BytesWritable(((BinaryObjectInspector) inspector).getPrimitiveJavaObject(obj));
     default:
       throw new SerDeException("Unknown primitive : " + inspector.getPrimitiveCategory());
     }

Modified: hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java (original)
+++ hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestParquetSerDe.java Tue Dec  9 19:00:15 2014
@@ -29,11 +29,8 @@ import org.apache.hadoop.io.ArrayWritabl
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.io.Writable;
 
-import parquet.io.api.Binary;
-
 public class TestParquetSerDe extends TestCase {
 
   public void testParquetHiveSerDe() throws Throwable {
@@ -47,15 +44,16 @@ public class TestParquetSerDe extends Te
       SerDeUtils.initializeSerDe(serDe, conf, tbl, null);
 
       // Data
-      final Writable[] arr = new Writable[8];
+      final Writable[] arr = new Writable[9];
 
+      //primitive types
       arr[0] = new ByteWritable((byte) 123);
       arr[1] = new ShortWritable((short) 456);
       arr[2] = new IntWritable(789);
       arr[3] = new LongWritable(1000l);
       arr[4] = new DoubleWritable((double) 5.3);
       arr[5] = new BytesWritable("hive and hadoop and parquet. Big family.".getBytes("UTF-8"));
-
+      arr[6] = new BytesWritable("parquetSerde binary".getBytes("UTF-8"));
       final Writable[] mapContainer = new Writable[1];
       final Writable[] map = new Writable[3];
       for (int i = 0; i < 3; ++i) {
@@ -65,7 +63,7 @@ public class TestParquetSerDe extends Te
         map[i] = new ArrayWritable(Writable.class, pair);
       }
       mapContainer[0] = new ArrayWritable(Writable.class, map);
-      arr[6] = new ArrayWritable(Writable.class, mapContainer);
+      arr[7] = new ArrayWritable(Writable.class, mapContainer);
 
       final Writable[] arrayContainer = new Writable[1];
       final Writable[] array = new Writable[5];
@@ -73,7 +71,7 @@ public class TestParquetSerDe extends Te
         array[i] = new BytesWritable(("elem_" + i).getBytes("UTF-8"));
       }
       arrayContainer[0] = new ArrayWritable(Writable.class, array);
-      arr[7] = new ArrayWritable(Writable.class, arrayContainer);
+      arr[8] = new ArrayWritable(Writable.class, arrayContainer);
 
       final ArrayWritable arrWritable = new ArrayWritable(Writable.class, arr);
       // Test
@@ -107,8 +105,9 @@ public class TestParquetSerDe extends Te
     final Properties tbl = new Properties();
 
     // Set the configuration parameters
-    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,amap,alist");
-    tbl.setProperty("columns.types", "tinyint:smallint:int:bigint:double:string:map<string,int>:array<string>");
+    tbl.setProperty("columns", "abyte,ashort,aint,along,adouble,astring,abinary,amap,alist");
+    tbl.setProperty("columns.types",
+      "tinyint:smallint:int:bigint:double:string:binary:map<string,int>:array<string>");
     tbl.setProperty(org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_NULL_FORMAT, "NULL");
     return tbl;
   }

Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Tue Dec  9 19:00:15 2014
@@ -11,6 +11,7 @@ CREATE TABLE parquet_types_staging (
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary string,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -29,6 +30,7 @@ CREATE TABLE parquet_types (
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary binary,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -36,9 +38,14 @@ CREATE TABLE parquet_types (
 
 LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
 
-INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging;
+SELECT * FROM parquet_types_staging;
 
-SELECT * FROM parquet_types;
+INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1 FROM parquet_types_staging;
+
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+hex(cbinary), m1, l1, st1 FROM parquet_types;
 
 SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types;
 

Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1644151&r1=1644150&r2=1644151&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Tue Dec  9 19:00:15 2014
@@ -16,6 +16,7 @@ PREHOOK: query: CREATE TABLE parquet_typ
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary string,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -36,6 +37,7 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary string,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -56,6 +58,7 @@ PREHOOK: query: CREATE TABLE parquet_typ
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary binary,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -73,6 +76,7 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   t timestamp,
   cchar char(5),
   cvarchar varchar(10),
+  cbinary binary,
   m1 map<string, varchar(3)>,
   l1 array<int>,
   st1 struct<c1:int, c2:char(1)>
@@ -88,14 +92,49 @@ POSTHOOK: query: LOAD DATA LOCAL INPATH
 POSTHOOK: type: LOAD
 #### A masked pattern was here ####
 POSTHOOK: Output: default@parquet_types_staging
-PREHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+PREHOOK: query: SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111	a    	a  	B4F3CAFDBEDD	{"k1":"v1"}	[101,200]	{"c1":10,"c2":"a"}
+101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222	ab   	ab 	68692CCAC0BDE7	{"k2":"v2"}	[102,200]	{"c1":10,"c2":"d"}
+102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333	abc  	abc	B4F3CAFDBEDD	{"k3":"v3"}	[103,200]	{"c1":10,"c2":"g"}
+103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444	abcd 	abcd	68692CCAC0BDE7	{"k4":"v4"}	[104,200]	{"c1":10,"c2":"j"}
+104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555	abcde	abcde	B4F3CAFDBEDD	{"k5":"v5"}	[105,200]	{"c1":10,"c2":"m"}
+105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666	abcde	abcdef	68692CCAC0BDE7	{"k6":"v6"}	[106,200]	{"c1":10,"c2":"p"}
+106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777	abcde	abcdefg	B4F3CAFDBEDD	{"k7":"v7"}	[107,200]	{"c1":10,"c2":"s"}
+107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888	bcdef	abcdefgh	68692CCAC0BDE7	{"k8":"v8"}	[108,200]	{"c1":10,"c2":"v"}
+108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999	cdefg	B4F3CAFDBE	68656C6C6F	{"k9":"v9"}	[109,200]	{"c1":10,"c2":"y"}
+109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101	klmno	abcdedef	68692CCAC0BDE7	{"k10":"v10"}	[110,200]	{"c1":10,"c2":"b"}
+110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111	pqrst	abcdede	B4F3CAFDBEDD	{"k11":"v11"}	[111,200]	{"c1":10,"c2":"e"}
+111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121	nopqr	abcded	68692CCAC0BDE7	{"k12":"v12"}	[112,200]	{"c1":10,"c2":"h"}
+112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131	opqrs	abcdd	B4F3CAFDBEDD	{"k13":"v13"}	[113,200]	{"c1":10,"c2":"k"}
+113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141	pqrst	abc	68692CCAC0BDE7	{"k14":"v14"}	[114,200]	{"c1":10,"c2":"n"}
+114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151	qrstu	b	B4F3CAFDBEDD	{"k15":"v15"}	[115,200]	{"c1":10,"c2":"q"}
+115	1	1	1.0	4.5	qrs	2026-04-04 16:16:16.161616161	rstuv	abcded	68692CCAC0BDE7	{"k16":"v16"}	[116,200]	{"c1":10,"c2":"q"}
+116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171	stuvw	abcded	B4F3CAFDBEDD	{"k17":"v17"}	[117,200]	{"c1":10,"c2":"w"}
+117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181	tuvwx	abcded	68692CCAC0BDE7	{"k18":"v18"}	[118,200]	{"c1":10,"c2":"z"}
+118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191	uvwzy	abcdede	B4F3CAFDBEDD	{"k19":"v19"}	[119,200]	{"c1":10,"c2":"c"}
+119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	68692CCAC0BDE7	{"k20":"v20"}	[120,200]	{"c1":10,"c2":"f"}
+120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	B4F3CAFDBEDD	{"k21":"v21"}	[121,200]	{"c1":10,"c2":"i"}
+121	1	2	1.1	6.3	lmn	2032-10-10 22:22:22.222222222	bcdef	abcde		{"k22":"v22"}	[122,200]	{"c1":10,"c2":"l"}
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1 FROM parquet_types_staging
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_types_staging
 PREHOOK: Output: default@parquet_types
-POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types SELECT * FROM parquet_types_staging
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1 FROM parquet_types_staging
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types_staging
 POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ]
 POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
@@ -108,35 +147,38 @@ POSTHOOK: Lineage: parquet_types.l1 SIMP
 POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ]
 POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
 POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
-PREHOOK: query: SELECT * FROM parquet_types
+PREHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+hex(cbinary), m1, l1, st1 FROM parquet_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT * FROM parquet_types
+POSTHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+hex(cbinary), m1, l1, st1 FROM parquet_types
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111	a    	a  	{"k1":"v1"}	[101,200]	{"c1":10,"c2":"a"}
-101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222	ab   	ab 	{"k2":"v2"}	[102,200]	{"c1":10,"c2":"d"}
-102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333	abc  	abc	{"k3":"v3"}	[103,200]	{"c1":10,"c2":"g"}
-103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444	abcd 	abcd	{"k4":"v4"}	[104,200]	{"c1":10,"c2":"j"}
-104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555	abcde	abcde	{"k5":"v5"}	[105,200]	{"c1":10,"c2":"m"}
-105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666	abcde	abcdef	{"k6":"v6"}	[106,200]	{"c1":10,"c2":"p"}
-106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777	abcde	abcdefg	{"k7":"v7"}	[107,200]	{"c1":10,"c2":"s"}
-107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888	bcdef	abcdefgh	{"k8":"v8"}	[108,200]	{"c1":10,"c2":"v"}
-108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999	cdefg	abcdefghij	{"k9":"v9"}	[109,200]	{"c1":10,"c2":"y"}
-109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101	klmno	abcdedef	{"k10":"v10"}	[110,200]	{"c1":10,"c2":"b"}
-110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111	pqrst	abcdede	{"k11":"v11"}	[111,200]	{"c1":10,"c2":"e"}
-111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121	nopqr	abcded	{"k12":"v12"}	[112,200]	{"c1":10,"c2":"h"}
-112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131	opqrs	abcdd	{"k13":"v13"}	[113,200]	{"c1":10,"c2":"k"}
-113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141	pqrst	abc	{"k14":"v14"}	[114,200]	{"c1":10,"c2":"n"}
-114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151	qrstu	b	{"k15":"v15"}	[115,200]	{"c1":10,"c2":"q"}
-115	1	1	1.0	4.5	qrs	2026-04-04 16:16:16.161616161	rstuv	abcded	{"k16":"v16"}	[116,200]	{"c1":10,"c2":"q"}
-116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171	stuvw	abcded	{"k17":"v17"}	[117,200]	{"c1":10,"c2":"w"}
-117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181	tuvwx	abcded	{"k18":"v18"}	[118,200]	{"c1":10,"c2":"z"}
-118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191	uvwzy	abcdede	{"k19":"v19"}	[119,200]	{"c1":10,"c2":"c"}
-119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	{"k20":"v20"}	[120,200]	{"c1":10,"c2":"f"}
-120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	{"k21":"v21"}	[121,200]	{"c1":10,"c2":"i"}
+100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111	a    	a  	B4F3CAFDBEDD	{"k1":"v1"}	[101,200]	{"c1":10,"c2":"a"}
+101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222	ab   	ab 	68692CCAC0BDE7	{"k2":"v2"}	[102,200]	{"c1":10,"c2":"d"}
+102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333	abc  	abc	B4F3CAFDBEDD	{"k3":"v3"}	[103,200]	{"c1":10,"c2":"g"}
+103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444	abcd 	abcd	68692CCAC0BDE7	{"k4":"v4"}	[104,200]	{"c1":10,"c2":"j"}
+104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555	abcde	abcde	B4F3CAFDBEDD	{"k5":"v5"}	[105,200]	{"c1":10,"c2":"m"}
+105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666	abcde	abcdef	68692CCAC0BDE7	{"k6":"v6"}	[106,200]	{"c1":10,"c2":"p"}
+106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777	abcde	abcdefg	B4F3CAFDBEDD	{"k7":"v7"}	[107,200]	{"c1":10,"c2":"s"}
+107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888	bcdef	abcdefgh	68692CCAC0BDE7	{"k8":"v8"}	[108,200]	{"c1":10,"c2":"v"}
+108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999	cdefg	B4F3CAFDBE	68656C6C6F	{"k9":"v9"}	[109,200]	{"c1":10,"c2":"y"}
+109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101	klmno	abcdedef	68692CCAC0BDE7	{"k10":"v10"}	[110,200]	{"c1":10,"c2":"b"}
+110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111	pqrst	abcdede	B4F3CAFDBEDD	{"k11":"v11"}	[111,200]	{"c1":10,"c2":"e"}
+111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121	nopqr	abcded	68692CCAC0BDE7	{"k12":"v12"}	[112,200]	{"c1":10,"c2":"h"}
+112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131	opqrs	abcdd	B4F3CAFDBEDD	{"k13":"v13"}	[113,200]	{"c1":10,"c2":"k"}
+113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141	pqrst	abc	68692CCAC0BDE7	{"k14":"v14"}	[114,200]	{"c1":10,"c2":"n"}
+114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151	qrstu	b	B4F3CAFDBEDD	{"k15":"v15"}	[115,200]	{"c1":10,"c2":"q"}
+115	1	1	1.0	4.5	qrs	2026-04-04 16:16:16.161616161	rstuv	abcded	68692CCAC0BDE7	{"k16":"v16"}	[116,200]	{"c1":10,"c2":"q"}
+116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171	stuvw	abcded	B4F3CAFDBEDD	{"k17":"v17"}	[117,200]	{"c1":10,"c2":"w"}
+117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181	tuvwx	abcded	68692CCAC0BDE7	{"k18":"v18"}	[118,200]	{"c1":10,"c2":"z"}
+118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191	uvwzy	abcdede	B4F3CAFDBEDD	{"k19":"v19"}	[119,200]	{"c1":10,"c2":"c"}
+119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	68692CCAC0BDE7	{"k20":"v20"}	[120,200]	{"c1":10,"c2":"f"}
+120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	B4F3CAFDBEDD	{"k21":"v21"}	[121,200]	{"c1":10,"c2":"i"}
+121	1	2	1.1	6.3	lmn	2032-10-10 22:22:22.222222222	bcdef	abcde		{"k22":"v22"}	[122,200]	{"c1":10,"c2":"l"}
 PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types
 PREHOOK: type: QUERY
 PREHOOK: Input: default@parquet_types
@@ -153,7 +195,7 @@ abcde	5	abcde	5
 abcde	5	abcdef	6
 abcde	5	abcdefg	7
 bcdef	5	abcdefgh	8
-cdefg	5	abcdefghij	10
+cdefg	5	B4F3CAFDBE	10
 klmno	5	abcdedef	8
 pqrst	5	abcdede	7
 nopqr	5	abcded	6
@@ -166,6 +208,7 @@ tuvwx	5	abcded	6
 uvwzy	5	abcdede	7
 vwxyz	5	abcdede	7
 wxyza	5	abcde	5
+bcdef	5	abcde	5
 PREHOOK: query: SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),
@@ -190,6 +233,6 @@ ORDER BY ctinyint
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-1	118	1	7	1.1857142789023263	1.8000000000000003
+1	121	1	8	1.1749999970197678	2.0621590627301285
 2	119	1	7	1.2142857142857142	1.8
 3	120	1	7	1.171428578240531	1.7999999999999996