Posted to commits@hive.apache.org by xu...@apache.org on 2014/09/24 18:42:29 UTC

svn commit: r1627365 - in /hive/trunk: data/files/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/ serde/src/java/o...

Author: xuefu
Date: Wed Sep 24 16:42:28 2014
New Revision: 1627365

URL: http://svn.apache.org/r1627365
Log:
HIVE-8205: Using strings in group type fails in ParquetSerDe (Mohit via Xuefu)

Modified:
    hive/trunk/data/files/parquet_types.txt
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
    hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
    hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java

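For context on the fix itself: before this change, ETypeConverter chose a char- or varchar-specific converter for UTF8 columns by indexing into a flat list of Hive column TypeInfos (hiveSchemaTypeInfos.get(index)). For a string nested inside a map, array, or struct, that index is the field's position within its enclosing Parquet group, not a position in the top-level column list, so the lookup could grab the wrong type or fall off the end of the list. The patch removes that lookup and all the plumbing that fed it: every UTF8 value is now converted uniformly to a Text by ESTRING_CONVERTER, and the char/varchar object inspectors (patched at the bottom of this commit) coerce the Text to the declared length. A minimal sketch of that coercion, mirroring the new Text branches added to the inspectors (the driver class is invented for illustration):

  import org.apache.hadoop.hive.common.type.HiveChar;
  import org.apache.hadoop.hive.common.type.HiveVarchar;
  import org.apache.hadoop.io.Text;

  public class TextCoercionSketch {
    public static void main(String[] args) {
      Text fromParquet = new Text("abcdefgh");  // what ESTRING_CONVERTER yields

      // char(5): value is truncated to 5 characters; HiveChar pads short
      // values on output (see the trailing spaces in the q.out diff below).
      HiveChar c = new HiveChar(fromParquet.toString(), 5);

      // varchar(10): truncated to at most 10 characters, never padded.
      HiveVarchar v = new HiveVarchar(fromParquet.toString(), 10);

      System.out.println("[" + c + "] [" + v + "]");
    }
  }
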
Modified: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (original)
+++ hive/trunk/data/files/parquet_types.txt Wed Sep 24 16:42:28 2014
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a   |a  
-101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab  |ab  
-102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc
-103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd
-104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde
-105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef
-106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg
-107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh
-108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop
-109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef
-110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede
-111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded
-112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd
-113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc
-114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b
-115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161|rstuv|abcded
-116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded
-117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded
-118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede
-119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede
-120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a   |a  |k1:v1|101,200|10,abc
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab  |ab |k2:v2|102,200|10,def
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs
+115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -13,9 +13,6 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -33,7 +30,7 @@ public class ArrayWritableGroupConverter
   private Writable[] mapPairContainer;
 
   public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index, List<TypeInfo> hiveSchemaTypeInfos) {
+      final int index) {
     this.parent = parent;
     this.index = index;
     int count = groupType.getFieldCount();
@@ -43,8 +40,7 @@ public class ArrayWritableGroupConverter
     isMap = count == 2;
     converters = new Converter[count];
     for (int i = 0; i < count; i++) {
-      converters[i] = getConverterFromDescription(groupType.getType(i), i, this,
-          hiveSchemaTypeInfos);
+      converters[i] = getConverterFromDescription(groupType.getType(i), i, this);
     }
   }
 

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -16,7 +16,6 @@ package org.apache.hadoop.hive.ql.io.par
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.Writable;
 
@@ -37,21 +36,19 @@ public class DataWritableGroupConverter 
   private final Object[] currentArr;
   private Writable[] rootMap;
 
-  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema,
-      final List<TypeInfo> hiveSchemaTypeInfos) {
-    this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos);
+  public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+    this(requestedSchema, null, 0, tableSchema);
     final int fieldCount = tableSchema.getFieldCount();
     this.rootMap = new Writable[fieldCount];
   }
 
   public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
-      final int index, final List<TypeInfo> hiveSchemaTypeInfos) {
-    this(groupType, parent, index, groupType, hiveSchemaTypeInfos);
+      final int index) {
+    this(groupType, parent, index, groupType);
   }
 
   public DataWritableGroupConverter(final GroupType selectedGroupType,
-      final HiveGroupConverter parent, final int index, final GroupType containingGroupType,
-      final List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
     this.parent = parent;
     this.index = index;
     final int totalFieldCount = containingGroupType.getFieldCount();
@@ -65,8 +62,7 @@ public class DataWritableGroupConverter 
       Type subtype = selectedFields.get(i);
       if (containingGroupType.getFields().contains(subtype)) {
         converters[i] = getConverterFromDescription(subtype,
-            containingGroupType.getFieldIndex(subtype.getName()), this,
-            hiveSchemaTypeInfos);
+            containingGroupType.getFieldIndex(subtype.getName()), this);
       } else {
         throw new IllegalStateException("Group type [" + containingGroupType +
             "] does not contain requested field: " + subtype);

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Wed Sep 24 16:42:28 2014
@@ -31,10 +31,8 @@ public class DataWritableRecordConverter
 
   private final DataWritableGroupConverter root;
 
-  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema,
-      final List<TypeInfo> hiveColumnTypeInfos) {
-    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema,
-        hiveColumnTypeInfos);
+  public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+    this.root = new DataWritableGroupConverter(requestedSchema, tableSchema);
   }
 
   @Override

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Wed Sep 24 16:42:28 2014
@@ -16,19 +16,12 @@ package org.apache.hadoop.hive.ql.io.par
 import java.math.BigDecimal;
 import java.sql.Timestamp;
 import java.util.ArrayList;
-import java.util.List;
 
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
 import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
 import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.FloatWritable;
@@ -152,32 +145,6 @@ public enum ETypeConverter {
         }
       };
     }
-  },
-  ECHAR_CONVERTER(HiveCharWritable.class) {
-    @Override
-    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
-      return new BinaryConverter<HiveCharWritable>(type, parent, index) {
-        @Override
-        protected HiveCharWritable convert(Binary binary) {
-          HiveChar hiveChar = new HiveChar();
-          hiveChar.setValue(binary.toStringUsingUTF8());
-          return new HiveCharWritable(hiveChar);
-        }
-      };
-    }
-  },
-  EVARCHAR_CONVERTER(HiveVarcharWritable.class) {
-    @Override
-    Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
-      return new BinaryConverter<HiveVarcharWritable>(type, parent, index) {
-        @Override
-        protected HiveVarcharWritable convert(Binary binary) {
-          HiveVarchar hiveVarchar = new HiveVarchar();
-          hiveVarchar.setValue(binary.toStringUsingUTF8());
-          return new HiveVarcharWritable(hiveVarchar);
-        }
-      };
-    }
   };
 
   final Class<?> _type;
@@ -193,7 +160,7 @@ public enum ETypeConverter {
   abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
 
   public static Converter getNewConverter(final PrimitiveType type, final int index,
-      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent) {
     if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
       //TODO- cleanup once parquet support Timestamp type annotation.
       return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
@@ -201,15 +168,7 @@ public enum ETypeConverter {
     if (OriginalType.DECIMAL == type.getOriginalType()) {
       return EDECIMAL_CONVERTER.getConverter(type, index, parent);
     } else if (OriginalType.UTF8 == type.getOriginalType()) {
-      if (hiveSchemaTypeInfos.get(index).getTypeName()
-          .startsWith(serdeConstants.CHAR_TYPE_NAME)) {
-        return ECHAR_CONVERTER.getConverter(type, index, parent);
-      } else if (hiveSchemaTypeInfos.get(index).getTypeName()
-          .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
-        return EVARCHAR_CONVERTER.getConverter(type, index, parent);
-      } else if (type.isPrimitive()) {
-        return ESTRING_CONVERTER.getConverter(type, index, parent);
-      }
+      return ESTRING_CONVERTER.getConverter(type, index, parent);
     }
 
     Class<?> javaType = type.getPrimitiveTypeName().javaType;

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -13,9 +13,6 @@
  */
 package org.apache.hadoop.hive.ql.io.parquet.convert;
 
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.io.Writable;
 
 import parquet.io.api.Converter;
@@ -26,20 +23,17 @@ import parquet.schema.Type.Repetition;
 public abstract class HiveGroupConverter extends GroupConverter {
 
   protected static Converter getConverterFromDescription(final Type type, final int index,
-      final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+      final HiveGroupConverter parent) {
     if (type == null) {
       return null;
     }
     if (type.isPrimitive()) {
-      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent,
-          hiveSchemaTypeInfos);
+      return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent);
     } else {
       if (type.asGroupType().getRepetition() == Repetition.REPEATED) {
-        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index,
-            hiveSchemaTypeInfos);
+        return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
       } else {
-        return new DataWritableGroupConverter(type.asGroupType(), parent, index,
-            hiveSchemaTypeInfos);
+        return new DataWritableGroupConverter(type.asGroupType(), parent, index);
       }
     }
   }

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Wed Sep 24 16:42:28 2014
@@ -14,7 +14,6 @@
 package org.apache.hadoop.hive.ql.io.parquet.read;
 
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -24,8 +23,6 @@ import org.apache.hadoop.hive.ql.io.IOCo
 import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
 import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.util.StringUtils;
 
@@ -56,7 +53,7 @@ public class DataWritableReadSupport ext
    * From a string which columns names (including hive column), return a list
    * of string columns
    *
-   * @param comma separated list of columns
+   * @param columns comma separated list of columns
    * @return list with virtual columns removed
    */
   private static List<String> getColumns(final String columns) {
@@ -64,27 +61,6 @@ public class DataWritableReadSupport ext
         removeVirtualColumns(StringUtils.getStringCollection(columns));
   }
 
-  private static List<TypeInfo> getColumnTypes(Configuration configuration) {
-
-    List<String> columnNames;
-    String columnNamesProperty = configuration.get(IOConstants.COLUMNS);
-    if (columnNamesProperty.length() == 0) {
-      columnNames = new ArrayList<String>();
-    } else {
-      columnNames = Arrays.asList(columnNamesProperty.split(","));
-    }
-    List<TypeInfo> columnTypes;
-    String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES);
-    if (columnTypesProperty.length() == 0) {
-      columnTypes = new ArrayList<TypeInfo>();
-    } else {
-      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
-    }
-
-    columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes);
-    return columnTypes;
-  }
-
   /**
    *
    * It creates the readContext for Parquet side with the requested schema during the init phase.
@@ -173,8 +149,7 @@ public class DataWritableReadSupport ext
     }
     final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
         parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
-    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
-        getColumnTypes(configuration));
+    return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
   }
 
   /**
@@ -194,4 +169,4 @@ public class DataWritableReadSupport ext
     }
     return requestedSchema;
   }
-}
+}
\ No newline at end of file

Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Wed Sep 24 16:42:28 2014
@@ -10,9 +10,14 @@ CREATE TABLE parquet_types_staging (
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) ROW FORMAT DELIMITED
-FIELDS TERMINATED BY '|';
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':';
 
 CREATE TABLE parquet_types (
   cint int,
@@ -23,7 +28,10 @@ CREATE TABLE parquet_types (
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) STORED AS PARQUET;
 
 LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
@@ -32,6 +40,8 @@ INSERT OVERWRITE TABLE parquet_types SEL
 
 SELECT * FROM parquet_types;
 
+SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types;
+
 SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),

Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Wed Sep 24 16:42:28 2014
@@ -15,9 +15,14 @@ PREHOOK: query: CREATE TABLE parquet_typ
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
 PREHOOK: Output: default@parquet_types_staging
@@ -30,9 +35,14 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) ROW FORMAT DELIMITED
 FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
 POSTHOOK: Output: default@parquet_types_staging
@@ -45,7 +55,10 @@ PREHOOK: query: CREATE TABLE parquet_typ
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) STORED AS PARQUET
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -59,7 +72,10 @@ POSTHOOK: query: CREATE TABLE parquet_ty
   cstring1 string,
   t timestamp,
   cchar char(5),
-  cvarchar varchar(10)
+  cvarchar varchar(10),
+  m1 map<string, varchar(3)>,
+  l1 array<int>,
+  st1 struct<c1:int, c2:char(1)>
 ) STORED AS PARQUET
 POSTHOOK: type: CREATETABLE
 POSTHOOK: Output: database:default
@@ -88,6 +104,9 @@ POSTHOOK: Lineage: parquet_types.csmalli
 POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
 POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
 POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
 POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
 PREHOOK: query: SELECT * FROM parquet_types
 PREHOOK: type: QUERY
@@ -97,27 +116,56 @@ POSTHOOK: query: SELECT * FROM parquet_t
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@parquet_types
 #### A masked pattern was here ####
-100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111	a	a  
-101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222	ab	ab  
-102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333	abc	abc
-103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444	abcd	abcd
-104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555	abcde	abcde
-105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666	abcde	abcdef
-106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777	abcde	abcdefg
-107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888	bcdef	abcdefgh
-108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999	cdefg	abcdefghij
-109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101	klmno	abcdedef
-110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111	pqrst	abcdede
-111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121	nopqr	abcded
-112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131	opqrs	abcdd
-113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141	pqrst	abc
-114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151	qrstu	b
-115	1	1	1.0	4.5	tuv	2026-04-04 16:16:16.161616161	rstuv	abcded
-116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171	stuvw	abcded
-117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181	tuvwx	abcded
-118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191	uvwzy	abcdede
-119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede
-120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde
+100	1	1	1.0	0.0	abc	2011-01-01 01:01:01.111111111	a    	a  	{"k1":"v1"}	[101,200]	{"c1":10,"c2":"a"}
+101	2	2	1.1	0.3	def	2012-02-02 02:02:02.222222222	ab   	ab 	{"k2":"v2"}	[102,200]	{"c1":10,"c2":"d"}
+102	3	3	1.2	0.6	ghi	2013-03-03 03:03:03.333333333	abc  	abc	{"k3":"v3"}	[103,200]	{"c1":10,"c2":"g"}
+103	1	4	1.3	0.9	jkl	2014-04-04 04:04:04.444444444	abcd 	abcd	{"k4":"v4"}	[104,200]	{"c1":10,"c2":"j"}
+104	2	5	1.4	1.2	mno	2015-05-05 05:05:05.555555555	abcde	abcde	{"k5":"v5"}	[105,200]	{"c1":10,"c2":"m"}
+105	3	1	1.0	1.5	pqr	2016-06-06 06:06:06.666666666	abcde	abcdef	{"k6":"v6"}	[106,200]	{"c1":10,"c2":"p"}
+106	1	2	1.1	1.8	stu	2017-07-07 07:07:07.777777777	abcde	abcdefg	{"k7":"v7"}	[107,200]	{"c1":10,"c2":"s"}
+107	2	3	1.2	2.1	vwx	2018-08-08 08:08:08.888888888	bcdef	abcdefgh	{"k8":"v8"}	[108,200]	{"c1":10,"c2":"v"}
+108	3	4	1.3	2.4	yza	2019-09-09 09:09:09.999999999	cdefg	abcdefghij	{"k9":"v9"}	[109,200]	{"c1":10,"c2":"y"}
+109	1	5	1.4	2.7	bcd	2020-10-10 10:10:10.101010101	klmno	abcdedef	{"k10":"v10"}	[110,200]	{"c1":10,"c2":"b"}
+110	2	1	1.0	3.0	efg	2021-11-11 11:11:11.111111111	pqrst	abcdede	{"k11":"v11"}	[111,200]	{"c1":10,"c2":"e"}
+111	3	2	1.1	3.3	hij	2022-12-12 12:12:12.121212121	nopqr	abcded	{"k12":"v12"}	[112,200]	{"c1":10,"c2":"h"}
+112	1	3	1.2	3.6	klm	2023-01-02 13:13:13.131313131	opqrs	abcdd	{"k13":"v13"}	[113,200]	{"c1":10,"c2":"k"}
+113	2	4	1.3	3.9	nop	2024-02-02 14:14:14.141414141	pqrst	abc	{"k14":"v14"}	[114,200]	{"c1":10,"c2":"n"}
+114	3	5	1.4	4.2	qrs	2025-03-03 15:15:15.151515151	qrstu	b	{"k15":"v15"}	[115,200]	{"c1":10,"c2":"q"}
+115	1	1	1.0	4.5	qrs	2026-04-04 16:16:16.161616161	rstuv	abcded	{"k16":"v16"}	[116,200]	{"c1":10,"c2":"q"}
+116	2	2	1.1	4.8	wxy	2027-05-05 17:17:17.171717171	stuvw	abcded	{"k17":"v17"}	[117,200]	{"c1":10,"c2":"w"}
+117	3	3	1.2	5.1	zab	2028-06-06 18:18:18.181818181	tuvwx	abcded	{"k18":"v18"}	[118,200]	{"c1":10,"c2":"z"}
+118	1	4	1.3	5.4	cde	2029-07-07 19:19:19.191919191	uvwzy	abcdede	{"k19":"v19"}	[119,200]	{"c1":10,"c2":"c"}
+119	2	5	1.4	5.7	fgh	2030-08-08 20:20:20.202020202	vwxyz	abcdede	{"k20":"v20"}	[120,200]	{"c1":10,"c2":"f"}
+120	3	1	1.0	6.0	ijk	2031-09-09 21:21:21.212121212	wxyza	abcde	{"k21":"v21"}	[121,200]	{"c1":10,"c2":"i"}
+PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+a    	1	a  	3
+ab   	2	ab 	3
+abc  	3	abc	3
+abcd 	4	abcd	4
+abcde	5	abcde	5
+abcde	5	abcdef	6
+abcde	5	abcdefg	7
+bcdef	5	abcdefgh	8
+cdefg	5	abcdefghij	10
+klmno	5	abcdedef	8
+pqrst	5	abcdede	7
+nopqr	5	abcded	6
+opqrs	5	abcdd	5
+pqrst	5	abc	3
+qrstu	5	b	1
+rstuv	5	abcded	6
+stuvw	5	abcded	6
+tuvwx	5	abcded	6
+uvwzy	5	abcdede	7
+vwxyz	5	abcdede	7
+wxyza	5	abcde	5
 PREHOOK: query: SELECT ctinyint,
   MAX(cint),
   MIN(csmallint),

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java Wed Sep 24 16:42:28 2014
@@ -21,6 +21,11 @@ import org.apache.hadoop.hive.common.typ
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils;
 import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.io.Text;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
 
 public class WritableHiveCharObjectInspector extends AbstractPrimitiveWritableObjectInspector
     implements SettableHiveCharObjectInspector {
@@ -39,6 +44,12 @@ public class WritableHiveCharObjectInspe
     if (o == null) {
       return null;
     }
+
+    if (o instanceof Text) {
+      String str = ((Text)o).toString();
+      return new HiveChar(str, ((CharTypeInfo)typeInfo).getLength());
+    }
+
     HiveCharWritable writable = ((HiveCharWritable) o);
     if (doesWritableMatchTypeParams(writable)) {
       return writable.getHiveChar();
@@ -53,6 +64,14 @@ public class WritableHiveCharObjectInspe
     if (o == null) {
       return null;
     }
+
+    if (o instanceof Text) {
+      String str = ((Text)o).toString();
+      HiveCharWritable hcw = new HiveCharWritable();
+      hcw.set(str, ((CharTypeInfo)typeInfo).getLength());
+      return hcw;
+    }
+
     HiveCharWritable writable = ((HiveCharWritable) o);
     if (doesWritableMatchTypeParams((HiveCharWritable) o)) {
       return writable;

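The effect of the new Text branch above, as a minimal usage sketch (the driver class is hypothetical; the constructor and method come from the class as patched):

  import org.apache.hadoop.hive.common.type.HiveChar;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveCharObjectInspector;
  import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
  import org.apache.hadoop.io.Text;

  public class CharInspectorSketch {
    public static void main(String[] args) {
      // char(5), matching the cchar column in parquet_types.q
      WritableHiveCharObjectInspector oi =
          new WritableHiveCharObjectInspector(new CharTypeInfo(5));
      // New branch: a raw Text from the Parquet string converter is
      // re-wrapped with the column's declared length instead of being
      // cast to HiveCharWritable.
      HiveChar c = oi.getPrimitiveJavaObject(new Text("ab"));
      System.out.println("[" + c + "]");  // padded out to length 5
    }
  }
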
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java Wed Sep 24 16:42:28 2014
@@ -19,10 +19,15 @@ package org.apache.hadoop.hive.serde2.ob
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
 import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.HiveStringUtils;
 
 public class WritableHiveVarcharObjectInspector extends AbstractPrimitiveWritableObjectInspector
 implements SettableHiveVarcharObjectInspector {
@@ -43,6 +48,12 @@ implements SettableHiveVarcharObjectInsp
     if (o == null) {
       return null;
     }
+
+    if (o instanceof Text) {
+      String str = ((Text)o).toString();
+      return new HiveVarchar(str, ((VarcharTypeInfo)typeInfo).getLength());
+    }
+
     HiveVarcharWritable writable = ((HiveVarcharWritable)o);
     if (doesWritableMatchTypeParams(writable)) {
       return writable.getHiveVarchar();
@@ -57,6 +68,14 @@ implements SettableHiveVarcharObjectInsp
     if (o == null) {
       return null;
     }
+
+    if (o instanceof Text) {
+      String str = ((Text)o).toString();
+      HiveVarcharWritable hcw = new HiveVarcharWritable();
+      hcw.set(str, ((VarcharTypeInfo)typeInfo).getLength());
+      return hcw;
+    }
+
     HiveVarcharWritable writable = ((HiveVarcharWritable)o);
     if (doesWritableMatchTypeParams((HiveVarcharWritable)o)) {
       return writable;
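
The varchar inspector gets the same treatment; the behavioral difference is that HiveVarchar enforces a maximum length without padding (compare LENGTH(cchar) with LENGTH(cvarchar) in the q.out diff above). A matching sketch, again with a hypothetical driver:

  import org.apache.hadoop.hive.common.type.HiveVarchar;
  import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableHiveVarcharObjectInspector;
  import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
  import org.apache.hadoop.io.Text;

  public class VarcharInspectorSketch {
    public static void main(String[] args) {
      // varchar(10), matching the cvarchar column in parquet_types.q
      WritableHiveVarcharObjectInspector oi =
          new WritableHiveVarcharObjectInspector(new VarcharTypeInfo(10));
      HiveVarchar v = oi.getPrimitiveJavaObject(new Text("abcdefghijklmnop"));
      System.out.println(v.getValue().length());  // 10: truncated, not padded
    }
  }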