You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by xu...@apache.org on 2014/09/24 18:42:29 UTC
svn commit: r1627365 - in /hive/trunk: data/files/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/
Author: xuefu
Date: Wed Sep 24 16:42:28 2014
New Revision: 1627365
URL: http://svn.apache.org/r1627365
Log:
HIVE-8205: Using strings in group type fails in ParquetSerDe (Mohit via Xuefu)
Modified:
hive/trunk/data/files/parquet_types.txt
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java
hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java
Modified: hive/trunk/data/files/parquet_types.txt
URL: http://svn.apache.org/viewvc/hive/trunk/data/files/parquet_types.txt?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/data/files/parquet_types.txt (original)
+++ hive/trunk/data/files/parquet_types.txt Wed Sep 24 16:42:28 2014
@@ -1,21 +1,21 @@
-100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a
-101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab
-102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc
-103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd
-104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde
-105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef
-106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg
-107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh
-108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop
-109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef
-110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede
-111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded
-112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd
-113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc
-114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b
-115|1|1|1.0|4.5|tuv|2026-04-04 16:16:16.161616161|rstuv|abcded
-116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded
-117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded
-118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede
-119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede
-120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde
+100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |k1:v1|101,200|10,abc
+101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |k2:v2|102,200|10,def
+102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|k3:v3|103,200|10,ghi
+103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|k4:v4|104,200|10,jkl
+104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|k5:v5|105,200|10,mno
+105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|k6:v6|106,200|10,pqr
+106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|k7:v7|107,200|10,stu
+107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|k8:v8|108,200|10,vwx
+108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|abcdefghijklmnop|k9:v9|109,200|10,yza
+109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|k10:v10|110,200|10,bcd
+110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|k11:v11|111,200|10,efg
+111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|k12:v12|112,200|10,hij
+112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|k13:v13|113,200|10,klm
+113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|k14:v14|114,200|10,nop
+114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|k15:v15|115,200|10,qrs
+115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|k16:v16|116,200|10,qrs
+116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|k17:v17|117,200|10,wxy
+117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|k18:v18|118,200|10,zab
+118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|k19:v19|119,200|10,cde
+119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|k20:v20|120,200|10,fgh
+120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|k21:v21|121,200|10,ijk
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ArrayWritableGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -13,9 +13,6 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
@@ -33,7 +30,7 @@ public class ArrayWritableGroupConverter
private Writable[] mapPairContainer;
public ArrayWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
- final int index, List<TypeInfo> hiveSchemaTypeInfos) {
+ final int index) {
this.parent = parent;
this.index = index;
int count = groupType.getFieldCount();
@@ -43,8 +40,7 @@ public class ArrayWritableGroupConverter
isMap = count == 2;
converters = new Converter[count];
for (int i = 0; i < count; i++) {
- converters[i] = getConverterFromDescription(groupType.getType(i), i, this,
- hiveSchemaTypeInfos);
+ converters[i] = getConverterFromDescription(groupType.getType(i), i, this);
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -16,7 +16,6 @@ package org.apache.hadoop.hive.ql.io.par
import java.util.ArrayList;
import java.util.List;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.Writable;
@@ -37,21 +36,19 @@ public class DataWritableGroupConverter
private final Object[] currentArr;
private Writable[] rootMap;
- public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema,
- final List<TypeInfo> hiveSchemaTypeInfos) {
- this(requestedSchema, null, 0, tableSchema, hiveSchemaTypeInfos);
+ public DataWritableGroupConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+ this(requestedSchema, null, 0, tableSchema);
final int fieldCount = tableSchema.getFieldCount();
this.rootMap = new Writable[fieldCount];
}
public DataWritableGroupConverter(final GroupType groupType, final HiveGroupConverter parent,
- final int index, final List<TypeInfo> hiveSchemaTypeInfos) {
- this(groupType, parent, index, groupType, hiveSchemaTypeInfos);
+ final int index) {
+ this(groupType, parent, index, groupType);
}
public DataWritableGroupConverter(final GroupType selectedGroupType,
- final HiveGroupConverter parent, final int index, final GroupType containingGroupType,
- final List<TypeInfo> hiveSchemaTypeInfos) {
+ final HiveGroupConverter parent, final int index, final GroupType containingGroupType) {
this.parent = parent;
this.index = index;
final int totalFieldCount = containingGroupType.getFieldCount();
@@ -65,8 +62,7 @@ public class DataWritableGroupConverter
Type subtype = selectedFields.get(i);
if (containingGroupType.getFields().contains(subtype)) {
converters[i] = getConverterFromDescription(subtype,
- containingGroupType.getFieldIndex(subtype.getName()), this,
- hiveSchemaTypeInfos);
+ containingGroupType.getFieldIndex(subtype.getName()), this);
} else {
throw new IllegalStateException("Group type [" + containingGroupType +
"] does not contain requested field: " + subtype);
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/DataWritableRecordConverter.java Wed Sep 24 16:42:28 2014
@@ -31,10 +31,8 @@ public class DataWritableRecordConverter
private final DataWritableGroupConverter root;
- public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema,
- final List<TypeInfo> hiveColumnTypeInfos) {
- this.root = new DataWritableGroupConverter(requestedSchema, tableSchema,
- hiveColumnTypeInfos);
+ public DataWritableRecordConverter(final GroupType requestedSchema, final GroupType tableSchema) {
+ this.root = new DataWritableGroupConverter(requestedSchema, tableSchema);
}
@Override
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java Wed Sep 24 16:42:28 2014
@@ -16,19 +16,12 @@ package org.apache.hadoop.hive.ql.io.par
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.util.ArrayList;
-import java.util.List;
-import org.apache.hadoop.hive.common.type.HiveChar;
-import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
-import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.io.DoubleWritable;
-import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
-import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.FloatWritable;
@@ -152,32 +145,6 @@ public enum ETypeConverter {
}
};
}
- },
- ECHAR_CONVERTER(HiveCharWritable.class) {
- @Override
- Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
- return new BinaryConverter<HiveCharWritable>(type, parent, index) {
- @Override
- protected HiveCharWritable convert(Binary binary) {
- HiveChar hiveChar = new HiveChar();
- hiveChar.setValue(binary.toStringUsingUTF8());
- return new HiveCharWritable(hiveChar);
- }
- };
- }
- },
- EVARCHAR_CONVERTER(HiveVarcharWritable.class) {
- @Override
- Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent) {
- return new BinaryConverter<HiveVarcharWritable>(type, parent, index) {
- @Override
- protected HiveVarcharWritable convert(Binary binary) {
- HiveVarchar hiveVarchar = new HiveVarchar();
- hiveVarchar.setValue(binary.toStringUsingUTF8());
- return new HiveVarcharWritable(hiveVarchar);
- }
- };
- }
};
final Class<?> _type;
@@ -193,7 +160,7 @@ public enum ETypeConverter {
abstract Converter getConverter(final PrimitiveType type, final int index, final HiveGroupConverter parent);
public static Converter getNewConverter(final PrimitiveType type, final int index,
- final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+ final HiveGroupConverter parent) {
if (type.isPrimitive() && (type.asPrimitiveType().getPrimitiveTypeName().equals(PrimitiveType.PrimitiveTypeName.INT96))) {
//TODO- cleanup once parquet support Timestamp type annotation.
return ETypeConverter.ETIMESTAMP_CONVERTER.getConverter(type, index, parent);
@@ -201,15 +168,7 @@ public enum ETypeConverter {
if (OriginalType.DECIMAL == type.getOriginalType()) {
return EDECIMAL_CONVERTER.getConverter(type, index, parent);
} else if (OriginalType.UTF8 == type.getOriginalType()) {
- if (hiveSchemaTypeInfos.get(index).getTypeName()
- .startsWith(serdeConstants.CHAR_TYPE_NAME)) {
- return ECHAR_CONVERTER.getConverter(type, index, parent);
- } else if (hiveSchemaTypeInfos.get(index).getTypeName()
- .startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
- return EVARCHAR_CONVERTER.getConverter(type, index, parent);
- } else if (type.isPrimitive()) {
- return ESTRING_CONVERTER.getConverter(type, index, parent);
- }
+ return ESTRING_CONVERTER.getConverter(type, index, parent);
}
Class<?> javaType = type.getPrimitiveTypeName().javaType;
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/HiveGroupConverter.java Wed Sep 24 16:42:28 2014
@@ -13,9 +13,6 @@
*/
package org.apache.hadoop.hive.ql.io.parquet.convert;
-import java.util.List;
-
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.io.Writable;
import parquet.io.api.Converter;
@@ -26,20 +23,17 @@ import parquet.schema.Type.Repetition;
public abstract class HiveGroupConverter extends GroupConverter {
protected static Converter getConverterFromDescription(final Type type, final int index,
- final HiveGroupConverter parent, List<TypeInfo> hiveSchemaTypeInfos) {
+ final HiveGroupConverter parent) {
if (type == null) {
return null;
}
if (type.isPrimitive()) {
- return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent,
- hiveSchemaTypeInfos);
+ return ETypeConverter.getNewConverter(type.asPrimitiveType(), index, parent);
} else {
if (type.asGroupType().getRepetition() == Repetition.REPEATED) {
- return new ArrayWritableGroupConverter(type.asGroupType(), parent, index,
- hiveSchemaTypeInfos);
+ return new ArrayWritableGroupConverter(type.asGroupType(), parent, index);
} else {
- return new DataWritableGroupConverter(type.asGroupType(), parent, index,
- hiveSchemaTypeInfos);
+ return new DataWritableGroupConverter(type.asGroupType(), parent, index);
}
}
}
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Wed Sep 24 16:42:28 2014
@@ -14,7 +14,6 @@
package org.apache.hadoop.hive.ql.io.parquet.read;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -24,8 +23,6 @@ import org.apache.hadoop.hive.ql.io.IOCo
import org.apache.hadoop.hive.ql.io.parquet.convert.DataWritableRecordConverter;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.util.StringUtils;
@@ -56,7 +53,7 @@ public class DataWritableReadSupport ext
* From a string which columns names (including hive column), return a list
* of string columns
*
- * @param comma separated list of columns
+ * @param columns comma separated list of columns
* @return list with virtual columns removed
*/
private static List<String> getColumns(final String columns) {
@@ -64,27 +61,6 @@ public class DataWritableReadSupport ext
removeVirtualColumns(StringUtils.getStringCollection(columns));
}
- private static List<TypeInfo> getColumnTypes(Configuration configuration) {
-
- List<String> columnNames;
- String columnNamesProperty = configuration.get(IOConstants.COLUMNS);
- if (columnNamesProperty.length() == 0) {
- columnNames = new ArrayList<String>();
- } else {
- columnNames = Arrays.asList(columnNamesProperty.split(","));
- }
- List<TypeInfo> columnTypes;
- String columnTypesProperty = configuration.get(IOConstants.COLUMNS_TYPES);
- if (columnTypesProperty.length() == 0) {
- columnTypes = new ArrayList<TypeInfo>();
- } else {
- columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypesProperty);
- }
-
- columnTypes = VirtualColumn.removeVirtualColumnTypes(columnNames, columnTypes);
- return columnTypes;
- }
-
/**
*
* It creates the readContext for Parquet side with the requested schema during the init phase.
@@ -173,8 +149,7 @@ public class DataWritableReadSupport ext
}
final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
- return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema,
- getColumnTypes(configuration));
+ return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
/**
@@ -194,4 +169,4 @@ public class DataWritableReadSupport ext
}
return requestedSchema;
}
-}
+}
\ No newline at end of file
Modified: hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q (original)
+++ hive/trunk/ql/src/test/queries/clientpositive/parquet_types.q Wed Sep 24 16:42:28 2014
@@ -10,9 +10,14 @@ CREATE TABLE parquet_types_staging (
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) ROW FORMAT DELIMITED
-FIELDS TERMINATED BY '|';
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':';
CREATE TABLE parquet_types (
cint int,
@@ -23,7 +28,10 @@ CREATE TABLE parquet_types (
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) STORED AS PARQUET;
LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging;
@@ -32,6 +40,8 @@ INSERT OVERWRITE TABLE parquet_types SEL
SELECT * FROM parquet_types;
+SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types;
+
SELECT ctinyint,
MAX(cint),
MIN(csmallint),
Modified: hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out (original)
+++ hive/trunk/ql/src/test/results/clientpositive/parquet_types.q.out Wed Sep 24 16:42:28 2014
@@ -15,9 +15,14 @@ PREHOOK: query: CREATE TABLE parquet_typ
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@parquet_types_staging
@@ -30,9 +35,14 @@ POSTHOOK: query: CREATE TABLE parquet_ty
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) ROW FORMAT DELIMITED
FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@parquet_types_staging
@@ -45,7 +55,10 @@ PREHOOK: query: CREATE TABLE parquet_typ
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) STORED AS PARQUET
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
@@ -59,7 +72,10 @@ POSTHOOK: query: CREATE TABLE parquet_ty
cstring1 string,
t timestamp,
cchar char(5),
- cvarchar varchar(10)
+ cvarchar varchar(10),
+ m1 map<string, varchar(3)>,
+ l1 array<int>,
+ st1 struct<c1:int, c2:char(1)>
) STORED AS PARQUET
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
@@ -88,6 +104,9 @@ POSTHOOK: Lineage: parquet_types.csmalli
POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
PREHOOK: query: SELECT * FROM parquet_types
PREHOOK: type: QUERY
@@ -97,27 +116,56 @@ POSTHOOK: query: SELECT * FROM parquet_t
POSTHOOK: type: QUERY
POSTHOOK: Input: default@parquet_types
#### A masked pattern was here ####
-100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a
-101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab
-102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc
-103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd
-104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde
-105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef
-106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg
-107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh
-108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg abcdefghij
-109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef
-110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede
-111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded
-112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd
-113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc
-114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b
-115 1 1 1.0 4.5 tuv 2026-04-04 16:16:16.161616161 rstuv abcded
-116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded
-117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded
-118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede
-119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede
-120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde
+100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a {"k1":"v1"} [101,200] {"c1":10,"c2":"a"}
+101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab {"k2":"v2"} [102,200] {"c1":10,"c2":"d"}
+102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc {"k3":"v3"} [103,200] {"c1":10,"c2":"g"}
+103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd {"k4":"v4"} [104,200] {"c1":10,"c2":"j"}
+104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde {"k5":"v5"} [105,200] {"c1":10,"c2":"m"}
+105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef {"k6":"v6"} [106,200] {"c1":10,"c2":"p"}
+106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg {"k7":"v7"} [107,200] {"c1":10,"c2":"s"}
+107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh {"k8":"v8"} [108,200] {"c1":10,"c2":"v"}
+108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg abcdefghij {"k9":"v9"} [109,200] {"c1":10,"c2":"y"}
+109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef {"k10":"v10"} [110,200] {"c1":10,"c2":"b"}
+110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede {"k11":"v11"} [111,200] {"c1":10,"c2":"e"}
+111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded {"k12":"v12"} [112,200] {"c1":10,"c2":"h"}
+112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd {"k13":"v13"} [113,200] {"c1":10,"c2":"k"}
+113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc {"k14":"v14"} [114,200] {"c1":10,"c2":"n"}
+114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b {"k15":"v15"} [115,200] {"c1":10,"c2":"q"}
+115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded {"k16":"v16"} [116,200] {"c1":10,"c2":"q"}
+116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded {"k17":"v17"} [117,200] {"c1":10,"c2":"w"}
+117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded {"k18":"v18"} [118,200] {"c1":10,"c2":"z"}
+118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede {"k19":"v19"} [119,200] {"c1":10,"c2":"c"}
+119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede {"k20":"v20"} [120,200] {"c1":10,"c2":"f"}
+120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde {"k21":"v21"} [121,200] {"c1":10,"c2":"i"}
+PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar) FROM parquet_types
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+a 1 a 3
+ab 2 ab 3
+abc 3 abc 3
+abcd 4 abcd 4
+abcde 5 abcde 5
+abcde 5 abcdef 6
+abcde 5 abcdefg 7
+bcdef 5 abcdefgh 8
+cdefg 5 abcdefghij 10
+klmno 5 abcdedef 8
+pqrst 5 abcdede 7
+nopqr 5 abcded 6
+opqrs 5 abcdd 5
+pqrst 5 abc 3
+qrstu 5 b 1
+rstuv 5 abcded 6
+stuvw 5 abcded 6
+tuvwx 5 abcded 6
+uvwzy 5 abcdede 7
+vwxyz 5 abcdede 7
+wxyza 5 abcde 5
PREHOOK: query: SELECT ctinyint,
MAX(cint),
MIN(csmallint),
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveCharObjectInspector.java Wed Sep 24 16:42:28 2014
@@ -21,6 +21,11 @@ import org.apache.hadoop.hive.common.typ
import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils;
import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
+import org.apache.hadoop.io.Text;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
public class WritableHiveCharObjectInspector extends AbstractPrimitiveWritableObjectInspector
implements SettableHiveCharObjectInspector {
@@ -39,6 +44,12 @@ public class WritableHiveCharObjectInspe
if (o == null) {
return null;
}
+
+ if (o instanceof Text) {
+ String str = ((Text)o).toString();
+ return new HiveChar(str, ((CharTypeInfo)typeInfo).getLength());
+ }
+
HiveCharWritable writable = ((HiveCharWritable) o);
if (doesWritableMatchTypeParams(writable)) {
return writable.getHiveChar();
@@ -53,6 +64,14 @@ public class WritableHiveCharObjectInspe
if (o == null) {
return null;
}
+
+ if (o instanceof Text) {
+ String str = ((Text)o).toString();
+ HiveCharWritable hcw = new HiveCharWritable();
+ hcw.set(str, ((CharTypeInfo)typeInfo).getLength());
+ return hcw;
+ }
+
HiveCharWritable writable = ((HiveCharWritable) o);
if (doesWritableMatchTypeParams((HiveCharWritable) o)) {
return writable;
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java?rev=1627365&r1=1627364&r2=1627365&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/WritableHiveVarcharObjectInspector.java Wed Sep 24 16:42:28 2014
@@ -19,10 +19,15 @@ package org.apache.hadoop.hive.serde2.ob
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
import org.apache.hadoop.hive.serde2.io.HiveVarcharWritable;
+import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.BaseCharUtils;
+import org.apache.hadoop.io.Text;
+import org.apache.hive.common.util.HiveStringUtils;
public class WritableHiveVarcharObjectInspector extends AbstractPrimitiveWritableObjectInspector
implements SettableHiveVarcharObjectInspector {
@@ -43,6 +48,12 @@ implements SettableHiveVarcharObjectInsp
if (o == null) {
return null;
}
+
+ if (o instanceof Text) {
+ String str = ((Text)o).toString();
+ return new HiveVarchar(str, ((VarcharTypeInfo)typeInfo).getLength());
+ }
+
HiveVarcharWritable writable = ((HiveVarcharWritable)o);
if (doesWritableMatchTypeParams(writable)) {
return writable.getHiveVarchar();
@@ -57,6 +68,14 @@ implements SettableHiveVarcharObjectInsp
if (o == null) {
return null;
}
+
+ if (o instanceof Text) {
+ String str = ((Text)o).toString();
+ HiveVarcharWritable hcw = new HiveVarcharWritable();
+ hcw.set(str, ((VarcharTypeInfo)typeInfo).getLength());
+ return hcw;
+ }
+
HiveVarcharWritable writable = ((HiveVarcharWritable)o);
if (doesWritableMatchTypeParams((HiveVarcharWritable)o)) {
return writable;