You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/10/29 17:05:29 UTC
svn commit: r1635187 - in /hive/branches/branch-0.14: ./
ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
Author: brock
Date: Wed Oct 29 16:05:28 2014
New Revision: 1635187
URL: http://svn.apache.org/r1635187
Log:
Merge HIVE-7800 - Parquet Column Index Access Schema Size Checking from trunk
Modified:
hive/branches/branch-0.14/ (props changed)
hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
Propchange: hive/branches/branch-0.14/
------------------------------------------------------------------------------
Merged /hive/trunk:r1629752
Modified: hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1635187&r1=1635186&r2=1635187&view=diff
==============================================================================
--- hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/branches/branch-0.14/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Wed Oct 29 16:05:28 2014
@@ -75,6 +75,7 @@ public class DataWritableReadSupport ext
final Map<String, String> keyValueMetaData, final MessageType fileSchema) {
final String columns = configuration.get(IOConstants.COLUMNS);
final Map<String, String> contextMetadata = new HashMap<String, String>();
+ final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
if (columns != null) {
final List<String> listColumns = getColumns(columns);
final Map<String, String> lowerCaseFileSchemaColumns = new HashMap<String,String>();
@@ -82,45 +83,50 @@ public class DataWritableReadSupport ext
lowerCaseFileSchemaColumns.put(c.getPath()[0].toLowerCase(), c.getPath()[0]);
}
final List<Type> typeListTable = new ArrayList<Type>();
- for (String col : listColumns) {
- col = col.toLowerCase();
- // listColumns contains partition columns which are metadata only
- if (lowerCaseFileSchemaColumns.containsKey(col)) {
- typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col)));
- } else {
- // below allows schema evolution
- typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
+ if(indexAccess) {
+ for (int index = 0; index < listColumns.size(); index++) {
+ //Take columns based on index or pad the field
+ if(index < fileSchema.getFieldCount()) {
+ typeListTable.add(fileSchema.getType(index));
+ } else {
+ //prefixing with '_mask_' to ensure no conflict with named
+ //columns in the file schema
+ typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, "_mask_"+listColumns.get(index)));
+ }
+ }
+ } else {
+ for (String col : listColumns) {
+ col = col.toLowerCase();
+ // listColumns contains partition columns which are metadata only
+ if (lowerCaseFileSchemaColumns.containsKey(col)) {
+ typeListTable.add(fileSchema.getType(lowerCaseFileSchemaColumns.get(col)));
+ } else {
+ // below allows schema evolution
+ typeListTable.add(new PrimitiveType(Repetition.OPTIONAL, PrimitiveTypeName.BINARY, col));
+ }
}
}
MessageType tableSchema = new MessageType(TABLE_SCHEMA, typeListTable);
contextMetadata.put(HIVE_SCHEMA_KEY, tableSchema.toString());
- MessageType requestedSchemaByUser = tableSchema;
final List<Integer> indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration);
final List<Type> typeListWanted = new ArrayList<Type>();
- final boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);
+
for (final Integer idx : indexColumnsWanted) {
if (idx < listColumns.size()) {
String col = listColumns.get(idx);
if (indexAccess) {
- typeListWanted.add(tableSchema.getType(col));
+ typeListWanted.add(fileSchema.getFields().get(idx));
} else {
col = col.toLowerCase();
if (lowerCaseFileSchemaColumns.containsKey(col)) {
typeListWanted.add(tableSchema.getType(lowerCaseFileSchemaColumns.get(col)));
- } else {
- // should never occur?
- String msg = "Column " + col + " at index " + idx + " does not exist in " +
- lowerCaseFileSchemaColumns;
- throw new IllegalStateException(msg);
}
}
}
}
- requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
- typeListWanted), fileSchema, configuration);
-
+ MessageType requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);
return new ReadContext(requestedSchemaByUser, contextMetadata);
} else {
contextMetadata.put(HIVE_SCHEMA_KEY, fileSchema.toString());
@@ -147,26 +153,7 @@ public class DataWritableReadSupport ext
throw new IllegalStateException("ReadContext not initialized properly. " +
"Don't know the Hive Schema.");
}
- final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
- parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
+ final MessageType tableSchema = MessageTypeParser.parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
-
- /**
- * Determine the file column names based on the position within the requested columns and
- * use that as the requested schema.
- */
- private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
- Configuration configuration) {
- if (configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
- final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
- List<Type> requestedTypes = new ArrayList<Type>();
- for(Type t : requestedSchema.getFields()) {
- int index = listColumns.indexOf(t.getName());
- requestedTypes.add(fileSchema.getType(index));
- }
- requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
- }
- return requestedSchema;
- }
-}
\ No newline at end of file
+}