Posted to commits@hive.apache.org by br...@apache.org on 2014/06/14 00:22:41 UTC
svn commit: r1602534 - in /hive/trunk/ql/src/java/org/apache/hadoop/hive/ql: exec/DDLTask.java io/parquet/read/DataWritableReadSupport.java
Author: brock
Date: Fri Jun 13 22:22:41 2014
New Revision: 1602534
URL: http://svn.apache.org/r1602534
Log:
HIVE-6938 - Add Support for Parquet Column Rename (Daniel Weeks via Brock)
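Background for this change: by default the Parquet reader resolves Hive columns against the file schema by name, so renaming a column with ALTER TABLE leaves existing data unreadable under the new name. This commit whitelists ParquetHiveSerDe for REPLACE COLUMNS and adds an opt-in property, parquet.column.index.access, which resolves columns by position instead. A minimal sketch of enabling it from client code, assuming a hypothetical table t whose column b was renamed to b2 (the property key comes from the diff below; the table and column names are illustrative):

    import org.apache.hadoop.conf.Configuration;

    public class IndexAccessExample {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Opt in to positional column resolution so a renamed Hive column
        // still maps to the same physical Parquet column.
        conf.setBoolean("parquet.column.index.access", true);
        // The same switch in a Hive session (hypothetical):
        //   SET parquet.column.index.access=true;
        //   ALTER TABLE t REPLACE COLUMNS (a int, b2 string);
        //   SELECT b2 FROM t;  -- reads the column formerly named b
      }
    }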
Modified:
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1602534&r1=1602533&r2=1602534&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Fri Jun 13 22:22:41 2014
@@ -92,6 +92,7 @@ import org.apache.hadoop.hive.ql.QueryPl
import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask;
@@ -3700,7 +3701,8 @@ public class DDLTask extends Task<DDLWor
MetadataTypedColumnsetSerDe.class.getName())
&& !tbl.getSerializationLib().equals(LazySimpleSerDe.class.getName())
&& !tbl.getSerializationLib().equals(ColumnarSerDe.class.getName())
- && !tbl.getSerializationLib().equals(DynamicSerDe.class.getName())) {
+ && !tbl.getSerializationLib().equals(DynamicSerDe.class.getName())
+ && !tbl.getSerializationLib().equals(ParquetHiveSerDe.class.getName())) {
throw new HiveException(ErrorMsg.CANNOT_REPLACE_COLUMNS, alterTbl.getOldName());
}
tbl.getTTable().getSd().setCols(alterTbl.getNewCols());
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java?rev=1602534&r1=1602533&r2=1602534&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java Fri Jun 13 22:22:41 2014
@@ -46,7 +46,8 @@ public class DataWritableReadSupport ext
private static final String TABLE_SCHEMA = "table_schema";
public static final String HIVE_SCHEMA_KEY = "HIVE_TABLE_SCHEMA";
-
+ public static final String PARQUET_COLUMN_INDEX_ACCESS = "parquet.column.index.access";
+
/**
* From a comma-separated string of column names (including Hive columns),
* return a list of column names
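The helper this Javadoc introduces, getColumns, is what resolveSchemaAccess below uses to read the comma-separated value of IOConstants.COLUMNS; its body falls outside this hunk. An illustrative stand-in in plain Java (assuming comma-separated input; not the committed implementation):

    import java.util.Arrays;
    import java.util.List;

    public class GetColumnsSketch {
      // Split a comma-separated column list such as "a,b2,c" into its names.
      static List<String> getColumns(final String columns) {
        return Arrays.asList(columns.split(","));
      }

      public static void main(String[] args) {
        System.out.println(getColumns("a,b2,c")); // prints [a, b2, c]
      }
    }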
@@ -95,7 +96,8 @@ public class DataWritableReadSupport ext
for (final Integer idx : indexColumnsWanted) {
typeListWanted.add(tableSchema.getType(listColumns.get(idx)));
}
- requestedSchemaByUser = new MessageType(fileSchema.getName(), typeListWanted);
+ requestedSchemaByUser = resolveSchemaAccess(new MessageType(fileSchema.getName(),
+ typeListWanted), fileSchema, configuration);
return new ReadContext(requestedSchemaByUser, contextMetadata);
} else {
@@ -123,8 +125,31 @@ public class DataWritableReadSupport ext
throw new IllegalStateException("ReadContext not initialized properly. " +
"Don't know the Hive Schema.");
}
- final MessageType tableSchema = MessageTypeParser.
- parseMessageType(metadata.get(HIVE_SCHEMA_KEY));
+ final MessageType tableSchema = resolveSchemaAccess(MessageTypeParser.
+ parseMessageType(metadata.get(HIVE_SCHEMA_KEY)), fileSchema, configuration);
+
return new DataWritableRecordConverter(readContext.getRequestedSchema(), tableSchema);
}
+
+ /**
+ * Determine the file column names based on the position within the requested columns and
+ * use that as the requested schema.
+ */
+ private MessageType resolveSchemaAccess(MessageType requestedSchema, MessageType fileSchema,
+ Configuration configuration) {
+ if(configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false)) {
+ final List<String> listColumns = getColumns(configuration.get(IOConstants.COLUMNS));
+
+ List<Type> requestedTypes = new ArrayList<Type>();
+
+ for(Type t : requestedSchema.getFields()) {
+ int index = listColumns.indexOf(t.getName());
+ requestedTypes.add(fileSchema.getType(index));
+ }
+
+ requestedSchema = new MessageType(requestedSchema.getName(), requestedTypes);
+ }
+
+ return requestedSchema;
+ }
}
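Read in isolation, the heart of resolveSchemaAccess is a positional lookup: each requested field's index in the Hive column list selects the type at the same index in the file schema. A self-contained sketch of that lookup using plain strings instead of Parquet's MessageType (all names illustrative, not part of the commit):

    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.List;

    public class PositionalLookupSketch {
      // Map each requested Hive column name to the file column at the same position.
      static List<String> resolveByIndex(List<String> requested,
          List<String> hiveColumns, List<String> fileColumns) {
        List<String> resolved = new ArrayList<String>();
        for (String name : requested) {
          int index = hiveColumns.indexOf(name); // position in the Hive schema
          resolved.add(fileColumns.get(index));  // same position in the file
        }
        return resolved;
      }

      public static void main(String[] args) {
        // The table renamed column "b" to "b2"; the Parquet file still says "b".
        List<String> hive = Arrays.asList("a", "b2", "c");
        List<String> file = Arrays.asList("a", "b", "c");
        System.out.println(resolveByIndex(Arrays.asList("b2"), hive, file));
        // -> [b]: the physical column the rename left behind.
      }
    }

As in the committed method, a requested name missing from the Hive column list would yield index -1 and an out-of-range lookup; the feature assumes the requested schema was built from those same column names.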