You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by ka...@apache.org on 2017/01/11 22:18:49 UTC
incubator-hawq git commit: HAWQ-1215. Support complex types with HiveORC
Repository: incubator-hawq
Updated Branches:
refs/heads/master aa5792d85 -> 3b15739a0
HAWQ-1215. Support complex types with HiveORC
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/3b15739a
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/3b15739a
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/3b15739a
Branch: refs/heads/master
Commit: 3b15739a009601049f7343131abe889b204b4d62
Parents: aa5792d
Author: Kavinder Dhaliwal <ka...@gmail.com>
Authored: Fri Dec 23 16:27:58 2016 -0800
Committer: Kavinder Dhaliwal <ka...@gmail.com>
Committed: Wed Jan 11 14:14:36 2017 -0800
----------------------------------------------------------------------
.../plugins/hive/HiveInputFormatFragmenter.java | 5 +--
.../pxf/plugins/hive/HiveORCSerdeResolver.java | 32 ++++++++++++++++++++
.../plugins/hive/utilities/HiveUtilities.java | 3 +-
3 files changed, 37 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3b15739a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
index 051a246..ca4501b 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
@@ -26,7 +26,6 @@ import org.apache.hawq.pxf.api.UserDataException;
import org.apache.hawq.pxf.api.io.DataType;
import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.apache.hawq.pxf.api.utilities.InputData;
-import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -35,6 +34,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
import java.util.Arrays;
import java.util.List;
+import java.util.Properties;
/**
* Specialized Hive fragmenter for RC and Text files tables. Unlike the
@@ -55,10 +55,11 @@ import java.util.List;
*/
public class HiveInputFormatFragmenter extends HiveDataFragmenter {
private static final Log LOG = LogFactory.getLog(HiveInputFormatFragmenter.class);
- private static final int EXPECTED_NUM_OF_TOKS = 3;
+ private static final int EXPECTED_NUM_OF_TOKS = 4;
public static final int TOK_SERDE = 0;
public static final int TOK_KEYS = 1;
public static final int TOK_FILTER_DONE = 2;
+ public static final int TOK_COL_TYPES = 3;
/** Defines the Hive input formats currently supported in pxf */
public enum PXF_HIVE_INPUT_FORMATS {
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3b15739a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
index 7673713..93aa474 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveORCSerdeResolver.java
@@ -45,6 +45,7 @@ public class HiveORCSerdeResolver extends HiveResolver {
private static final Log LOG = LogFactory.getLog(HiveORCSerdeResolver.class);
private OrcSerde deserializer;
private HiveInputFormatFragmenter.PXF_HIVE_SERDES serdeType;
+ private String typesString;
public HiveORCSerdeResolver(InputData input) throws Exception {
super(input);
@@ -61,6 +62,7 @@ public class HiveORCSerdeResolver extends HiveResolver {
throw new UnsupportedTypeException("Unsupported Hive Serde: " + serdeEnumStr);
}
partitionKeys = toks[HiveInputFormatFragmenter.TOK_KEYS];
+ typesString = toks[HiveInputFormatFragmenter.TOK_COL_TYPES];
collectionDelim = input.getUserProperty("COLLECTION_DELIM") == null ? COLLECTION_DELIM
: input.getUserProperty("COLLECTION_DELIM");
mapkeyDelim = input.getUserProperty("MAPKEY_DELIM") == null ? MAPKEY_DELIM
@@ -102,11 +104,19 @@ public class HiveORCSerdeResolver extends HiveResolver {
StringBuilder columnNames = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
StringBuilder columnTypes = new StringBuilder(numberOfDataColumns * 2); // column + delimiter
+ String[] cols = typesString.split(":");
+ String[] hiveColTypes = new String[numberOfDataColumns];
+ parseColTypes(cols, hiveColTypes);
+
String delim = ",";
for (int i = 0; i < numberOfDataColumns; i++) {
ColumnDescriptor column = input.getColumn(i);
String columnName = column.columnName();
String columnType = HiveUtilities.toCompatibleHiveType(DataType.get(column.columnTypeCode()), column.columnTypeModifiers());
+ //Complex Types will have a mismatch between Hive and Hawq type
+ if (!columnType.equals(hiveColTypes[i])) {
+ columnType = hiveColTypes[i];
+ }
if(i > 0) {
columnNames.append(delim);
columnTypes.append(delim);
@@ -125,4 +135,26 @@ public class HiveORCSerdeResolver extends HiveResolver {
deserializer.initialize(new JobConf(new Configuration(), HiveORCSerdeResolver.class), serdeProperties);
}
+
+ private void parseColTypes(String[] cols, String[] output) {
+ int i = 0;
+ StringBuilder structTypeBuilder = new StringBuilder();
+ boolean inStruct = false;
+ for (String str : cols) {
+ if (str.contains("struct")) {
+ structTypeBuilder = new StringBuilder();
+ inStruct = true;
+ structTypeBuilder.append(str);
+ } else if (inStruct) {
+ structTypeBuilder.append(':');
+ structTypeBuilder.append(str);
+ if (str.contains(">")) {
+ inStruct = false;
+ output[i++] = structTypeBuilder.toString();
+ }
+ } else {
+ output[i++] = str;
+ }
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/3b15739a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
index ffd66b8..f7ebf4d 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
@@ -444,9 +444,10 @@ public class HiveUtilities {
String inputFormatName = partData.storageDesc.getInputFormat();
String serdeName = partData.storageDesc.getSerdeInfo().getSerializationLib();
String partitionKeys = serializePartitionKeys(partData);
+ String colTypes = partData.properties.getProperty("columns.types");
assertFileType(inputFormatName, partData);
userData = assertSerde(serdeName, partData) + HiveDataFragmenter.HIVE_UD_DELIM
- + partitionKeys + HiveDataFragmenter.HIVE_UD_DELIM + filterInFragmenter;
+ + partitionKeys + HiveDataFragmenter.HIVE_UD_DELIM + filterInFragmenter + HiveDataFragmenter.HIVE_UD_DELIM + colTypes;
} else if (HiveDataFragmenter.class.isAssignableFrom(fragmenterClass)){
String inputFormatName = partData.storageDesc.getInputFormat();
String serdeName = partData.storageDesc.getSerdeInfo().getSerializationLib();