You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hawq.apache.org by od...@apache.org on 2016/08/24 23:55:07 UTC
incubator-hawq git commit: HAWQ-992. PXF Hive data type check in
Fragmenter too restrictive.
Repository: incubator-hawq
Updated Branches:
refs/heads/HAWQ-992 [created] 24f5e363d
HAWQ-992. PXF Hive data type check in Fragmenter too restrictive.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/24f5e363
Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/24f5e363
Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/24f5e363
Branch: refs/heads/HAWQ-992
Commit: 24f5e363d48f13108cb86750a5a5f2e76844a2f3
Parents: c2280de
Author: Oleksandr Diachenko <od...@pivotal.io>
Authored: Wed Aug 24 16:54:55 2016 -0700
Committer: Oleksandr Diachenko <od...@pivotal.io>
Committed: Wed Aug 24 16:54:55 2016 -0700
----------------------------------------------------------------------
.../pxf/api/utilities/ColumnDescriptor.java | 15 ++++-
.../hawq/pxf/api/utilities/EnumHawqType.java | 49 ++++++++------
.../plugins/hive/HiveColumnarSerdeResolver.java | 3 +-
.../plugins/hive/HiveInputFormatFragmenter.java | 70 ++------------------
.../hive/utilities/EnumHiveToHawqType.java | 12 ++++
.../plugins/hive/utilities/HiveUtilities.java | 43 ++++++++++++
.../pxf/service/utilities/ProtocolData.java | 16 ++++-
7 files changed, 116 insertions(+), 92 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
index baaca1d..ff85672 100644
--- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
+++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/ColumnDescriptor.java
@@ -30,6 +30,7 @@ public class ColumnDescriptor {
String gpdbColumnName;
String gpdbColumnTypeName;
int gpdbColumnIndex;
+ String[] gpdbColumnTypeModifiers;
/**
* Reserved word for a table record key.
@@ -44,12 +45,14 @@ public class ColumnDescriptor {
* @param typecode OID
* @param index column index
* @param typename type name
+ * @param typemods type modifiers
*/
- public ColumnDescriptor(String name, int typecode, int index, String typename) {
+ public ColumnDescriptor(String name, int typecode, int index, String typename, String[] typemods) {
gpdbColumnTypeCode = typecode;
gpdbColumnTypeName = typename;
gpdbColumnName = name;
gpdbColumnIndex = index;
+ gpdbColumnTypeModifiers = typemods;
}
/**
@@ -62,6 +65,9 @@ public class ColumnDescriptor {
this.gpdbColumnName = copy.gpdbColumnName;
this.gpdbColumnIndex = copy.gpdbColumnIndex;
this.gpdbColumnTypeName = copy.gpdbColumnTypeName;
+ if (copy.gpdbColumnTypeModifiers != null) {
+ this.gpdbColumnTypeModifiers = new String[copy.gpdbColumnTypeModifiers.length];
+ System.arraycopy(copy.gpdbColumnTypeModifiers, 0,
+ this.gpdbColumnTypeModifiers, 0,
+ copy.gpdbColumnTypeModifiers.length);
+ }
}
public String columnName() {
@@ -80,6 +86,10 @@ public class ColumnDescriptor {
return gpdbColumnTypeName;
}
+ public String[] columnTypeModifiers() {
+ return gpdbColumnTypeModifiers;
+ }
+
/**
* Returns <tt>true</tt> if {@link #gpdbColumnName} is a {@link #RECORD_KEY_NAME}.
*
@@ -94,6 +104,7 @@ public class ColumnDescriptor {
return "ColumnDescriptor [gpdbColumnTypeCode=" + gpdbColumnTypeCode
+ ", gpdbColumnName=" + gpdbColumnName
+ ", gpdbColumnTypeName=" + gpdbColumnTypeName
- + ", gpdbColumnIndex=" + gpdbColumnIndex + "]";
+ + ", gpdbColumnIndex=" + gpdbColumnIndex
+ + ", gpdbColumnTypeModifiers=" + gpdbColumnTypeModifiers + "]";
}
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
index b5a94c6..01d40f0 100644
--- a/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
+++ b/pxf/pxf-api/src/main/java/org/apache/hawq/pxf/api/utilities/EnumHawqType.java
@@ -20,6 +20,8 @@
package org.apache.hawq.pxf.api.utilities;
import java.io.IOException;
+
+import org.apache.hawq.pxf.api.io.DataType;
import org.codehaus.jackson.JsonGenerator;
import org.codehaus.jackson.map.JsonSerializer;
import org.codehaus.jackson.map.annotate.JsonSerialize;
@@ -43,35 +45,32 @@ class EnumHawqTypeSerializer extends JsonSerializer<EnumHawqType> {
*/
@JsonSerialize(using = EnumHawqTypeSerializer.class)
public enum EnumHawqType {
- Int2Type("int2"),
- Int4Type("int4"),
- Int8Type("int8"),
- Float4Type("float4"),
- Float8Type("float8"),
- TextType("text"),
- VarcharType("varchar", (byte) 1, true),
- ByteaType("bytea"),
- DateType("date"),
- TimestampType("timestamp"),
- BoolType("bool"),
- NumericType("numeric", (byte) 2, true),
- BpcharType("bpchar", (byte) 1, true);
+ Int2Type("int2", DataType.SMALLINT),
+ Int4Type("int4", DataType.INTEGER),
+ Int8Type("int8", DataType.BIGINT),
+ Float4Type("float4", DataType.REAL),
+ Float8Type("float8", DataType.FLOAT8),
+ TextType("text", DataType.TEXT),
+ VarcharType("varchar", DataType.VARCHAR, (byte) 1, true),
+ ByteaType("bytea", DataType.BYTEA),
+ DateType("date", DataType.DATE),
+ TimestampType("timestamp", DataType.TIMESTAMP),
+ BoolType("bool", DataType.BOOLEAN),
+ NumericType("numeric", DataType.NUMERIC, (byte) 2, true),
+ BpcharType("bpchar", DataType.BPCHAR, (byte) 1, true);
+ private DataType dataType;
private String typeName;
private byte modifiersNum;
private boolean validateIntegerModifiers;
- EnumHawqType(String typeName) {
+ EnumHawqType(String typeName, DataType dataType) {
this.typeName = typeName;
+ this.dataType = dataType;
}
- EnumHawqType(String typeName, byte modifiersNum) {
- this(typeName);
- this.modifiersNum = modifiersNum;
- }
-
- EnumHawqType(String typeName, byte modifiersNum, boolean validateIntegerModifiers) {
- this(typeName);
+ EnumHawqType(String typeName, DataType dataType, byte modifiersNum, boolean validateIntegerModifiers) {
+ this(typeName, dataType);
this.modifiersNum = modifiersNum;
this.validateIntegerModifiers = validateIntegerModifiers;
}
@@ -99,6 +98,14 @@ public enum EnumHawqType {
public boolean getValidateIntegerModifiers() {
return this.validateIntegerModifiers;
}
+
+ /**
+ *
+ * @return data type
+ */
+ public DataType getDataType() {
+ return this.dataType;
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
index d298bac..43e3b65 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveColumnarSerdeResolver.java
@@ -28,6 +28,7 @@ import org.apache.hawq.pxf.api.io.DataType;
import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.apache.hawq.pxf.api.utilities.InputData;
import org.apache.hawq.pxf.api.utilities.Utilities;
+import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
@@ -126,7 +127,7 @@ public class HiveColumnarSerdeResolver extends HiveResolver {
for (int i = 0; i < numberOfDataColumns; i++) {
ColumnDescriptor column = input.getColumn(i);
String columnName = column.columnName();
- String columnType = HiveInputFormatFragmenter.toHiveType(DataType.get(column.columnTypeCode()), columnName);
+ String columnType = HiveUtilities.toHiveType(DataType.get(column.columnTypeCode()));
columnNames.append(delim).append(columnName);
columnTypes.append(delim).append(columnType);
delim = ",";
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
index a666b8b..b944206 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/HiveInputFormatFragmenter.java
@@ -26,6 +26,8 @@ import org.apache.hawq.pxf.api.UserDataException;
import org.apache.hawq.pxf.api.io.DataType;
import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.apache.hawq.pxf.api.utilities.InputData;
+import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
+import org.apache.hawq.pxf.plugins.hive.utilities.HiveUtilities;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -146,82 +148,18 @@ public class HiveInputFormatFragmenter extends HiveDataFragmenter {
for (FieldSchema hiveCol : hiveColumns) {
ColumnDescriptor colDesc = inputData.getColumn(index++);
DataType colType = DataType.get(colDesc.columnTypeCode());
- compareTypes(colType, hiveCol.getType(), colDesc.columnName());
+ HiveUtilities.compareTypes(colType, hiveCol.getType(), colDesc.columnName());
}
// check partition fields
List<FieldSchema> hivePartitions = tbl.getPartitionKeys();
for (FieldSchema hivePart : hivePartitions) {
ColumnDescriptor colDesc = inputData.getColumn(index++);
DataType colType = DataType.get(colDesc.columnTypeCode());
- compareTypes(colType, hivePart.getType(), colDesc.columnName());
+ HiveUtilities.compareTypes(colType, hivePart.getType(), colDesc.columnName());
}
}
- private void compareTypes(DataType type, String hiveType, String fieldName) {
- String convertedHive = toHiveType(type, fieldName);
- if (!convertedHive.equals(hiveType)
- && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) {
- throw new UnsupportedTypeException(
- "Schema mismatch definition: Field " + fieldName
- + " (Hive type " + hiveType + ", HAWQ type "
- + type.toString() + ")");
- }
- if (LOG.isDebugEnabled()) {
- LOG.debug("Field " + fieldName + ": Hive type " + hiveType
- + ", HAWQ type " + type.toString());
- }
- }
-
- /**
- * Converts HAWQ type to hive type. The supported mappings are:<ul>
- * <li>{@code BOOLEAN -> boolean}</li>
- * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li>
- * <li>{@code BIGINT -> bigint}</li>
- * <li>{@code TIMESTAMP, TIME -> timestamp}</li>
- * <li>{@code NUMERIC -> decimal}</li>
- * <li>{@code BYTEA -> binary}</li>
- * <li>{@code INTERGER -> int}</li>
- * <li>{@code TEXT -> string}</li>
- * <li>{@code REAL -> float}</li>
- * <li>{@code FLOAT8 -> double}</li>
- * </ul>
- * All other types (both in HAWQ and in HIVE) are not supported.
- *
- * @param type HAWQ data type
- * @param name field name
- * @return Hive type
- * @throws UnsupportedTypeException if type is not supported
- */
- public static String toHiveType(DataType type, String name) {
- switch (type) {
- case BOOLEAN:
- case SMALLINT:
- case BIGINT:
- case TIMESTAMP:
- return type.toString().toLowerCase();
- case NUMERIC:
- return "decimal";
- case BYTEA:
- return "binary";
- case INTEGER:
- return "int";
- case TEXT:
- return "string";
- case REAL:
- return "float";
- case FLOAT8:
- return "double";
- case TIME:
- return "timestamp";
- default:
- throw new UnsupportedTypeException(
- type.toString()
- + " conversion is not supported by HiveInputFormatFragmenter (Field "
- + name + ")");
- }
- }
-
/*
* Validates that partition format corresponds to PXF supported formats and
* transforms the class name to an enumeration for writing it to the
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
index a747bd5..1cedaa8 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/EnumHiveToHawqType.java
@@ -19,6 +19,7 @@
package org.apache.hawq.pxf.plugins.hive.utilities;
+import org.apache.hawq.pxf.api.io.DataType;
import org.apache.hawq.pxf.api.utilities.EnumHawqType;
import org.apache.hawq.pxf.api.UnsupportedTypeException;
@@ -110,4 +111,15 @@ public enum EnumHiveToHawqType {
+ hiveType + " to HAWQ's type");
}
+ public static EnumHiveToHawqType getHawqToHiveType(DataType dataType) {
+
+ for (EnumHiveToHawqType t : values()) {
+
+ if (t.getHawqType().getDataType().equals(dataType)) {
+ return t;
+ }
+ }
+ throw new UnsupportedTypeException("Unable to map HAWQ's type: "
+ + dataType + " to Hive's type");
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
index 096c0ff..579ab0b 100644
--- a/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
+++ b/pxf/pxf-hive/src/main/java/org/apache/hawq/pxf/plugins/hive/utilities/HiveUtilities.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.hive.metastore.api.Table;
import org.apache.hawq.pxf.api.Metadata;
import org.apache.hawq.pxf.api.UnsupportedTypeException;
import org.apache.hawq.pxf.api.utilities.EnumHawqType;
+import org.apache.hawq.pxf.api.io.DataType;
import org.apache.hawq.pxf.plugins.hive.utilities.EnumHiveToHawqType;
/**
@@ -256,4 +257,46 @@ public class HiveUtilities {
throw new RuntimeException("Failed connecting to Hive MetaStore service: " + cause.getMessage(), cause);
}
}
+
+
+ /**
+ * Converts HAWQ type to hive type. The supported mappings are:<ul>
+ * <li>{@code BOOLEAN -> boolean}</li>
+ * <li>{@code SMALLINT -> smallint (tinyint is converted to smallint)}</li>
+ * <li>{@code BIGINT -> bigint}</li>
+ * <li>{@code TIMESTAMP, TIME -> timestamp}</li>
+ * <li>{@code NUMERIC -> decimal}</li>
+ * <li>{@code BYTEA -> binary}</li>
+ * <li>{@code INTEGER -> int}</li>
+ * <li>{@code TEXT -> string}</li>
+ * <li>{@code REAL -> float}</li>
+ * <li>{@code FLOAT8 -> double}</li>
+ * </ul>
+ * All other types (both in HAWQ and in HIVE) are not supported.
+ *
+ * @param type HAWQ data type
+ * @return Hive type
+ * @throws UnsupportedTypeException if type is not supported
+ */
+ public static String toHiveType(DataType type) {
+
+ EnumHiveToHawqType hiveToHawqType = EnumHiveToHawqType.getHawqToHiveType(type);
+ return hiveToHawqType.getTypeName();
+ }
+
+ public static void compareTypes(DataType type, String hiveType, String columnName) {
+ String convertedHive = toHiveType(type);
+ if (!convertedHive.equals(hiveType)
+ && !(convertedHive.equals("smallint") && hiveType.equals("tinyint"))) {
+ throw new UnsupportedTypeException(
+ "Schema mismatch definition: Field " + columnName
+ + " (Hive type " + hiveType + ", HAWQ type "
+ + type.toString() + ")");
+ }
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Field " + columnName + ": Hive type " + hiveType
+ + ", HAWQ type " + type.toString());
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/24f5e363/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
----------------------------------------------------------------------
diff --git a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
index 0337937..5e6f6c4 100644
--- a/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
+++ b/pxf/pxf-service/src/main/java/org/apache/hawq/pxf/service/utilities/ProtocolData.java
@@ -29,7 +29,6 @@ import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.security.UserGroupInformation;
-
import org.apache.hawq.pxf.api.OutputFormat;
import org.apache.hawq.pxf.api.utilities.ColumnDescriptor;
import org.apache.hawq.pxf.api.utilities.InputData;
@@ -390,9 +389,10 @@ public class ProtocolData extends InputData {
String columnName = getProperty("ATTR-NAME" + i);
int columnTypeCode = getIntProperty("ATTR-TYPECODE" + i);
String columnTypeName = getProperty("ATTR-TYPENAME" + i);
+ String[] columnTypeMods = parseTypeMods(i);
ColumnDescriptor column = new ColumnDescriptor(columnName,
- columnTypeCode, i, columnTypeName);
+ columnTypeCode, i, columnTypeName, columnTypeMods);
tupleDescription.add(column);
if (columnName.equalsIgnoreCase(ColumnDescriptor.RECORD_KEY_NAME)) {
@@ -401,6 +401,18 @@ public class ProtocolData extends InputData {
}
}
+ private String[] parseTypeMods(int columnIndex) {
+ String typeModCountStr = getOptionalProperty("ATTR-TYPEMOD" + columnIndex + "COUNT");
+ String[] result = null;
+ if (typeModCountStr != null) {
+ int typeModCount = Integer.parseInt(typeModCountStr);
+ if (typeModCount > 0) {
+ result = new String[typeModCount];
+ for (int i = 0; i < typeModCount; i++) {
+ result[i] = getProperty("ATTR-TYPEMOD" + columnIndex + "-" + i);
+ }
+ }
+ }
+ return result;
+ }
+
+
/**
* Sets the index of the allocated data fragment
*