You are viewing a plain text version of this content. The canonical (hyperlinked) version is available in the original mailing-list archive; the link target was lost in this plain-text conversion.
Posted to commits@drill.apache.org by ja...@apache.org on 2014/06/12 18:34:59 UTC
[08/24] git commit: DRILL-521: Fix failures in metadata conversion of
Hive tables for INFORMATION_SCHEMA
DRILL-521: Fix failures in metadata conversion of Hive tables for INFORMATION_SCHEMA
- Instead of converting two forms of Hive types info (string and ObjectInspector), switch
to using only one form of type info TypeInfo.
- Add missing mappings of Hive data type to Sql data type.
Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/71432fd1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/71432fd1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/71432fd1
Branch: refs/heads/master
Commit: 71432fd1ed7659aec118514bdf822043da293992
Parents: 9f3b9d2
Author: vkorukanti <ve...@gmail.com>
Authored: Wed Jun 11 12:44:26 2014 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Wed Jun 11 16:07:07 2014 -0700
----------------------------------------------------------------------
.../exec/store/hive/schema/DrillHiveTable.java | 148 +++++++++----------
.../drill/exec/store/ischema/Records.java | 12 +-
.../exec/work/fragment/FragmentExecutor.java | 4 +-
.../exec/store/hive/HiveTestDataGenerator.java | 29 +++-
.../apache/drill/jdbc/test/TestJdbcQuery.java | 2 +-
.../apache/drill/jdbc/test/TestMetadataDDL.java | 6 +-
6 files changed, 116 insertions(+), 85 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
index 7d6bc72..02d19d3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
@@ -19,16 +19,20 @@ package org.apache.drill.exec.store.hive.schema;
import java.nio.charset.Charset;
import java.util.ArrayList;
+import java.util.List;
-import org.apache.drill.common.logical.StoragePluginConfig;
+import com.google.common.collect.Lists;
import org.apache.drill.exec.planner.logical.DrillTable;
import org.apache.drill.exec.store.hive.HiveReadEntry;
import org.apache.drill.exec.store.hive.HiveStoragePlugin;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.eigenbase.reltype.RelDataType;
import org.eigenbase.reltype.RelDataTypeFactory;
import org.eigenbase.sql.SqlCollation;
@@ -41,82 +45,35 @@ public class DrillHiveTable extends DrillTable{
public DrillHiveTable(String storageEngineName, HiveStoragePlugin plugin, HiveReadEntry readEntry) {
super(storageEngineName, plugin, readEntry);
- this.hiveTable = new org.apache.hadoop.hive.ql.metadata.Table(readEntry.getTable());
+ this.hiveTable = readEntry.getTable();
}
@Override
public RelDataType getRowType(RelDataTypeFactory typeFactory) {
- ArrayList<RelDataType> typeList = new ArrayList<>();
- ArrayList<String> fieldNameList = new ArrayList<>();
-
- ArrayList<StructField> hiveFields = hiveTable.getFields();
- for(StructField hiveField : hiveFields) {
- fieldNameList.add(hiveField.getFieldName());
- typeList.add(getRelDataTypeFromHiveType(typeFactory, hiveField.getFieldObjectInspector()));
+ List<RelDataType> typeList = Lists.newArrayList();
+ List<String> fieldNameList = Lists.newArrayList();
+
+ List<FieldSchema> hiveFields = hiveTable.getSd().getCols();
+ for(FieldSchema hiveField : hiveFields) {
+ fieldNameList.add(hiveField.getName());
+ typeList.add(getRelDataTypeFromHiveType(
+ typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType())));
}
for (FieldSchema field : hiveTable.getPartitionKeys()) {
fieldNameList.add(field.getName());
- typeList.add(getRelDataTypeFromHiveTypeString(typeFactory, field.getType()));
+ typeList.add(getRelDataTypeFromHiveType(
+ typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(field.getType())));
}
- final RelDataType rowType = typeFactory.createStructType(typeList, fieldNameList);
- return rowType;
+ return typeFactory.createStructType(typeList, fieldNameList);
}
- private RelDataType getRelDataTypeFromHiveTypeString(RelDataTypeFactory typeFactory, String type) {
- switch(type) {
- case "boolean":
- return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
-
- case "tinyint":
- return typeFactory.createSqlType(SqlTypeName.TINYINT);
-
- case "smallint":
- return typeFactory.createSqlType(SqlTypeName.SMALLINT);
-
- case "int":
- return typeFactory.createSqlType(SqlTypeName.INTEGER);
-
- case "bigint":
- return typeFactory.createSqlType(SqlTypeName.BIGINT);
-
- case "float":
- return typeFactory.createSqlType(SqlTypeName.FLOAT);
-
- case "double":
- return typeFactory.createSqlType(SqlTypeName.DOUBLE);
-
- case "date":
- return typeFactory.createSqlType(SqlTypeName.DATE);
-
- case "timestamp":
- return typeFactory.createSqlType(SqlTypeName.TIMESTAMP);
-
- case "binary":
- return typeFactory.createSqlType(SqlTypeName.BINARY);
-
- case "decimal":
- return typeFactory.createSqlType(SqlTypeName.DECIMAL);
-
- case "string":
- case "varchar": {
- return typeFactory.createTypeWithCharsetAndCollation(
- typeFactory.createSqlType(SqlTypeName.VARCHAR), /*input type*/
- Charset.forName("ISO-8859-1"), /*unicode char set*/
- SqlCollation.IMPLICIT /* TODO: need to decide if implicit is the correct one */
- );
- }
-
- default:
- throw new RuntimeException("Unknown or unsupported hive type: " + type);
- }
- }
-
- private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveObjectInspector poi) {
- switch(poi.getPrimitiveCategory()) {
+ private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveTypeInfo pTypeInfo) {
+ switch(pTypeInfo.getPrimitiveCategory()) {
case BOOLEAN:
return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
+
case BYTE:
return typeFactory.createSqlType(SqlTypeName.TINYINT);
@@ -159,20 +116,59 @@ public class DrillHiveTable extends DrillTable{
case UNKNOWN:
case VOID:
default:
- throw new RuntimeException("Unknown or unsupported hive type");
+ throwUnsupportedHiveDataTypeError(pTypeInfo.getPrimitiveCategory().toString());
}
+
+ return null;
}
- private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, ObjectInspector oi) {
- switch(oi.getCategory()) {
+ private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, TypeInfo typeInfo) {
+ switch(typeInfo.getCategory()) {
case PRIMITIVE:
- return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveObjectInspector) oi));
- case LIST:
- case MAP:
- case STRUCT:
+ return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveTypeInfo) typeInfo));
+
+ case LIST: {
+ ListTypeInfo listTypeInfo = (ListTypeInfo)typeInfo;
+ RelDataType listElemTypeInfo = getRelDataTypeFromHiveType(typeFactory, listTypeInfo.getListElementTypeInfo());
+ return typeFactory.createArrayType(listElemTypeInfo, -1);
+ }
+
+ case MAP: {
+ MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo;
+ RelDataType keyType = getRelDataTypeFromHiveType(typeFactory, mapTypeInfo.getMapKeyTypeInfo());
+ RelDataType valueType = getRelDataTypeFromHiveType(typeFactory, mapTypeInfo.getMapValueTypeInfo());
+ return typeFactory.createMapType(keyType, valueType);
+ }
+
+ case STRUCT: {
+ StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;
+ ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+ ArrayList<TypeInfo> fieldHiveTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
+ List<RelDataType> fieldRelDataTypeList = Lists.newArrayList();
+ for(TypeInfo fieldHiveType : fieldHiveTypeInfoList) {
+ fieldRelDataTypeList.add(getRelDataTypeFromHiveType(typeFactory, fieldHiveType));
+ }
+ return typeFactory.createStructType(fieldRelDataTypeList, fieldNames);
+ }
+
case UNION:
- default:
- throw new RuntimeException("Unknown or unsupported hive type");
+ logger.warn("There is no UNION data type in SQL. Converting it to Sql type OTHER to avoid " +
+ "breaking INFORMATION_SCHEMA queries");
+ return typeFactory.createSqlType(SqlTypeName.OTHER);
}
+
+ throwUnsupportedHiveDataTypeError(typeInfo.getCategory().toString());
+ return null;
+ }
+
+ private void throwUnsupportedHiveDataTypeError(String hiveType) {
+ StringBuilder errMsg = new StringBuilder();
+ errMsg.append(String.format("Unsupported Hive data type %s. ", hiveType));
+ errMsg.append(System.getProperty("line.separator"));
+ errMsg.append("Following Hive data types are supported in Drill INFORMATION_SCHEMA: ");
+ errMsg.append("BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DATE, TIMESTAMP, BINARY, DECIMAL, STRING, " +
+ "VARCHAR, LIST, MAP, STRUCT and UNION");
+
+ throw new RuntimeException(errMsg.toString());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
index d999346..8d10775 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
@@ -64,7 +64,17 @@ public class Records {
this.ORDINAL_POSITION = field.getIndex();
this.IS_NULLABLE = type.isNullable() ? "YES" : "NO";
- this.DATA_TYPE = sqlType.getName();
+
+ if (sqlType == SqlTypeName.ARRAY || sqlType == SqlTypeName.MAP || sqlType == SqlTypeName.ROW) {
+ // For complex types use the toString method to display the inside elements
+ String typeString = type.toString();
+
+ // RelDataType.toString prints "RecordType" for "STRUCT".
+ this.DATA_TYPE = type.toString().replace("RecordType", "STRUCT");
+ } else {
+ this.DATA_TYPE = sqlType.toString();
+ }
+
this.NUMERIC_PRECISION_RADIX = (sqlType == SqlTypeName.DECIMAL) ? 10 : -1; // TODO: where do we get radix?
this.CHARACTER_MAXIMUM_LENGTH = -1; // TODO: where do we get char length?
this.NUMERIC_PRECISION = (sqlType.allowsPrec())?type.getPrecision(): -1;
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
index 36727ec..7d4b657 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
@@ -79,9 +79,9 @@ public class FragmentExecutor implements Runnable, CancelableQuery, StatusProvid
boolean closed = false;
try {
root = ImplCreator.getExec(context, rootOperator);
- } catch (ExecutionSetupException e) {
+ } catch (AssertionError | Exception e) {
context.fail(e);
- logger.debug("Failure while running fragement", e);
+ logger.debug("Failure while initializing operator tree", e);
internalFail(e);
return;
}
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index 6aa68b4..8433931 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -88,12 +88,35 @@ public class HiveTestDataGenerator {
// create a table with no data
executeQuery("CREATE TABLE IF NOT EXISTS default.empty_table(a INT, b STRING)");
- // create a table that has all supported types in Drill
+ // create a Hive table that has columns with data types which are supported for reading in Drill.
testDataFile = generateAllTypesDataFile();
- executeQuery("CREATE TABLE IF NOT EXISTS alltypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " +
+ executeQuery("CREATE TABLE IF NOT EXISTS allReadSupportedHiveDataTypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " +
"c9 TINYINT, c10 SMALLINT, c11 FLOAT, c12 BIGINT, c19 BINARY) " +
"ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
- executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.alltypes", testDataFile));
+ executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE " +
+ "default.allReadSupportedHiveDataTypes", testDataFile));
+
+ // create a table that has all Hive types. This is to test how hive tables metadata is populated in
+ // Drill's INFORMATION_SCHEMA.
+ executeQuery("CREATE TABLE IF NOT EXISTS allHiveDataTypes(" +
+ "booleanType BOOLEAN, " +
+ "tinyintType TINYINT, " +
+ "smallintType SMALLINT, " +
+ "intType INT, " +
+ "bigintType BIGINT, " +
+ "floatType FLOAT, " +
+ "doubleType DOUBLE, " +
+ "dataType DATE, " +
+ "timestampType TIMESTAMP, " +
+ "binaryType BINARY, " +
+ "decimalType DECIMAL, " +
+ "stringType STRING, " +
+ "varCharType VARCHAR(20), " +
+ "listType ARRAY<STRING>, " +
+ "mapType MAP<STRING,INT>, " +
+ "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " +
+ "uniontypeType UNIONTYPE<int, double, array<string>>)"
+ );
ss.close();
}
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
----------------------------------------------------------------------
diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
index bf4e12e..932f207 100644
--- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
+++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
@@ -80,7 +80,7 @@ public class TestJdbcQuery extends JdbcTest{
public void testHiveReadWithDb() throws Exception{
testQuery("select * from hive.`default`.kv");
testQuery("select key from hive.`default`.kv group by key");
- testQuery("select * from hive.`default`.alltypes");
+ testQuery("select * from hive.`default`.allreadsupportedhivedatatypes");
}
@Test
http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
----------------------------------------------------------------------
diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
index 3975ead..3580711 100644
--- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
+++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
@@ -50,7 +50,8 @@ public class TestMetadataDDL extends TestJdbcQuery {
.sql("SHOW TABLES")
.returns(
"TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" +
- "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" +
+ "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" +
+ "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" +
"TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" +
"TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n"
);
@@ -72,7 +73,8 @@ public class TestMetadataDDL extends TestJdbcQuery {
.sql("SHOW TABLES IN hive.`default`")
.returns(
"TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" +
- "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" +
+ "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" +
+ "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" +
"TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" +
"TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n");
}