You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ja...@apache.org on 2014/06/12 18:34:59 UTC

[08/24] git commit: DRILL-521: Fix failures in metadata conversion of Hive tables for INFORMATION_SCHEMA

DRILL-521: Fix failures in metadata conversion of Hive tables for INFORMATION_SCHEMA

- Instead of converting two forms of Hive type info (string and ObjectInspector), switch
  to using only one form of type info, TypeInfo.
- Add missing mappings of Hive data type to Sql data type.


Project: http://git-wip-us.apache.org/repos/asf/incubator-drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-drill/commit/71432fd1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-drill/tree/71432fd1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-drill/diff/71432fd1

Branch: refs/heads/master
Commit: 71432fd1ed7659aec118514bdf822043da293992
Parents: 9f3b9d2
Author: vkorukanti <ve...@gmail.com>
Authored: Wed Jun 11 12:44:26 2014 -0700
Committer: Jacques Nadeau <ja...@apache.org>
Committed: Wed Jun 11 16:07:07 2014 -0700

----------------------------------------------------------------------
 .../exec/store/hive/schema/DrillHiveTable.java  | 148 +++++++++----------
 .../drill/exec/store/ischema/Records.java       |  12 +-
 .../exec/work/fragment/FragmentExecutor.java    |   4 +-
 .../exec/store/hive/HiveTestDataGenerator.java  |  29 +++-
 .../apache/drill/jdbc/test/TestJdbcQuery.java   |   2 +-
 .../apache/drill/jdbc/test/TestMetadataDDL.java |   6 +-
 6 files changed, 116 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
index 7d6bc72..02d19d3 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/hive/schema/DrillHiveTable.java
@@ -19,16 +19,20 @@ package org.apache.drill.exec.store.hive.schema;
 
 import java.nio.charset.Charset;
 import java.util.ArrayList;
+import java.util.List;
 
-import org.apache.drill.common.logical.StoragePluginConfig;
+import com.google.common.collect.Lists;
 import org.apache.drill.exec.planner.logical.DrillTable;
 import org.apache.drill.exec.store.hive.HiveReadEntry;
 import org.apache.drill.exec.store.hive.HiveStoragePlugin;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.metadata.Table;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 import org.eigenbase.reltype.RelDataType;
 import org.eigenbase.reltype.RelDataTypeFactory;
 import org.eigenbase.sql.SqlCollation;
@@ -41,82 +45,35 @@ public class DrillHiveTable extends DrillTable{
   
   public DrillHiveTable(String storageEngineName, HiveStoragePlugin plugin, HiveReadEntry readEntry) {
     super(storageEngineName, plugin, readEntry);
-    this.hiveTable = new org.apache.hadoop.hive.ql.metadata.Table(readEntry.getTable());
+    this.hiveTable = readEntry.getTable();
   }
 
   @Override
   public RelDataType getRowType(RelDataTypeFactory typeFactory) {
-    ArrayList<RelDataType> typeList = new ArrayList<>();
-    ArrayList<String> fieldNameList = new ArrayList<>();
-
-    ArrayList<StructField> hiveFields = hiveTable.getFields();
-    for(StructField hiveField : hiveFields) {
-      fieldNameList.add(hiveField.getFieldName());
-      typeList.add(getRelDataTypeFromHiveType(typeFactory, hiveField.getFieldObjectInspector()));
+    List<RelDataType> typeList = Lists.newArrayList();
+    List<String> fieldNameList = Lists.newArrayList();
+
+    List<FieldSchema> hiveFields = hiveTable.getSd().getCols();
+    for(FieldSchema hiveField : hiveFields) {
+      fieldNameList.add(hiveField.getName());
+      typeList.add(getRelDataTypeFromHiveType(
+          typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(hiveField.getType())));
     }
 
     for (FieldSchema field : hiveTable.getPartitionKeys()) {
       fieldNameList.add(field.getName());
-      typeList.add(getRelDataTypeFromHiveTypeString(typeFactory, field.getType()));
+      typeList.add(getRelDataTypeFromHiveType(
+          typeFactory, TypeInfoUtils.getTypeInfoFromTypeString(field.getType())));
     }
 
-    final RelDataType rowType = typeFactory.createStructType(typeList, fieldNameList);
-    return rowType;
+    return typeFactory.createStructType(typeList, fieldNameList);
   }
 
-  private RelDataType getRelDataTypeFromHiveTypeString(RelDataTypeFactory typeFactory, String type) {
-    switch(type) {
-      case "boolean":
-        return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
-
-      case "tinyint":
-        return typeFactory.createSqlType(SqlTypeName.TINYINT);
-
-      case "smallint":
-        return typeFactory.createSqlType(SqlTypeName.SMALLINT);
-
-      case "int":
-        return typeFactory.createSqlType(SqlTypeName.INTEGER);
-
-      case "bigint":
-        return typeFactory.createSqlType(SqlTypeName.BIGINT);
-
-      case "float":
-        return typeFactory.createSqlType(SqlTypeName.FLOAT);
-
-      case "double":
-        return typeFactory.createSqlType(SqlTypeName.DOUBLE);
-
-      case "date":
-        return typeFactory.createSqlType(SqlTypeName.DATE);
-
-      case "timestamp":
-        return typeFactory.createSqlType(SqlTypeName.TIMESTAMP);
-
-      case "binary":
-        return typeFactory.createSqlType(SqlTypeName.BINARY);
-
-      case "decimal":
-        return typeFactory.createSqlType(SqlTypeName.DECIMAL);
-
-      case "string":
-      case "varchar": {
-        return typeFactory.createTypeWithCharsetAndCollation(
-                typeFactory.createSqlType(SqlTypeName.VARCHAR), /*input type*/
-                Charset.forName("ISO-8859-1"), /*unicode char set*/
-                SqlCollation.IMPLICIT /* TODO: need to decide if implicit is the correct one */
-        );
-      }
-
-      default:
-        throw new RuntimeException("Unknown or unsupported hive type: " + type);
-    }
-  }
-
-  private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveObjectInspector poi) {
-    switch(poi.getPrimitiveCategory()) {
+  private RelDataType getRelDataTypeFromHivePrimitiveType(RelDataTypeFactory typeFactory, PrimitiveTypeInfo pTypeInfo) {
+    switch(pTypeInfo.getPrimitiveCategory()) {
       case BOOLEAN:
         return typeFactory.createSqlType(SqlTypeName.BOOLEAN);
+
       case BYTE:
         return typeFactory.createSqlType(SqlTypeName.TINYINT);
 
@@ -159,20 +116,59 @@ public class DrillHiveTable extends DrillTable{
       case UNKNOWN:
       case VOID:
       default:
-        throw new RuntimeException("Unknown or unsupported hive type");
+        throwUnsupportedHiveDataTypeError(pTypeInfo.getPrimitiveCategory().toString());
     }
+
+    return null;
   }
 
-  private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, ObjectInspector oi) {
-    switch(oi.getCategory()) {
+  private RelDataType getRelDataTypeFromHiveType(RelDataTypeFactory typeFactory, TypeInfo typeInfo) {
+    switch(typeInfo.getCategory()) {
       case PRIMITIVE:
-        return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveObjectInspector) oi));
-      case LIST:
-      case MAP:
-      case STRUCT:
+        return getRelDataTypeFromHivePrimitiveType(typeFactory, ((PrimitiveTypeInfo) typeInfo));
+
+      case LIST: {
+        ListTypeInfo listTypeInfo = (ListTypeInfo)typeInfo;
+        RelDataType listElemTypeInfo = getRelDataTypeFromHiveType(typeFactory, listTypeInfo.getListElementTypeInfo());
+        return typeFactory.createArrayType(listElemTypeInfo, -1);
+      }
+
+      case MAP: {
+        MapTypeInfo mapTypeInfo = (MapTypeInfo)typeInfo;
+        RelDataType keyType = getRelDataTypeFromHiveType(typeFactory, mapTypeInfo.getMapKeyTypeInfo());
+        RelDataType valueType = getRelDataTypeFromHiveType(typeFactory, mapTypeInfo.getMapValueTypeInfo());
+        return typeFactory.createMapType(keyType, valueType);
+      }
+
+      case STRUCT: {
+        StructTypeInfo structTypeInfo = (StructTypeInfo)typeInfo;
+        ArrayList<String> fieldNames = structTypeInfo.getAllStructFieldNames();
+        ArrayList<TypeInfo> fieldHiveTypeInfoList = structTypeInfo.getAllStructFieldTypeInfos();
+        List<RelDataType> fieldRelDataTypeList = Lists.newArrayList();
+        for(TypeInfo fieldHiveType : fieldHiveTypeInfoList) {
+          fieldRelDataTypeList.add(getRelDataTypeFromHiveType(typeFactory, fieldHiveType));
+        }
+        return typeFactory.createStructType(fieldRelDataTypeList, fieldNames);
+      }
+
       case UNION:
-      default:
-        throw new RuntimeException("Unknown or unsupported hive type");
+        logger.warn("There is no UNION data type in SQL. Converting it to Sql type OTHER to avoid " +
+            "breaking INFORMATION_SCHEMA queries");
+        return typeFactory.createSqlType(SqlTypeName.OTHER);
     }
+
+    throwUnsupportedHiveDataTypeError(typeInfo.getCategory().toString());
+    return null;
+  }
+
+  private void throwUnsupportedHiveDataTypeError(String hiveType) {
+    StringBuilder errMsg = new StringBuilder();
+    errMsg.append(String.format("Unsupported Hive data type %s. ", hiveType));
+    errMsg.append(System.getProperty("line.separator"));
+    errMsg.append("Following Hive data types are supported in Drill INFORMATION_SCHEMA: ");
+    errMsg.append("BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, DATE, TIMESTAMP, BINARY, DECIMAL, STRING, " +
+        "VARCHAR, LIST, MAP, STRUCT and UNION");
+
+    throw new RuntimeException(errMsg.toString());
   }
 }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
index d999346..8d10775 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/ischema/Records.java
@@ -64,7 +64,17 @@ public class Records {
 
       this.ORDINAL_POSITION = field.getIndex();
       this.IS_NULLABLE = type.isNullable() ? "YES" : "NO";
-      this.DATA_TYPE = sqlType.getName();
+
+      if (sqlType == SqlTypeName.ARRAY || sqlType == SqlTypeName.MAP || sqlType == SqlTypeName.ROW) {
+        // For complex types use the toString method to display the inside elements
+        String typeString = type.toString();
+
+        // RelDataType.toString prints "RecordType" for "STRUCT".
+        this.DATA_TYPE = type.toString().replace("RecordType", "STRUCT");
+      } else {
+        this.DATA_TYPE = sqlType.toString();
+      }
+
       this.NUMERIC_PRECISION_RADIX = (sqlType == SqlTypeName.DECIMAL) ? 10 : -1; // TODO: where do we get radix?
       this.CHARACTER_MAXIMUM_LENGTH = -1;  // TODO: where do we get char length?
       this.NUMERIC_PRECISION = (sqlType.allowsPrec())?type.getPrecision(): -1;

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
index 36727ec..7d4b657 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/work/fragment/FragmentExecutor.java
@@ -79,9 +79,9 @@ public class FragmentExecutor implements Runnable, CancelableQuery, StatusProvid
     boolean closed = false;
     try {
       root = ImplCreator.getExec(context, rootOperator);
-    } catch (ExecutionSetupException e) {
+    } catch (AssertionError | Exception e) {
       context.fail(e);
-      logger.debug("Failure while running fragement", e);
+      logger.debug("Failure while initializing operator tree", e);
       internalFail(e);
       return;
     }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
index 6aa68b4..8433931 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/store/hive/HiveTestDataGenerator.java
@@ -88,12 +88,35 @@ public class HiveTestDataGenerator {
     // create a table with no data
     executeQuery("CREATE TABLE IF NOT EXISTS default.empty_table(a INT, b STRING)");
 
-    // create a table that has all supported types in Drill
+    // create a Hive table that has columns with data types which are supported for reading in Drill.
     testDataFile = generateAllTypesDataFile();
-    executeQuery("CREATE TABLE IF NOT EXISTS alltypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " +
+    executeQuery("CREATE TABLE IF NOT EXISTS allReadSupportedHiveDataTypes (c1 INT, c2 BOOLEAN, c3 DOUBLE, c4 STRING, " +
         "c9 TINYINT, c10 SMALLINT, c11 FLOAT, c12 BIGINT, c19 BINARY) " +
         "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' STORED AS TEXTFILE");
-    executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE default.alltypes", testDataFile));
+    executeQuery(String.format("LOAD DATA LOCAL INPATH '%s' OVERWRITE INTO TABLE " +
+        "default.allReadSupportedHiveDataTypes", testDataFile));
+
+    // create a table that has all Hive types. This is to test how hive tables metadata is populated in
+    // Drill's INFORMATION_SCHEMA.
+    executeQuery("CREATE TABLE IF NOT EXISTS allHiveDataTypes(" +
+        "booleanType BOOLEAN, " +
+        "tinyintType TINYINT, " +
+        "smallintType SMALLINT, " +
+        "intType INT, " +
+        "bigintType BIGINT, " +
+        "floatType FLOAT, " +
+        "doubleType DOUBLE, " +
+        "dataType DATE, " +
+        "timestampType TIMESTAMP, " +
+        "binaryType BINARY, " +
+        "decimalType DECIMAL, " +
+        "stringType STRING, " +
+        "varCharType VARCHAR(20), " +
+        "listType ARRAY<STRING>, " +
+        "mapType MAP<STRING,INT>, " +
+        "structType STRUCT<sint:INT,sboolean:BOOLEAN,sstring:STRING>, " +
+        "uniontypeType UNIONTYPE<int, double, array<string>>)"
+    );
 
     ss.close();
   }

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
----------------------------------------------------------------------
diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
index bf4e12e..932f207 100644
--- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
+++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestJdbcQuery.java
@@ -80,7 +80,7 @@ public class TestJdbcQuery extends JdbcTest{
   public void testHiveReadWithDb() throws Exception{
     testQuery("select * from hive.`default`.kv");
     testQuery("select key from hive.`default`.kv group by key");
-    testQuery("select * from hive.`default`.alltypes");
+    testQuery("select * from hive.`default`.allreadsupportedhivedatatypes");
   }
 
   @Test

http://git-wip-us.apache.org/repos/asf/incubator-drill/blob/71432fd1/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
----------------------------------------------------------------------
diff --git a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
index 3975ead..3580711 100644
--- a/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
+++ b/exec/jdbc/src/test/java/org/apache/drill/jdbc/test/TestMetadataDDL.java
@@ -50,7 +50,8 @@ public class TestMetadataDDL extends TestJdbcQuery {
         .sql("SHOW TABLES")
         .returns(
             "TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" +
-            "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" +
+            "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" +
+            "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" +
             "TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" +
             "TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n"
         );
@@ -72,7 +73,8 @@ public class TestMetadataDDL extends TestJdbcQuery {
         .sql("SHOW TABLES IN hive.`default`")
         .returns(
             "TABLE_SCHEMA=hive.default; TABLE_NAME=empty_table\n" +
-            "TABLE_SCHEMA=hive.default; TABLE_NAME=alltypes\n" +
+            "TABLE_SCHEMA=hive.default; TABLE_NAME=allhivedatatypes\n" +
+            "TABLE_SCHEMA=hive.default; TABLE_NAME=allreadsupportedhivedatatypes\n" +
             "TABLE_SCHEMA=hive.default; TABLE_NAME=kv\n" +
             "TABLE_SCHEMA=hive.default; TABLE_NAME=foodate\n");
   }