You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/04/25 01:44:26 UTC

[impala] 02/02: IMPALA-1856: MetadataOp.getTypeInfo() does not return all supported types.

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 63d55359e06b7b061c8edea38b34617c250ad3b1
Author: Abhishek <ar...@cloudera.com>
AuthorDate: Tue Apr 23 10:56:22 2019 -0700

    IMPALA-1856: MetadataOp.getTypeInfo() does not return all supported types.
    
    The MetadataOp.getTypeInfo() is missing all the complex types
    (ARRAY, MAP, and STRUCT). Several primitive data types, such as
    CHAR, VARCHAR, DECIMAL, and DATE (newly added), are also not returned.
    
    The Impala JDBC client should in theory call MetadataOp.getTypeInfo()
    but that is not happening in the latest version of the driver. This
    change will only ensure that on Impala side the
    MetadataOp.getTypeInfo() returns correct results.
    
    Updated MetadataOp::createGetTypeInfoResults to include all supported
    and externally visible data types, including complex types (which were
    missing along with some other primitive types).
    
    Added a new function ScalarType::isInternalType() to identify internal
    types such as NULL_TYPE and FIXED_UDA_INTERMEDIATE, which are not exposed
    through the getTypeInfo function or in any other manner through SQL.
    
    Testing: There was a testing gap and ideally whenever a new type is
    added or support for a type is changed the MetadataOp.getTypeInfo()
    should return the correct result set representing the supported types.
    - Updated FrontendTest.java to ensure that the result set from
      MetadataOp::getTypeInfo contains all supported and externally visible
      types
    - Added new E2E test (test_get_type_info) in tests/hs2/test_hs2.py. The
      new test validates that the HS2 GetTypeInfo() RPC returns supported
      and externally visible types.
    
    Change-Id: Icdccde7c32e52ed1b0c7b13a22171e8bcd7f1f2d
    Reviewed-on: http://gerrit.cloudera.org:8080/13094
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 .../java/org/apache/impala/catalog/ScalarType.java |  8 ++
 .../main/java/org/apache/impala/catalog/Type.java  | 29 +++++++
 .../java/org/apache/impala/service/MetadataOp.java | 90 ++++++++++++++--------
 .../org/apache/impala/service/FrontendTest.java    | 29 ++++++-
 tests/hs2/test_hs2.py                              | 23 ++++++
 5 files changed, 145 insertions(+), 34 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
index 57923ee..5a987de 100644
--- a/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
+++ b/fe/src/main/java/org/apache/impala/catalog/ScalarType.java
@@ -307,6 +307,14 @@ public class ScalarType extends Type {
     return isValid() && !getUnsupportedTypes().contains(this);
   }
 
+  /**
+   *  Returns true if this type is internal and not exposed externally in SQL.
+   */
+  public boolean isInternalType() {
+    return type_ == PrimitiveType.FIXED_UDA_INTERMEDIATE
+        || type_ == PrimitiveType.NULL_TYPE;
+  }
+
   @Override
   public boolean supportsTablePartitioning() {
     if (!isSupported() || isComplexType() || type_ == PrimitiveType.TIMESTAMP) {
diff --git a/fe/src/main/java/org/apache/impala/catalog/Type.java b/fe/src/main/java/org/apache/impala/catalog/Type.java
index 79411c1..ea55de7 100644
--- a/fe/src/main/java/org/apache/impala/catalog/Type.java
+++ b/fe/src/main/java/org/apache/impala/catalog/Type.java
@@ -128,6 +128,35 @@ public abstract class Type {
   }
 
   /**
+   * Returns the static ScalarType members. The ScalarTypes which require additional
+   * metadata (such as CHAR, DECIMAL, FIXED_UDA_INTERMEDIATE) must be created using
+   * proper create functions defined in ScalarType class.
+   */
+  public static ScalarType getDefaultScalarType(PrimitiveType ptype) {
+    switch (ptype) {
+      case INVALID_TYPE: return INVALID;
+      case NULL_TYPE: return NULL;
+      case BOOLEAN: return BOOLEAN;
+      case TINYINT: return TINYINT;
+      case SMALLINT: return SMALLINT;
+      case INT: return INT;
+      case BIGINT: return BIGINT;
+      case FLOAT: return FLOAT;
+      case DOUBLE: return DOUBLE;
+      case DATE: return DATE;
+      case DATETIME: return DATETIME;
+      case TIMESTAMP: return TIMESTAMP;
+      case STRING: return STRING;
+      case VARCHAR: return VARCHAR;
+      case BINARY: return BINARY;
+      case DECIMAL: return DECIMAL;
+      case CHAR: return CHAR;
+      case FIXED_UDA_INTERMEDIATE: return FIXED_UDA_INTERMEDIATE;
+      default: return INVALID;
+    }
+  }
+
+  /**
    * The output of this is stored directly in the hive metastore as the column type.
    * The string must match exactly.
    */
diff --git a/fe/src/main/java/org/apache/impala/service/MetadataOp.java b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
index 63484b1..fa4a0e7 100644
--- a/fe/src/main/java/org/apache/impala/service/MetadataOp.java
+++ b/fe/src/main/java/org/apache/impala/service/MetadataOp.java
@@ -26,14 +26,17 @@ import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.impala.analysis.StmtMetadataLoader;
 import org.apache.impala.analysis.TableName;
 import org.apache.impala.authorization.User;
+import org.apache.impala.catalog.ArrayType;
 import org.apache.impala.catalog.Catalog;
 import org.apache.impala.catalog.Column;
 import org.apache.impala.catalog.FeCatalog;
 import org.apache.impala.catalog.FeDb;
 import org.apache.impala.catalog.FeTable;
 import org.apache.impala.catalog.Function;
+import org.apache.impala.catalog.MapType;
 import org.apache.impala.catalog.PrimitiveType;
 import org.apache.impala.catalog.ScalarType;
+import org.apache.impala.catalog.StructType;
 import org.apache.impala.catalog.Type;
 import org.apache.impala.catalog.local.InconsistentMetadataFetchException;
 import org.apache.impala.common.ImpalaException;
@@ -42,6 +45,7 @@ import org.apache.impala.thrift.TColumnValue;
 import org.apache.impala.thrift.TResultRow;
 import org.apache.impala.thrift.TResultSet;
 import org.apache.impala.thrift.TResultSetMetadata;
+import org.apache.impala.thrift.TTypeNodeType;
 import org.apache.impala.util.PatternMatcher;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -608,40 +612,66 @@ public class MetadataOp {
   }
 
   /**
-   * Fills the GET_TYPEINFO_RESULTS with supported primitive types.
+   * Return result row corresponding to the input type.
    */
-  private static void createGetTypeInfoResults() {
+  private static TResultRow createGetTypeInfoResult(String typeName, Type type) {
+    TResultRow row = new TResultRow();
+    row.colVals = Lists.newArrayList();
+    row.colVals.add(createTColumnValue(typeName)); // TYPE_NAME
+    row.colVals.add(createTColumnValue(type.getJavaSqlType())); // DATA_TYPE
+    row.colVals.add(createTColumnValue(type.getPrecision())); // PRECISION
+    row.colVals.add(NULL_COL_VAL); // LITERAL_PREFIX
+    row.colVals.add(NULL_COL_VAL); // LITERAL_SUFFIX
+    row.colVals.add(NULL_COL_VAL); // CREATE_PARAMS
+    row.colVals.add(createTColumnValue(DatabaseMetaData.typeNullable)); // NULLABLE
+    row.colVals.add(createTColumnValue(type.isStringType())); // CASE_SENSITIVE
+    row.colVals.add(createTColumnValue(DatabaseMetaData.typeSearchable)); // SEARCHABLE
+    row.colVals.add(createTColumnValue(!type.isNumericType())); // UNSIGNED_ATTRIBUTE
+    row.colVals.add(createTColumnValue(false)); // FIXED_PREC_SCALE
+    row.colVals.add(createTColumnValue(false)); // AUTO_INCREMENT
+    row.colVals.add(NULL_COL_VAL); // LOCAL_TYPE_NAME
+    row.colVals.add(createTColumnValue(0)); // MINIMUM_SCALE
+    row.colVals.add(createTColumnValue(0)); // MAXIMUM_SCALE
+    row.colVals.add(NULL_COL_VAL); // SQL_DATA_TYPE
+    row.colVals.add(NULL_COL_VAL); // SQL_DATETIME_SUB
+    row.colVals.add(createTColumnValue(type.getNumPrecRadix())); // NUM_PREC_RADIX
+    return row;
+  }
+
+  /**
+   * Fills the GET_TYPEINFO_RESULTS with externally supported primitive types.
+   */
+  private static void createGetPrimitiveTypeInfoResults() {
     for (PrimitiveType ptype: PrimitiveType.values()) {
-      if (ptype.equals(PrimitiveType.INVALID_TYPE) ||
-          ptype.equals(PrimitiveType.DATE) ||
-          ptype.equals(PrimitiveType.DATETIME) ||
-          ptype.equals(PrimitiveType.DECIMAL) ||
-          ptype.equals(PrimitiveType.CHAR) ||
-          ptype.equals(PrimitiveType.VARCHAR) ||
-          ptype.equals(PrimitiveType.FIXED_UDA_INTERMEDIATE)) {
+      ScalarType type = Type.getDefaultScalarType(ptype);
+      if (type.isInternalType() || !type.isSupported()) continue;
+      TResultRow row = createGetTypeInfoResult(ptype.name(), type);
+      GET_TYPEINFO_RESULTS.add(row);
+    }
+  }
+
+  /**
+   * Fills the GET_TYPEINFO_RESULTS with externally supported types.
+   */
+  private static void createGetTypeInfoResults() {
+    // Loop through the types included in the TTypeNodeType enum so that all the types
+    // (both existing and any new ones which get added) are accounted for.
+    for (TTypeNodeType nodeType : TTypeNodeType.values()) {
+      Type type = null;
+      if (nodeType == TTypeNodeType.SCALAR) {
+        createGetPrimitiveTypeInfoResults();
         continue;
+      } else if (nodeType == TTypeNodeType.ARRAY) {
+        type = new ArrayType(ScalarType.createType(PrimitiveType.INT));
+      } else if (nodeType == TTypeNodeType.MAP) {
+        type = new MapType(ScalarType.createType(PrimitiveType.INT),
+            ScalarType.createType(PrimitiveType.INT));
+      } else if (nodeType == TTypeNodeType.STRUCT) {
+        type = new StructType();
       }
-      Type type = ScalarType.createType(ptype);
-      TResultRow row = new TResultRow();
-      row.colVals = Lists.newArrayList();
-      row.colVals.add(createTColumnValue(ptype.name())); // TYPE_NAME
-      row.colVals.add(createTColumnValue(type.getJavaSqlType()));  // DATA_TYPE
-      row.colVals.add(createTColumnValue(type.getPrecision()));  // PRECISION
-      row.colVals.add(NULL_COL_VAL); // LITERAL_PREFIX
-      row.colVals.add(NULL_COL_VAL); // LITERAL_SUFFIX
-      row.colVals.add(NULL_COL_VAL); // CREATE_PARAMS
-      row.colVals.add(createTColumnValue(DatabaseMetaData.typeNullable));  // NULLABLE
-      row.colVals.add(createTColumnValue(type.isStringType())); // CASE_SENSITIVE
-      row.colVals.add(createTColumnValue(DatabaseMetaData.typeSearchable));  // SEARCHABLE
-      row.colVals.add(createTColumnValue(!type.isNumericType())); // UNSIGNED_ATTRIBUTE
-      row.colVals.add(createTColumnValue(false));  // FIXED_PREC_SCALE
-      row.colVals.add(createTColumnValue(false));  // AUTO_INCREMENT
-      row.colVals.add(NULL_COL_VAL); // LOCAL_TYPE_NAME
-      row.colVals.add(createTColumnValue(0));  // MINIMUM_SCALE
-      row.colVals.add(createTColumnValue(0));  // MAXIMUM_SCALE
-      row.colVals.add(NULL_COL_VAL); // SQL_DATA_TYPE
-      row.colVals.add(NULL_COL_VAL); // SQL_DATETIME_SUB
-      row.colVals.add(createTColumnValue(type.getNumPrecRadix()));  // NUM_PREC_RADIX
+
+      if (!type.isSupported()) continue;
+      TResultRow row = createGetTypeInfoResult(nodeType.name(), type);
       GET_TYPEINFO_RESULTS.add(row);
     }
   }
diff --git a/fe/src/test/java/org/apache/impala/service/FrontendTest.java b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
index c7fee57..b9ef150 100644
--- a/fe/src/test/java/org/apache/impala/service/FrontendTest.java
+++ b/fe/src/test/java/org/apache/impala/service/FrontendTest.java
@@ -22,6 +22,7 @@ import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.Arrays;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Set;
 
@@ -33,13 +34,16 @@ import org.apache.hive.service.rpc.thrift.TGetSchemasReq;
 import org.apache.hive.service.rpc.thrift.TGetTablesReq;
 import org.apache.impala.catalog.Db;
 import org.apache.impala.catalog.PrimitiveType;
+import org.apache.impala.catalog.ScalarType;
 import org.apache.impala.catalog.Table;
+import org.apache.impala.catalog.Type;
 import org.apache.impala.common.FrontendTestBase;
 import org.apache.impala.common.ImpalaException;
 import org.apache.impala.thrift.TMetadataOpRequest;
 import org.apache.impala.thrift.TMetadataOpcode;
 import org.apache.impala.thrift.TResultRow;
 import org.apache.impala.thrift.TResultSet;
+import org.apache.impala.thrift.TTypeNodeType;
 import org.junit.Test;
 
 import com.google.common.collect.Lists;
@@ -65,10 +69,27 @@ public class FrontendTest extends FrontendTestBase {
     // DatabaseMetaData.getTypeInfo has 18 columns.
     assertEquals(18, resp.schema.columns.size());
     assertEquals(18, resp.rows.get(0).colVals.size());
-    // All primitives types, except INVALID_TYPE, DATE, DATETIME, DECIMAL, CHAR,
-    // VARCHAR, and FIXED_UDA_INTERMEDIATE should be returned.
-    // Therefore #supported types =  PrimitiveType.values().length - 7.
-    assertEquals(PrimitiveType.values().length - 7, resp.rows.size());
+    // Validate the supported types
+    List<String> supportedTypes = new ArrayList<>();
+    for (ScalarType stype : Type.getSupportedTypes()) {
+      if (stype.isSupported() && !stype.isInternalType()) {
+        supportedTypes.add(stype.getPrimitiveType().name());
+      }
+    }
+    supportedTypes.add("ARRAY");
+    supportedTypes.add("MAP");
+    supportedTypes.add("STRUCT");
+    assertEquals(supportedTypes.size(), resp.rows.size());
+    for (TResultRow row : resp.rows) {
+      boolean foundType = false;
+      String typeName = row.colVals.get(0).getString_val();
+      if (supportedTypes.contains(typeName)) {
+        supportedTypes.remove(typeName);
+        foundType = true;
+      }
+      assertTrue(foundType);
+    }
+    assertEquals(supportedTypes.size(), 0);
   }
 
   @Test
diff --git a/tests/hs2/test_hs2.py b/tests/hs2/test_hs2.py
index 3408b53..16fc156 100644
--- a/tests/hs2/test_hs2.py
+++ b/tests/hs2/test_hs2.py
@@ -536,3 +536,26 @@ class TestHS2(HS2TestSuite):
     execute_statement_req.statement = "SELECT 1 FROM alltypes LIMIT 1"
     execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
     TestHS2.check_response(execute_statement_resp, TCLIService.TStatusCode.ERROR_STATUS)
+
+  @needs_session()
+  def test_get_type_info(self):
+    get_type_info_req = TCLIService.TGetTypeInfoReq()
+    get_type_info_req.sessionHandle = self.session_handle
+    get_type_info_resp = self.hs2_client.GetTypeInfo(get_type_info_req)
+    TestHS2.check_response(get_type_info_resp)
+    fetch_results_req = TCLIService.TFetchResultsReq()
+    fetch_results_req.operationHandle = get_type_info_resp.operationHandle
+    fetch_results_req.maxRows = 100
+    fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
+    TestHS2.check_response(fetch_results_resp)
+    results = fetch_results_resp.results
+    types = ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE', 'DATE',
+             'TIMESTAMP', 'STRING', 'VARCHAR', 'DECIMAL', 'CHAR', 'ARRAY', 'MAP',
+             'STRUCT']
+    assert self.get_num_rows(results) == len(types)
+    # Validate that each type description (result row) has the required 18 fields as
+    # described in the DatabaseMetaData.getTypeInfo() documentation.
+    assert len(results.columns) == 18
+    typed_col = getattr(results.columns[0], 'stringVal')
+    for colType in types:
+      assert typed_col.values.count(colType) == 1