Posted to commits@hive.apache.org by jd...@apache.org on 2016/05/03 22:31:55 UTC

[42/45] hive git commit: HIVE-13178: Enhance ORC Schema Evolution to handle more standard data type conversions (Matt McCline, reviewed by Prasanth Jayachandran)

HIVE-13178: Enhance ORC Schema Evolution to handle more standard data type conversions (Matt McCline, reviewed by Prasanth Jayachandran)
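
The enhancement lets the ORC reader convert stored column values to a reader schema whose types differ from what the files were written with, going beyond the previous smallint -> int -> bigint promotions. A minimal HiveQL sketch of the user-facing effect follows; the table and column names are hypothetical, and the exact set of supported conversions is whatever the new ConvertTreeReaderFactory implements:

  -- Write data with the original schema.
  CREATE TABLE orc_evolve (id INT, amount FLOAT) STORED AS ORC;
  INSERT INTO TABLE orc_evolve VALUES (1, 1.5), (2, 2.5);

  -- Change column types; existing ORC files keep their original types on disk.
  ALTER TABLE orc_evolve CHANGE COLUMN id id BIGINT;
  ALTER TABLE orc_evolve CHANGE COLUMN amount amount DOUBLE;

  -- With hive.exec.schema.evolution enabled, the reader converts the stored
  -- INT/FLOAT values to BIGINT/DOUBLE while scanning the old files.
  SELECT id, amount FROM orc_evolve;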


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a16058e1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a16058e1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a16058e1

Branch: refs/heads/llap
Commit: a16058e10025d9f7af75c2283727c7c176b770e2
Parents: e1b0383
Author: Matt McCline <mm...@hortonworks.com>
Authored: Tue May 3 02:35:00 2016 -0700
Committer: Matt McCline <mm...@hortonworks.com>
Committed: Tue May 3 02:35:00 2016 -0700

----------------------------------------------------------------------
 .../org/apache/hadoop/hive/conf/HiveConf.java   |    2 +-
 .../test/resources/testconfiguration.properties |    4 +
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   53 -
 .../hive/ql/exec/vector/VectorExtractRow.java   |   12 +-
 .../ql/exec/vector/VectorizationContext.java    |    2 +-
 .../ql/io/orc/ConvertTreeReaderFactory.java     | 3750 +++++++++++++
 .../hadoop/hive/ql/io/orc/SchemaEvolution.java  |   29 +-
 .../hive/ql/io/orc/TreeReaderFactory.java       |   86 +-
 .../clientnegative/orc_replace_columns2.q       |    5 +-
 .../clientnegative/orc_replace_columns2_acid.q  |    5 +-
 .../clientnegative/orc_replace_columns3.q       |    3 +
 .../clientnegative/orc_replace_columns3_acid.q  |    3 +
 .../clientnegative/orc_type_promotion1.q        |    7 +-
 .../clientnegative/orc_type_promotion1_acid.q   |    7 +-
 .../clientnegative/orc_type_promotion2.q        |    5 +-
 .../clientnegative/orc_type_promotion2_acid.q   |    5 +-
 .../clientnegative/orc_type_promotion3.q        |    5 +-
 .../clientnegative/orc_type_promotion3_acid.q   |    5 +-
 .../clientpositive/orc_int_type_promotion.q     |    4 +
 .../clientpositive/orc_schema_evolution.q       |    2 +
 .../schema_evol_orc_acid_mapwork_part.q         |  846 ++-
 .../schema_evol_orc_acid_mapwork_table.q        |  804 ++-
 .../schema_evol_orc_acidvec_mapwork_part.q      |  843 ++-
 .../schema_evol_orc_acidvec_mapwork_table.q     |  801 ++-
 .../schema_evol_orc_nonvec_fetchwork_part.q     |  831 ++-
 .../schema_evol_orc_nonvec_fetchwork_table.q    |  824 ++-
 .../schema_evol_orc_nonvec_mapwork_part.q       |  833 ++-
 ...a_evol_orc_nonvec_mapwork_part_all_complex.q |  162 +
 ...evol_orc_nonvec_mapwork_part_all_primitive.q |  481 ++
 .../schema_evol_orc_nonvec_mapwork_table.q      |  824 ++-
 .../schema_evol_orc_vec_mapwork_part.q          |  831 ++-
 ...hema_evol_orc_vec_mapwork_part_all_complex.q |  162 +
 ...ma_evol_orc_vec_mapwork_part_all_primitive.q |  481 ++
 .../schema_evol_orc_vec_mapwork_table.q         |  819 ++-
 .../schema_evol_text_nonvec_mapwork_part.q      |    5 +-
 ..._evol_text_nonvec_mapwork_part_all_complex.q |    5 +-
 ...vol_text_nonvec_mapwork_part_all_primitive.q |    5 +-
 .../schema_evol_text_nonvec_mapwork_table.q     |    5 +-
 .../schema_evol_text_vec_mapwork_part.q         |    2 +-
 ...ema_evol_text_vec_mapwork_part_all_complex.q |    2 +-
 ...a_evol_text_vec_mapwork_part_all_primitive.q |    2 +-
 .../schema_evol_text_vec_mapwork_table.q        |    2 +-
 .../schema_evol_text_vecrow_mapwork_part.q      |    2 +-
 ..._evol_text_vecrow_mapwork_part_all_complex.q |    2 +-
 ...vol_text_vecrow_mapwork_part_all_primitive.q |    2 +-
 .../schema_evol_text_vecrow_mapwork_table.q     |    2 +-
 .../clientnegative/orc_replace_columns2.q.out   |   13 +-
 .../orc_replace_columns2_acid.q.out             |   13 +-
 .../clientnegative/orc_replace_columns3.q.out   |   11 +-
 .../orc_replace_columns3_acid.q.out             |   11 +-
 .../clientnegative/orc_type_promotion1.q.out    |   13 +-
 .../orc_type_promotion1_acid.q.out              |   13 +-
 .../clientnegative/orc_type_promotion2.q.out    |   13 +-
 .../orc_type_promotion2_acid.q.out              |   13 +-
 .../clientnegative/orc_type_promotion3.q.out    |   11 +-
 .../clientnegative/orc_type_promotion3_acid.q   |   18 +
 .../orc_type_promotion3_acid.q.out              |   11 +-
 .../schema_evol_orc_acid_mapwork_part.q.out     | 4319 ++++++++++++---
 .../schema_evol_orc_acid_mapwork_table.q.out    | 3334 ++++++++++--
 .../schema_evol_orc_acidvec_mapwork_part.q.out  | 4319 ++++++++++++---
 .../schema_evol_orc_acidvec_mapwork_table.q.out | 3334 ++++++++++--
 .../schema_evol_orc_nonvec_fetchwork_part.q.out | 4905 +++++++++++++++--
 ...schema_evol_orc_nonvec_fetchwork_table.q.out | 4367 +++++++++++++++-
 .../schema_evol_orc_nonvec_mapwork_part.q.out   | 4909 +++++++++++++++--
 ...ol_orc_nonvec_mapwork_part_all_complex.q.out |  726 +++
 ..._orc_nonvec_mapwork_part_all_primitive.q.out | 2872 ++++++++++
 .../schema_evol_orc_nonvec_mapwork_table.q.out  | 4367 +++++++++++++++-
 .../schema_evol_orc_vec_mapwork_part.q.out      | 4929 ++++++++++++++++--
 ..._evol_orc_vec_mapwork_part_all_complex.q.out |  726 +++
 ...vol_orc_vec_mapwork_part_all_primitive.q.out | 2887 ++++++++++
 .../schema_evol_orc_vec_mapwork_table.q.out     | 4391 +++++++++++++++-
 .../tez/schema_evol_orc_acid_mapwork_part.q.out | 4319 ++++++++++++---
 .../schema_evol_orc_acid_mapwork_table.q.out    | 3334 ++++++++++--
 .../schema_evol_orc_acidvec_mapwork_part.q.out  | 4319 ++++++++++++---
 .../schema_evol_orc_acidvec_mapwork_table.q.out | 3334 ++++++++++--
 .../schema_evol_orc_nonvec_fetchwork_part.q.out | 4449 ++++++++++++++--
 ...schema_evol_orc_nonvec_fetchwork_table.q.out | 3911 +++++++++++++-
 .../schema_evol_orc_nonvec_mapwork_part.q.out   | 4453 ++++++++++++++--
 ...ol_orc_nonvec_mapwork_part_all_complex.q.out |  669 +++
 ..._orc_nonvec_mapwork_part_all_primitive.q.out | 2587 +++++++++
 .../schema_evol_orc_nonvec_mapwork_table.q.out  | 3911 +++++++++++++-
 .../tez/schema_evol_orc_vec_mapwork_part.q.out  | 4449 ++++++++++++++--
 ..._evol_orc_vec_mapwork_part_all_complex.q.out |  669 +++
 ...vol_orc_vec_mapwork_part_all_primitive.q.out | 2587 +++++++++
 .../tez/schema_evol_orc_vec_mapwork_table.q.out | 3911 +++++++++++++-
 85 files changed, 100951 insertions(+), 10048 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index caadf2a..06a6906 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1106,7 +1106,7 @@ public class HiveConf extends Configuration {
         "than this threshold, it will try to convert the common join into map join"),
 
 
-    HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", false,
+    HIVE_SCHEMA_EVOLUTION("hive.exec.schema.evolution", true,
         "Use schema evolution to convert self-describing file format's data to the schema desired by the reader."),
 
     HIVE_TRANSACTIONAL_TABLE_SCAN("hive.transactional.table.scan", false,

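The only functional change in the hunk above is the default for hive.exec.schema.evolution, which flips from false to true, so schema evolution is applied unless it is explicitly disabled. A small sketch, assuming the property can be overridden at session level:

  -- Explicitly control schema evolution for the current session
  -- (true is the default after this change).
  SET hive.exec.schema.evolution=true;
  -- To restore the previous default behavior:
  SET hive.exec.schema.evolution=false;
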
http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index c791ede..88381aa 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -193,8 +193,12 @@ minitez.query.files.shared=acid_globallimit.q,\
   schema_evol_orc_nonvec_fetchwork_part.q,\
   schema_evol_orc_nonvec_fetchwork_table.q,\
   schema_evol_orc_nonvec_mapwork_part.q,\
+  schema_evol_orc_nonvec_mapwork_part_all_complex.q,\
+  schema_evol_orc_nonvec_mapwork_part_all_primitive.q,\
   schema_evol_orc_nonvec_mapwork_table.q,\
   schema_evol_orc_vec_mapwork_part.q,\
+  schema_evol_orc_vec_mapwork_part_all_complex.q,\
+  schema_evol_orc_vec_mapwork_part_all_primitive.q,\
   schema_evol_orc_vec_mapwork_table.q,\
   schema_evol_text_nonvec_mapwork_part.q,\
   schema_evol_text_nonvec_mapwork_part_all_complex.q,\

http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index 5c20caa..9887d77 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -3363,12 +3363,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
             && !oldColName.equalsIgnoreCase(oldName)) {
           throw new HiveException(ErrorMsg.DUPLICATE_COLUMN_NAMES, newName);
         } else if (oldColName.equalsIgnoreCase(oldName)) {
-          // if orc table, restrict changing column types. Only integer type promotion is supported.
-          // smallint -> int -> bigint
-          if (isOrcSchemaEvolution && !isSupportedTypeChange(col.getType(), type)) {
-            throw new HiveException(ErrorMsg.CANNOT_CHANGE_COLUMN_TYPE, col.getType(), type,
-                newName);
-          }
           col.setName(newName);
           if (type != null && !type.trim().equals("")) {
             col.setType(type);
@@ -3435,15 +3429,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
         if (replaceCols.size() < existingCols.size()) {
           throw new HiveException(ErrorMsg.REPLACE_CANNOT_DROP_COLUMNS, alterTbl.getOldName());
         }
-
-        for (int i = 0; i < existingCols.size(); i++) {
-          final String currentColType = existingCols.get(i).getType().toLowerCase().trim();
-          final String newColType = replaceCols.get(i).getType().toLowerCase().trim();
-          if (!isSupportedTypeChange(currentColType, newColType)) {
-            throw new HiveException(ErrorMsg.REPLACE_UNSUPPORTED_TYPE_CONVERSION, currentColType,
-                newColType, replaceCols.get(i).getName());
-          }
-        }
       }
       sd.setCols(alterTbl.getNewCols());
     } else if (alterTbl.getOp() == AlterTableDesc.AlterTableTypes.ADDPROPS) {
@@ -3611,44 +3596,6 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
     return 0;
   }
 
-  // don't change the order of enums as ordinal values are used to check for valid type promotions
-  enum PromotableTypes {
-    SMALLINT,
-    INT,
-    BIGINT;
-
-    static List<String> types() {
-      return ImmutableList.of(SMALLINT.toString().toLowerCase(),
-          INT.toString().toLowerCase(), BIGINT.toString().toLowerCase());
-    }
-  }
-
-  // for ORC, only supported type promotions are smallint -> int -> bigint. No other
-  // type promotions are supported at this point
-  private boolean isSupportedTypeChange(String currentType, String newType) {
-    if (currentType != null && newType != null) {
-      currentType = currentType.toLowerCase().trim();
-      newType = newType.toLowerCase().trim();
-      // no type change
-      if (currentType.equals(newType)) {
-        return true;
-      }
-      if (PromotableTypes.types().contains(currentType)
-          && PromotableTypes.types().contains(newType)) {
-        PromotableTypes pCurrentType = PromotableTypes.valueOf(currentType.toUpperCase());
-        PromotableTypes pNewType = PromotableTypes.valueOf(newType.toUpperCase());
-        if (pNewType.ordinal() >= pCurrentType.ordinal()) {
-          return true;
-        } else {
-          return false;
-        }
-      } else {
-        return false;
-      }
-    }
-    return true;
-  }
-
   /**
    * Drop a given table or some partitions. DropTableDesc is currently used for both.
    *

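With the PromotableTypes enum and isSupportedTypeChange() removed, DDLTask no longer vetoes column type changes on ORC tables at ALTER TABLE time; whether a given conversion actually works is now decided when the data is read (by the new ConvertTreeReaderFactory). A hedged HiveQL sketch of a statement the removed check would previously have rejected with CANNOT_CHANGE_COLUMN_TYPE; table and column names are hypothetical:

  -- Previously disallowed on an ORC table, since only smallint -> int -> bigint
  -- promotions passed the removed isSupportedTypeChange() check.
  ALTER TABLE orc_evolve CHANGE COLUMN amount amount DECIMAL(10,2);
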
http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
index c965dc8..b7b5ae8 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExtractRow.java
@@ -264,7 +264,7 @@ public class VectorExtractRow {
             int length = bytesColVector.length[adjustedIndex];
 
             if (bytes == null) {
-              LOG.info("null string entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
+              nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
             }
 
             // Use org.apache.hadoop.io.Text as our helper to go from byte[] to String.
@@ -280,7 +280,7 @@ public class VectorExtractRow {
             int length = bytesColVector.length[adjustedIndex];
 
             if (bytes == null) {
-              LOG.info("null varchar entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
+              nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
             }
 
             int adjustedLength = StringExpr.truncate(bytes, start, length,
@@ -299,7 +299,7 @@ public class VectorExtractRow {
             int length = bytesColVector.length[adjustedIndex];
 
             if (bytes == null) {
-              LOG.info("null char entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
+              nullBytesReadError(primitiveCategory, batchIndex, projectionColumnNum);
             }
 
             int adjustedLength = StringExpr.rightTrimAndTruncate(bytes, start, length,
@@ -343,4 +343,10 @@ public class VectorExtractRow {
       objects[i] = extractRowColumn(batch, batchIndex, i);
     }
   }
+
+  private void nullBytesReadError(PrimitiveCategory primitiveCategory, int batchIndex,
+    int projectionColumnNum) {
+    throw new RuntimeException("null " + primitiveCategory.name() +
+        " entry: batchIndex " + batchIndex + " projection column num " + projectionColumnNum);
+  }
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/a16058e1/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index 5c55011..5454ba3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -787,7 +787,7 @@ public class VectorizationContext {
                 || arg0Type(expr).equals("float"))) {
       return true;
     } else if (gudf instanceof GenericUDFBetween && (mode == Mode.PROJECTION)) {
-      // between has 4 args here, but can be vectorized like this 
+      // between has 4 args here, but can be vectorized like this
       return true;
     }
     return false;