Posted to commits@hive.apache.org by ha...@apache.org on 2018/10/12 18:07:04 UTC

[2/2] hive git commit: HIVE-20590 : Allow merge statement to have column schema (Miklos Gergely via Ashutosh Chauhan)

HIVE-20590 : Allow merge statement to have column schema (Miklos Gergely via Ashutosh Chauhan)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/beccce39
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/beccce39
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/beccce39

Branch: refs/heads/master
Commit: beccce3987ae409c65deb810cd571ba06088bae1
Parents: 24f7d24
Author: Miklos Gergely <mg...@hortonworks.com>
Authored: Tue Oct 2 06:22:00 2018 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Fri Oct 12 11:06:18 2018 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/ql/parse/HiveParser.g    |    4 +-
 .../hadoop/hive/ql/parse/SemanticAnalyzer.java  |   62 +-
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  |   88 +-
 .../generic/GenericUDFCardinalityViolation.java |   19 +-
 .../clientnegative/merge_column_mismatch.q      |   15 +
 .../queries/clientpositive/sqlmerge_stats.q     |   83 +-
 .../clientnegative/merge_column_mismatch.q.out  |   37 +
 .../clientpositive/llap/sqlmerge_stats.q.out    | 2065 +++++++++++++++++-
 8 files changed, 2288 insertions(+), 85 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index 78bc87c..bc95c46 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -2999,8 +2999,8 @@ whenNotMatchedClause
 @init { pushMsg("WHEN NOT MATCHED clause", state); }
 @after { popMsg(state); }
    :
-  KW_WHEN KW_NOT KW_MATCHED (KW_AND expression)? KW_THEN KW_INSERT KW_VALUES valueRowConstructor ->
-    ^(TOK_NOT_MATCHED ^(TOK_INSERT valueRowConstructor) expression?)
+  KW_WHEN KW_NOT KW_MATCHED (KW_AND expression)? KW_THEN KW_INSERT (targetCols=columnParenthesesList)? KW_VALUES valueRowConstructor ->
+    ^(TOK_NOT_MATCHED ^(TOK_INSERT $targetCols? valueRowConstructor) expression?)
   ;
 whenMatchedAndClause
 @init { pushMsg("WHEN MATCHED AND clause", state); }

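In effect, the column list after KW_INSERT is now optional in the grammar; when present it is captured as TOK_TABCOLNAME under TOK_INSERT and validated later in UpdateDeleteSemanticAnalyzer. A minimal sketch mirroring the tests in this commit (the list-less form remains valid):

  MERGE INTO t AS t USING upd_t AS u ON t.a = u.a
  WHEN MATCHED THEN DELETE
  WHEN NOT MATCHED THEN INSERT (a, b) VALUES (u.a, u.b);
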
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 31bc38e..6a6e6c3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -726,17 +726,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
    * @throws SemanticException
    */
   private static List<String> getDefaultConstraints(Table tbl, List<String> targetSchema) throws SemanticException{
-    Map<String, String> colNameToDefaultVal =  null;
-    try {
-      DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName());
-      colNameToDefaultVal = dc.getColNameToDefaultValueMap();
-    } catch (Exception e) {
-      if (e instanceof SemanticException) {
-        throw (SemanticException) e;
-      } else {
-        throw (new RuntimeException(e));
-      }
-    }
+    Map<String, String> colNameToDefaultVal = getColNameToDefaultValueMap(tbl);
     List<String> defaultConstraints = new ArrayList<>();
     if(targetSchema != null) {
       for (String colName : targetSchema) {
@@ -751,6 +741,21 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return defaultConstraints;
   }
 
+  protected static Map<String, String> getColNameToDefaultValueMap(Table tbl) throws SemanticException {
+    Map<String, String> colNameToDefaultVal = null;
+    try {
+      DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(tbl.getDbName(), tbl.getTableName());
+      colNameToDefaultVal = dc.getColNameToDefaultValueMap();
+    } catch (Exception e) {
+      if (e instanceof SemanticException) {
+        throw (SemanticException) e;
+      } else {
+        throw (new RuntimeException(e));
+      }
+    }
+    return colNameToDefaultVal;
+  }
+
   /**
    * Constructs an AST for given DEFAULT string
    * @param newValue
@@ -772,28 +777,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
     return newNode;
   }
 
-  public static String replaceDefaultKeywordForMerge(String valueClause,Table targetTable)
-      throws SemanticException {
-    List<String> defaultConstraints = null;
-    String[] values = valueClause.trim().split(",");
-    StringBuilder newValueClause = new StringBuilder();
-    for (int i = 0; i < values.length; i++) {
-      if (values[i].trim().toLowerCase().equals("`default`")) {
-        if (defaultConstraints == null) {
-          defaultConstraints = getDefaultConstraints(targetTable, null);
-        }
-        newValueClause.append(defaultConstraints.get(i));
-      }
-      else {
-        newValueClause.append(values[i]);
-      }
-      if(i != values.length-1) {
-        newValueClause.append(",");
-      }
-    }
-    return newValueClause.toString();
-  }
-
   /**
    * This method replaces ASTNode corresponding to DEFAULT keyword with either DEFAULT constraint
    *  expression if exists or NULL otherwise
@@ -4634,17 +4617,7 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     // see if we need to fetch default constraints from metastore
     if(targetCol2Projection.size() < targetTableColNames.size()) {
-      try {
-          DefaultConstraint dc = Hive.get().getEnabledDefaultConstraints(target.getDbName(), target.getTableName());
-          colNameToDefaultVal = dc.getColNameToDefaultValueMap();
-      } catch (Exception e) {
-        if (e instanceof SemanticException) {
-          throw (SemanticException) e;
-        } else {
-          throw (new RuntimeException(e));
-        }
-      }
-
+      colNameToDefaultVal = getColNameToDefaultValueMap(target);
     }
     for (int i = 0; i < targetTableColNames.size(); i++) {
       String f = targetTableColNames.get(i);
@@ -6396,7 +6369,6 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
 
     for (Map.Entry<String, ASTNode> entry : aggregationTrees.entrySet()) {
       ASTNode value = entry.getValue();
-      ArrayList<ExprNodeDesc> aggParameters = new ArrayList<ExprNodeDesc>();
       // 0 is the function name
       for (int i = 1; i < value.getChildCount(); i++) {
         ASTNode paraExpr = (ASTNode) value.getChild(i);

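The extracted getColNameToDefaultValueMap helper is now shared by both call sites above, and it is what lets the merge rewrite resolve the `default` keyword against the target table's DEFAULT constraints. A sketch of the behavior it backs, using the t2 table from the tests in this commit:

  CREATE TABLE t2 (a int, b int, c int DEFAULT 1)
    CLUSTERED BY (a) INTO 2 BUCKETS STORED AS ORC
    TBLPROPERTIES ('transactional'='true');

  -- `default` in VALUES resolves to the constraint expression (1 for c);
  -- a column with no DEFAULT constraint would resolve to null
  MERGE INTO t2 AS t USING upd_t2_1 AS u ON t.a = u.a
  WHEN MATCHED THEN UPDATE SET b = 99
  WHEN NOT MATCHED THEN INSERT VALUES (u.a, u.b, default);
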
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index e8823e1..8651afd 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -32,6 +32,8 @@ import java.util.UUID;
 
 import org.antlr.runtime.TokenRewriteStream;
 import org.antlr.runtime.tree.Tree;
+import org.apache.commons.collections.MapUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
@@ -993,7 +995,6 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
         insClauseIdx < rewrittenTree.getChildCount() - (validating ? 1 : 0/*skip cardinality violation clause*/);
         insClauseIdx++, whenClauseIdx++) {
      //we've added Insert clauses in order of WHEN items in whenClauses
-      ASTNode insertClause = (ASTNode) rewrittenTree.getChild(insClauseIdx);
       switch (getWhenClauseOperation(whenClauses.get(whenClauseIdx)).getType()) {
         case HiveParser.TOK_INSERT:
           rewrittenCtx.addDestNamePrefix(insClauseIdx, Context.DestClausePrefix.INSERT);
@@ -1185,7 +1186,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     String targetName = getSimpleTableName(target);
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
     addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
-    rewrittenQueryStr.append("    -- update clause\n select ");
+    rewrittenQueryStr.append("    -- update clause\n SELECT ");
     if (hintStr != null) {
       rewrittenQueryStr.append(hintStr);
     }
@@ -1226,7 +1227,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     if(deleteExtraPredicate != null) {
       rewrittenQueryStr.append(" AND NOT(").append(deleteExtraPredicate).append(")");
     }
-    rewrittenQueryStr.append("\n sort by ");
+    rewrittenQueryStr.append("\n SORT BY ");
     rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
 
     setUpAccessControlInfoForUpdate(targetTable, setColsExprs);
@@ -1249,7 +1250,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
     addPartitionColsToInsert(partCols, rewrittenQueryStr);
 
-    rewrittenQueryStr.append("    -- delete clause\n select ");
+    rewrittenQueryStr.append("    -- delete clause\n SELECT ");
     if (hintStr != null) {
       rewrittenQueryStr.append(hintStr);
     }
@@ -1264,7 +1265,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     if(updateExtraPredicate != null) {
       rewrittenQueryStr.append(" AND NOT(").append(updateExtraPredicate).append(")");
     }
-    rewrittenQueryStr.append("\n sort by ");
+    rewrittenQueryStr.append("\n SORT BY ");
     rewrittenQueryStr.append(targetName).append(".ROW__ID \n");
     return extraPredicate;
   }
@@ -1353,7 +1354,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    */
   private String getWhenClausePredicate(ASTNode whenClause) {
     if(!(whenClause.getType() == HiveParser.TOK_MATCHED || whenClause.getType() == HiveParser.TOK_NOT_MATCHED)) {
-      throw  raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
+      throw raiseWrongType("Expected TOK_MATCHED|TOK_NOT_MATCHED", whenClause);
     }
     if(whenClause.getChildCount() == 2) {
       return getMatchedText((ASTNode)whenClause.getChild(1));
@@ -1366,33 +1367,80 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    * @throws SemanticException
    */
   private void handleInsert(ASTNode whenNotMatchedClause, StringBuilder rewrittenQueryStr, ASTNode target,
-                            ASTNode onClause, Table targetTable,
-                            String targetTableNameInSourceQuery, String onClauseAsString, String hintStr) throws SemanticException {
+                            ASTNode onClause, Table targetTable, String targetTableNameInSourceQuery,
+                            String onClauseAsString, String hintStr) throws SemanticException {
+    ASTNode whenClauseOperation = getWhenClauseOperation(whenNotMatchedClause);
     assert whenNotMatchedClause.getType() == HiveParser.TOK_NOT_MATCHED;
-    assert getWhenClauseOperation(whenNotMatchedClause).getType() == HiveParser.TOK_INSERT;
-    List<FieldSchema> partCols = targetTable.getPartCols();
-    String valuesClause = getMatchedText((ASTNode)getWhenClauseOperation(whenNotMatchedClause).getChild(0));
-    valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')'
-    valuesClause = SemanticAnalyzer.replaceDefaultKeywordForMerge(valuesClause, targetTable);
+    assert whenClauseOperation.getType() == HiveParser.TOK_INSERT;
+
+    // identify the node that contains the values to insert and the optional column list node
+    ArrayList<Node> children = whenClauseOperation.getChildren();
+    ASTNode valuesNode =
+        (ASTNode)children.stream().filter(n -> ((ASTNode)n).getType() == HiveParser.TOK_FUNCTION).findFirst().get();
+    ASTNode columnListNode =
+        (ASTNode)children.stream().filter(n -> ((ASTNode)n).getType() == HiveParser.TOK_TABCOLNAME).findFirst()
+        .orElse(null);
+
+    // if column list is specified, then it has to have the same number of elements as the values
+    // valuesNode has a child for the struct token, the rest are the values
+    if (columnListNode != null && columnListNode.getChildCount() != (valuesNode.getChildCount() - 1)) {
+      throw new SemanticException(String.format("Column schema must have the same length as values (%d vs %d)",
+          columnListNode.getChildCount(), valuesNode.getChildCount() - 1));
+    }
 
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
-    addPartitionColsToInsert(partCols, rewrittenQueryStr);
+    if (columnListNode != null) {
+      rewrittenQueryStr.append(' ').append(getMatchedText(columnListNode));
+    }
+    addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
 
-    OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
-      conf, onClauseAsString);
-    oca.analyze();
-    rewrittenQueryStr.append("    -- insert clause\n  select ");
+    rewrittenQueryStr.append("    -- insert clause\n  SELECT ");
     if (hintStr != null) {
       rewrittenQueryStr.append(hintStr);
     }
+
+    OnClauseAnalyzer oca = new OnClauseAnalyzer(onClause, targetTable, targetTableNameInSourceQuery,
+        conf, onClauseAsString);
+    oca.analyze();
+
+    String valuesClause = getMatchedText(valuesNode);
+    valuesClause = valuesClause.substring(1, valuesClause.length() - 1);//strip '(' and ')'
+    valuesClause = replaceDefaultKeywordForMerge(valuesClause, targetTable, columnListNode);
     rewrittenQueryStr.append(valuesClause).append("\n   WHERE ").append(oca.getPredicate());
+
     String extraPredicate = getWhenClausePredicate(whenNotMatchedClause);
-    if(extraPredicate != null) {
+    if (extraPredicate != null) {
       //we have WHEN NOT MATCHED AND <boolean expr> THEN INSERT
       rewrittenQueryStr.append(" AND ")
         .append(getMatchedText(((ASTNode)whenNotMatchedClause.getChild(1)))).append('\n');
     }
   }
+
+  private String replaceDefaultKeywordForMerge(String valueClause, Table table, ASTNode columnListNode)
+      throws SemanticException {
+    if (!valueClause.toLowerCase().contains("`default`")) {
+      return valueClause;
+    }
+
+    Map<String, String> colNameToDefaultConstraint = getColNameToDefaultValueMap(table);
+    String[] values = valueClause.trim().split(",");
+    String[] replacedValues = new String[values.length];
+
+    // the list of column names may be specified in the query
+    String[] columnNames = columnListNode == null ?
+      table.getAllCols().stream().map(f -> f.getName()).toArray(size -> new String[size]) :
+      columnListNode.getChildren().stream().map(n -> ((ASTNode)n).toString()).toArray(size -> new String[size]);
+
+    for (int i = 0; i < values.length; i++) {
+      if (values[i].trim().toLowerCase().equals("`default`")) {
+        replacedValues[i] = MapUtils.getString(colNameToDefaultConstraint, columnNames[i], "null");
+      } else {
+        replacedValues[i] = values[i];
+      }
+    }
+    return StringUtils.join(replacedValues, ',');
+  }
+
   /**
    * Suppose the input Merge statement has ON target.a = source.b and c = d.  Assume, that 'c' is from
    * target table and 'd' is from source expression.  In order to properly
@@ -1503,7 +1551,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       List<String> targetCols = table2column.get(targetTableNameInSourceQuery.toLowerCase());
       if(targetCols == null) {
         /*e.g. ON source.t=1
-        * this is not strictly speaking invlaid but it does ensure that all columns from target
+        * this is not strictly speaking invalid but it does ensure that all columns from target
         * table are all NULL for every row.  This would make any WHEN MATCHED clause invalid since
         * we don't have a ROW__ID.  The WHEN NOT MATCHED could be meaningful but it's just data from
         * source satisfying source.t=1...  not worth the effort to support this*/

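Putting handleInsert together: a WHEN NOT MATCHED branch with a reordered column list, such as the one in the tests below,

  WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES (u.b, default, u.a)

should be rewritten roughly as follows (a sketch assembled from the appends above; the exact quoting, whitespace, and predicate come from getFullTableNameForSQL and OnClauseAnalyzer):

  INSERT INTO default.t2 (b, c, a)    -- insert clause
    SELECT u.b, 1, u.a
     WHERE <ON clause predicate>

replaceDefaultKeywordForMerge matches each `default` in VALUES to the column at the same position of the list (or of the full table schema when no list is given) and substitutes that column's DEFAULT constraint, falling back to null when the column has none.
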
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
index b688447..b3c1a06 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCardinalityViolation.java
@@ -18,29 +18,18 @@
 
 package org.apache.hadoop.hive.ql.udf.generic;
 
-import java.util.ArrayList;
-
 import org.apache.hadoop.hive.ql.exec.Description;
 import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
-import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
-import org.apache.hadoop.hive.serde2.objectinspector.primitive.VoidObjectInspector;
-import org.apache.logging.log4j.core.layout.StringBuilderEncoder;
 
 /**
- * GenericUDFArray.
- *
+ * Function intended to fail. It is used in query parts that should never return anything, and thus signals the problem.
  */
 @Description(name = "cardinality_violation",
   value = "_FUNC_(n0, n1...) - raises Cardinality Violation")
 public class GenericUDFCardinalityViolation extends GenericUDF {
-  private transient Converter[] converters;
-  private transient ArrayList<Object> ret = new ArrayList<Object>();
 
   @Override
   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
@@ -50,8 +39,10 @@ public class GenericUDFCardinalityViolation extends GenericUDF {
   @Override
   public Object evaluate(DeferredObject[] arguments) throws HiveException {
     StringBuilder nonUniqueKey = new StringBuilder();
-    for(DeferredObject t : arguments) {
-      if(nonUniqueKey.length() > 0) {nonUniqueKey.append(','); }
+    for (DeferredObject t : arguments) {
+      if (nonUniqueKey.length() > 0) {
+        nonUniqueKey.append(',');
+      }
       nonUniqueKey.append(t.get());
     }
     throw new RuntimeException("Cardinality Violation in Merge statement: " + nonUniqueKey);

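For context on when this UDF fires: with hive.merge.cardinality.check=true (as set in the negative test below), the merge rewrite appends a validation clause that evaluates cardinality_violation whenever one target row is matched by more than one source row, so a statement like this sketch would fail at runtime instead of updating the row twice:

  INSERT INTO upd_t VALUES (1, 3), (1, 4);  -- two source rows with a = 1
  MERGE INTO t AS t USING upd_t AS u ON t.a = u.a
  WHEN MATCHED THEN UPDATE SET b = u.b;     -- target row a = 1 matches twice
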
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/queries/clientnegative/merge_column_mismatch.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientnegative/merge_column_mismatch.q b/ql/src/test/queries/clientnegative/merge_column_mismatch.q
new file mode 100644
index 0000000..5f78ea4
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/merge_column_mismatch.q
@@ -0,0 +1,15 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.explain.user=false;
+set hive.merge.cardinality.check=true;
+
+create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t values (1,2), (2,4);
+insert into upd_t values (1,3), (3,5);
+
+merge into t as t using upd_t as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (a) VALUES(u.a, default);

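This negative test exercises the new length check in handleInsert: the insert names a single column (a) while VALUES supplies two expressions, so analysis stops with the SemanticException shown in the q.out below. A balanced variant would pass, e.g.:

  WHEN NOT MATCHED THEN INSERT (a, b) VALUES (u.a, default);
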
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/queries/clientpositive/sqlmerge_stats.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/sqlmerge_stats.q b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
index c480eb6..453060e 100644
--- a/ql/src/test/queries/clientpositive/sqlmerge_stats.q
+++ b/ql/src/test/queries/clientpositive/sqlmerge_stats.q
@@ -29,10 +29,89 @@ desc formatted t;
 
 merge into t as t using upd_t as u ON t.a = u.a 
 WHEN MATCHED THEN DELETE
-WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b);
-
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
 
 select assert_true(count(1) = 0) from t group by a>-1;
 -- rownum is 0, because the orc writer can keep track of deltas
 desc formatted t;
 
+create table t2(a int, b int, c int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t2_1(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_2(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+create table upd_t2_4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='false');
+
+desc formatted t2;
+
+insert into t2 (a, b) values (1,1), (3,3), (5,5), (7,7);
+insert into upd_t2_1 values (1,1),(2,2);
+insert into upd_t2_2 values (3,3),(4,4);
+insert into upd_t2_3 values (5,5),(6,6);
+insert into upd_t2_4 values (7,7),(8,8);
+
+explain merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default);
+
+merge into t2 as t using upd_t2_1 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 99
+WHEN NOT MATCHED THEN INSERT VALUES(u.a, u.b, default);
+
+explain merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
+
+merge into t2 as t using upd_t2_2 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 98
+WHEN NOT MATCHED THEN INSERT (a, b) VALUES(u.a, u.b);
+
+explain merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default);
+
+merge into t2 as t using upd_t2_3 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 97
+WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES(u.a, u.b, default);
+
+explain merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a);
+
+merge into t2 as t using upd_t2_4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = 96
+WHEN NOT MATCHED THEN INSERT (b, c, a) VALUES(u.b, default, u.a);
+
+select * from t2;
+
+create table t3(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t3(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t3 values (1,2), (2,4);
+insert into upd_t3 values (1,3), (3,5);
+
+explain merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+merge into t3 as t using upd_t3 as u ON t.a = u.a
+WHEN MATCHED THEN DELETE
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+select * from t3;
+
+create table t4(a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+create table upd_t4(a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true');
+
+insert into t4 values (1,2), (2,4);
+insert into upd_t4 values (1,3), (3,5);
+
+explain merge into t4 as t using upd_t4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+merge into t4 as t using upd_t4 as u ON t.a = u.a
+WHEN MATCHED THEN UPDATE SET b = default
+WHEN NOT MATCHED THEN INSERT (b, a) VALUES(default, u.b);
+
+select * from t4;
+

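The four t2 merges above cover the supported column-list shapes: no list, a partial list, the full list, and a reordered list. With the partial list, the omitted column falls back to its DEFAULT constraint through the default-constraint path in SemanticAnalyzer, so the upd_t2_2 merge should behave like this sketch:

  MERGE INTO t2 AS t USING upd_t2_2 AS u ON t.a = u.a
  WHEN MATCHED THEN UPDATE SET b = 98
  WHEN NOT MATCHED THEN INSERT (a, b, c) VALUES (u.a, u.b, 1);
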
http://git-wip-us.apache.org/repos/asf/hive/blob/beccce39/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientnegative/merge_column_mismatch.q.out b/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
new file mode 100644
index 0000000..844986e
--- /dev/null
+++ b/ql/src/test/results/clientnegative/merge_column_mismatch.q.out
@@ -0,0 +1,37 @@
+PREHOOK: query: create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a int, b int default 1) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@upd_t
+POSTHOOK: query: create table upd_t (a int, b int) clustered by (a) into 2 buckets stored as orc TBLPROPERTIES ('transactional'='true')
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@upd_t
+PREHOOK: query: insert into t values (1,2), (2,4)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values (1,2), (2,4)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+POSTHOOK: Lineage: t.b SCRIPT []
+PREHOOK: query: insert into upd_t values (1,3), (3,5)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@upd_t
+POSTHOOK: query: insert into upd_t values (1,3), (3,5)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@upd_t
+POSTHOOK: Lineage: upd_t.a SCRIPT []
+POSTHOOK: Lineage: upd_t.b SCRIPT []
+FAILED: SemanticException Column schema must have the same length as values (1 vs 2)