You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ek...@apache.org on 2016/12/08 02:13:41 UTC
[2/2] hive git commit: HIVE-15307 Hive MERGE: "when matched then update" allows invalid column names. (Eugene Koifman, reviewed by Alan Gates)

HIVE-15307 Hive MERGE: "when matched then update" allows invalid column names. (Eugene Koifman, reviewed by Alan Gates)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/b920fa73
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/b920fa73
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/b920fa73

Branch: refs/heads/master
Commit: b920fa735ea7910af228a4c327168b11b73424f3
Parents: a88141f
Author: Eugene Koifman <ek...@hortonworks.com>
Authored: Wed Dec 7 18:09:47 2016 -0800
Committer: Eugene Koifman <ek...@hortonworks.com>
Committed: Wed Dec 7 18:09:47 2016 -0800

----------------------------------------------------------------------
 .../TestHiveAuthorizerCheckInvocation.java      |  4 +-
 .../org/apache/hadoop/hive/ql/ErrorMsg.java     |  1 +
 .../ql/parse/UpdateDeleteSemanticAnalyzer.java  | 50 ++++++++++----------
 .../apache/hadoop/hive/ql/TestTxnCommands.java  | 17 ++++++-
 4 files changed, 45 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/b920fa73/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
index 9aca713..f973f8d 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/security/authorization/plugin/TestHiveAuthorizerCheckInvocation.java
@@ -329,7 +329,7 @@ public class TestHiveAuthorizerCheckInvocation {
   public void testUpdateSomeColumnsUsedExprInSet() throws HiveAuthzPluginException,
       HiveAccessControlException, CommandNeedRetryException {
     reset(mockedAuthorizer);
-    int status = driver.compile("update " + acidTableName + " set i = 5, l = k where j = 3");
+    int status = driver.compile("update " + acidTableName + " set i = 5, j = k where j = 3");
     assertEquals(0, status);
 
     Pair<List<HivePrivilegeObject>, List<HivePrivilegeObject>> io = getHivePrivilegeObjectInputs();
@@ -337,7 +337,7 @@ public class TestHiveAuthorizerCheckInvocation {
     HivePrivilegeObject tableObj = outputs.get(0);
     LOG.debug("Got privilege object " + tableObj);
     assertEquals("no of columns used", 2, tableObj.getColumns().size());
-    assertEquals("Columns used", Arrays.asList("i", "l"),
+    assertEquals("Columns used", Arrays.asList("i", "j"),
         getSortedList(tableObj.getColumns()));
     List<HivePrivilegeObject> inputs = io.getLeft();
     assertEquals(1, inputs.size());

http://git-wip-us.apache.org/repos/asf/hive/blob/b920fa73/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
index a315057..306c57f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ErrorMsg.java
@@ -460,6 +460,7 @@ public enum ErrorMsg {
   MERGE_TOO_MANY_DELETE(10405, "MERGE statment can have at most 1 WHEN MATCHED ... DELETE clause: <{0}>", true),
   MERGE_TOO_MANY_UPDATE(10406, "MERGE statment can have at most 1 WHEN MATCHED ... UPDATE clause: <{0}>", true),
   INVALID_JOIN_CONDITION(10407, "Error parsing condition in outer join"),
+  INVALID_TARGET_COLUMN_IN_SET_CLAUSE(10408, "Target column \"{0}\" of set clause is not found in table \"{1}\".", true),
   //========================== 20000 range starts here ========================//
   SCRIPT_INIT_ERROR(20000, "Unable to initialize custom script."),
   SCRIPT_IO_ERROR(20001, "An error occurred while reading or writing to your custom script. "

http://git-wip-us.apache.org/repos/asf/hive/blob/b920fa73/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
index 55a3735..027eb68 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/UpdateDeleteSemanticAnalyzer.java
@@ -146,22 +146,30 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
    * Assert that we are not asked to update a bucketing column or partition column
    * @param colName it's the A in "SET A = B"
    */
-  private void checkValidSetClauseTarget(ASTNode colName, List<FieldSchema> partCols,
-                                         List<String> bucketingCols) throws SemanticException {
+  private void checkValidSetClauseTarget(ASTNode colName, Table targetTable) throws SemanticException {
     String columnName = normalizeColName(colName.getText());
 
     // Make sure this isn't one of the partitioning columns, that's not supported.
-    if (partCols != null) {
-      for (FieldSchema fschema : partCols) {
-        if (fschema.getName().equalsIgnoreCase(columnName)) {
-          throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg());
-        }
+    for (FieldSchema fschema : targetTable.getPartCols()) {
+      if (fschema.getName().equalsIgnoreCase(columnName)) {
+        throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_PART_VALUE.getMsg());
       }
     }
     //updating bucket column should move row from one file to another - not supported
-    if(bucketingCols != null && bucketingCols.contains(columnName)) {
+    if(targetTable.getBucketCols() != null && targetTable.getBucketCols().contains(columnName)) {
       throw new SemanticException(ErrorMsg.UPDATE_CANNOT_UPDATE_BUCKET_VALUE,columnName);
     }
+    boolean foundColumnInTargetTable = false;
+    for(FieldSchema col : targetTable.getCols()) {
+      if(columnName.equalsIgnoreCase(col.getName())) {
+        foundColumnInTargetTable = true;
+        break;
+      }
+    }
+    if(!foundColumnInTargetTable) {
+      throw new SemanticException(ErrorMsg.INVALID_TARGET_COLUMN_IN_SET_CLAUSE, colName.getText(),
+        getDotName(new String[] {targetTable.getDbName(), targetTable.getTableName()}));
+    }
   }
   private ASTNode findLHSofAssignment(ASTNode assignment) {
     assert assignment.getToken().getType() == HiveParser.EQUAL :
@@ -174,9 +182,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       "Expected column name";
     return colName;
   }
-  private Map<String, ASTNode> collectSetColumnsAndExpressions(
-    ASTNode setClause,List<FieldSchema> partCols, List<String> bucketingCols, Set<String> setRCols)
-    throws SemanticException {
+  private Map<String, ASTNode> collectSetColumnsAndExpressions(ASTNode setClause,
+                         Set<String> setRCols, Table targetTable) throws SemanticException {
     // An update needs to select all of the columns, as we rewrite the entire row.  Also,
     // we need to figure out which columns we are going to replace.
     assert setClause.getToken().getType() == HiveParser.TOK_SET_COLUMNS_CLAUSE :
@@ -192,7 +199,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       if(setRCols != null) {
         addSetRCols((ASTNode) assignment.getChildren().get(1), setRCols);
       }
-      checkValidSetClauseTarget(colName, partCols, bucketingCols);
+      checkValidSetClauseTarget(colName, targetTable);
 
       String columnName = normalizeColName(colName.getText());
       // This means that in UPDATE T SET x = _something_
@@ -338,13 +345,10 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
     Table mTable = getTargetTable(tabName);
     validateTargetTable(mTable);
 
-    List<FieldSchema> partCols = mTable.getPartCols();
-    List<String> bucketingCols = mTable.getBucketCols();
-
     rewrittenQueryStr.append("insert into table ");
     rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
 
-    addPartitionColsToInsert(partCols, rewrittenQueryStr);
+    addPartitionColsToInsert(mTable.getPartCols(), rewrittenQueryStr);
 
     rewrittenQueryStr.append(" select ROW__ID");
 
@@ -358,8 +362,8 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       // The set list from update should be the second child (index 1)
       assert children.size() >= 2 : "Expected update token to have at least two children";
       ASTNode setClause = (ASTNode)children.get(1);
-      setCols = collectSetColumnsAndExpressions(setClause, partCols, bucketingCols, setRCols);
-      setColExprs = new HashMap<Integer, ASTNode>(setClause.getChildCount());
+      setCols = collectSetColumnsAndExpressions(setClause, setRCols, mTable);
+      setColExprs = new HashMap<>(setClause.getChildCount());
 
       List<FieldSchema> nonPartCols = mTable.getCols();
       for (int i = 0; i < nonPartCols.size(); i++) {
@@ -376,7 +380,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
       }
     }
 
-    addPartitionColsToSelect(partCols, rewrittenQueryStr, null);
+    addPartitionColsToSelect(mTable.getPartCols(), rewrittenQueryStr, null);
     rewrittenQueryStr.append(" from ");
     rewrittenQueryStr.append(getFullTableNameForSQL(tabName));
 
@@ -786,17 +790,15 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
                               String deleteExtraPredicate) throws SemanticException {
     assert whenMatchedUpdateClause.getType() == HiveParser.TOK_MATCHED;
     assert getWhenClauseOperation(whenMatchedUpdateClause).getType() == HiveParser.TOK_UPDATE;
-    List<FieldSchema> partCols = targetTable.getPartCols();
-    List<String> bucketingCols = targetTable.getBucketCols();
     String targetName = getSimpleTableName(target);
     rewrittenQueryStr.append("INSERT INTO ").append(getFullTableNameForSQL(target));
-    addPartitionColsToInsert(partCols, rewrittenQueryStr);
+    addPartitionColsToInsert(targetTable.getPartCols(), rewrittenQueryStr);
     rewrittenQueryStr.append("\n select ").append(targetName).append(".ROW__ID");
 
     ASTNode setClause = (ASTNode)getWhenClauseOperation(whenMatchedUpdateClause).getChild(0);
     //columns being updated -> update expressions; "setRCols" (last param) is null because we use actual expressions
     //before reparsing, i.e. they are known to SemanticAnalyzer logic
-    Map<String, ASTNode> setColsExprs = collectSetColumnsAndExpressions(setClause, partCols, bucketingCols, null);
+    Map<String, ASTNode> setColsExprs = collectSetColumnsAndExpressions(setClause, null, targetTable);
     //if target table has cols c1,c2,c3 and p1 partition col and we had "SET c2 = 5, c1 = current_date()" we want to end up with
     //insert into target (p1) select current_date(), 5, c3, p1 where ....
     //since we take the RHS of set exactly as it was in Input, we don't need to deal with quoting/escaping column/table names
@@ -812,7 +814,7 @@ public class UpdateDeleteSemanticAnalyzer extends SemanticAnalyzer {
         rewrittenQueryStr.append(getSimpleTableName(target)).append(".").append(HiveUtils.unparseIdentifier(name, this.conf));
       }
     }
-    addPartitionColsToSelect(partCols, rewrittenQueryStr, targetName);
+    addPartitionColsToSelect(targetTable.getPartCols(), rewrittenQueryStr, targetName);
     rewrittenQueryStr.append("\n   WHERE ").append(onClauseAsString);
     String extraPredicate = getWhenClausePredicate(whenMatchedUpdateClause);
     if(extraPredicate != null) {

http://git-wip-us.apache.org/repos/asf/hive/blob/b920fa73/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
index 68af15a..fd9959e 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/TestTxnCommands.java
@@ -58,6 +58,9 @@ import java.util.concurrent.TimeUnit;
  * test AC=true, and AC=false with commit/rollback/exception and test resulting data.
  *
  * Can also test, calling commit in AC=true mode, etc, toggling AC...
+ * 
+ * Tests here are for multi-statement transactions (WIP) and those that don't need to
+ * run with Acid 2.0 (see subclasses of TestTxnCommands2)
  */
 public class TestTxnCommands {
   private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
@@ -595,7 +598,7 @@ public class TestTxnCommands {
   public void testMergeNegative2() throws Exception {
     CommandProcessorResponse cpr = runStatementOnDriverNegative("MERGE INTO "+ Table.ACIDTBL +
       " target USING " + Table.NONACIDORCTBL + "\n source ON target.pk = source.pk " +
-      "\nWHEN MATCHED THEN UPDATE set t = 1 " +
+      "\nWHEN MATCHED THEN UPDATE set b = 1 " +
       "\nWHEN MATCHED THEN UPDATE set b=a");
     Assert.assertEquals(ErrorMsg.MERGE_TOO_MANY_UPDATE, ((HiveException)cpr.getException()).getCanonicalErrorMsg());
   }
@@ -615,4 +618,16 @@ public class TestTxnCommands {
       "\nwhen matched then update set vc=`\u2206\u220b` " +
       "\nwhen not matched then insert values(`a/b`.`g/h`,`a/b`.j,`a/b`.k)");
   }
+  @Test
+  public void testSetClauseFakeColumn() throws Exception {
+    CommandProcessorResponse cpr = runStatementOnDriverNegative("MERGE INTO "+ Table.ACIDTBL +
+      " target USING " + Table.NONACIDORCTBL +
+      "\n source ON target.a = source.a " +
+      "\nWHEN MATCHED THEN UPDATE set t = 1");
+    Assert.assertEquals(ErrorMsg.INVALID_TARGET_COLUMN_IN_SET_CLAUSE,
+      ((HiveException)cpr.getException()).getCanonicalErrorMsg());
+    cpr = runStatementOnDriverNegative("update " + Table.ACIDTBL + " set t = 1");
+    Assert.assertEquals(ErrorMsg.INVALID_TARGET_COLUMN_IN_SET_CLAUSE,
+      ((HiveException)cpr.getException()).getCanonicalErrorMsg());
+  }
 }