Posted to commits@hive.apache.org by jc...@apache.org on 2019/04/27 01:31:46 UTC

[hive] 04/04: HIVE-21650: QOutProcessor should provide configurable partial masks for qtests (Aditya Shah, reviewed by Laszlo Bodor, Jesus Camacho Rodriguez)

This is an automated email from the ASF dual-hosted git repository.

jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git

commit 3ec51a237d87af8122583a8ffe9afbb658f27d5e
Author: Aditya Shah <ad...@gmail.com>
AuthorDate: Fri Apr 26 18:31:04 2019 -0700

    HIVE-21650: QOutProcessor should provide configurable partial masks for qtests (Aditya Shah, reviewed by Laszlo Bodor, Jesus Camacho Rodriguez)
---
 .../java/org/apache/hadoop/hive/conf/HiveConf.java |   6 +
 .../hadoop/hive/cli/control/CoreCliDriver.java     |  24 +++
 .../org/apache/hadoop/hive/ql/QOutProcessor.java   |  30 ++--
 .../clientpositive/acid_table_directories_test.q   |  34 ++++
 .../acid_table_directories_test.q.out              | 186 +++++++++++++++++++++
 5 files changed, 269 insertions(+), 11 deletions(-)

diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 37a0a6d..0c2bd1e 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -631,6 +631,12 @@ public class HiveConf extends Configuration {
         true),
     HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD("hive.mapjoin.testing.no.hash.table.load", false, "internal use only, true when in testing map join",
         true),
+    HIVE_ADDITIONAL_PARTIAL_MASKS_PATTERN("hive.qtest.additional.partial.mask.pattern", "",
+        "internal use only, used only in qtests. Provide additional partial mask patterns " +
+        "for qtests as a ',' separated list"),
+    HIVE_ADDITIONAL_PARTIAL_MASKS_REPLACEMENT_TEXT("hive.qtest.additional.partial.mask.replacement.text", "",
+        "internal use only, used only in qtests. Provide additional partial mask replacement " +
+        "text for qtests as a ',' separated list"),
 
     HIVE_IN_REPL_TEST_FILES_SORTED("hive.in.repl.test.files.sorted", false,
       "internal usage only, set to true if the file listing is required in sorted order during bootstrap load", true),
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
index 7ed8388..8465474 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/cli/control/CoreCliDriver.java
@@ -23,6 +23,8 @@ import static org.junit.Assert.fail;
 import java.io.File;
 import java.util.concurrent.TimeUnit;
 
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.ql.QTestArguments;
 import org.apache.hadoop.hive.ql.QTestProcessExecResult;
 import org.apache.hadoop.hive.ql.QTestUtil;
@@ -187,7 +189,9 @@ public class CoreCliDriver extends CliAdapter {
         qt.failed(ecode, fname, debugHint);
       }
 
+      setupAdditionalPartialMasks();
       QTestProcessExecResult result = qt.checkCliDriverResults(fname);
+      resetAdditionalPartialMasks();
       if (result.getReturnCode() != 0) {
         failed = true;
         String message = Strings.isNullOrEmpty(result.getCapturedOutput()) ? debugHint
@@ -206,4 +210,24 @@ public class CoreCliDriver extends CliAdapter {
     }
     assertTrue("Test passed", true);
   }
+
+  private void setupAdditionalPartialMasks() {
+    String patternStr = HiveConf.getVar(qt.getConf(), ConfVars.HIVE_ADDITIONAL_PARTIAL_MASKS_PATTERN);
+    String replacementStr = HiveConf.getVar(qt.getConf(), ConfVars.HIVE_ADDITIONAL_PARTIAL_MASKS_REPLACEMENT_TEXT);
+    if (patternStr != null  && replacementStr != null && !replacementStr.isEmpty() && !patternStr.isEmpty()) {
+      String[] patterns = patternStr.split(",");
+      String[] replacements = replacementStr.split(",");
+      if (patterns.length != replacements.length) {
+        throw new RuntimeException("Count mismatch for additional partial masks and their replacements");
+      }
+      for (int i = 0; i < patterns.length; i++) {
+        qt.getQOutProcessor().addPatternWithMaskComment(patterns[i],
+            String.format("### %s ###", replacements[i]));
+      }
+    }
+  }
+
+  private void resetAdditionalPartialMasks() {
+    qt.getQOutProcessor().resetPatternwithMaskComments();
+  }
 }
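
The pairing done by setupAdditionalPartialMasks() reduces to splitting both properties on ',' and wrapping each replacement in "### ... ###" before handing the pair to qt.getQOutProcessor().addPatternWithMaskComment(). A standalone sketch of that pairing with the values from the new qtest (illustrative only; no driver involved):

    public class MaskPairingSketch {
      public static void main(String[] args) {
        String patternStr = ".*acidparttable/p=(100|200)/base.*,.*acidparttable/p=(100|200)/delta.*";
        String replacementStr = "ACID BASE DIR,ACID DELTA DIR";
        String[] patterns = patternStr.split(",");
        String[] replacements = replacementStr.split(",");
        if (patterns.length != replacements.length) {
          // The driver fails fast when the two lists do not line up.
          throw new RuntimeException("Count mismatch for additional partial masks and their replacements");
        }
        for (int i = 0; i < patterns.length; i++) {
          // Each replacement is wrapped exactly as the driver does before registering it.
          System.out.println(patterns[i] + " -> " + String.format("### %s ###", replacements[i]));
        }
      }
    }

Because both properties are plain ','-separated lists, a pattern that itself contains a comma cannot be expressed through this mechanism.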
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
index b87d904..ec61b34 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/QOutProcessor.java
@@ -293,18 +293,22 @@ public class QOutProcessor {
 
     partialPlanMask = ppm.toArray(new PatternReplacementPair[ppm.size()]);
   }
-  /* This list may be modified by specific cli drivers to mask strings that change on every test */
+
   @SuppressWarnings("serial")
-  private final List<Pair<Pattern, String>> patternsWithMaskComments =
-      new ArrayList<Pair<Pattern, String>>() {
-        {
-          add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*",
-              "### BLOBSTORE_STAGING_PATH ###"));
-          add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, String.format("%s %s$3$4 %s $6%s",
-              HDFS_USER_MASK, HDFS_GROUP_MASK, HDFS_DATE_MASK, HDFS_MASK)));
-          add(toPatternPair(PATH_HDFS_REGEX, String.format("$1%s", HDFS_MASK)));
-        }
-      };
+  private ArrayList<Pair<Pattern, String>> initPatternWithMaskComments() {
+    return new ArrayList<Pair<Pattern, String>>() {
+      {
+        add(toPatternPair("(pblob|s3.?|swift|wasb.?).*hive-staging.*",
+            "### BLOBSTORE_STAGING_PATH ###"));
+        add(toPatternPair(PATH_HDFS_WITH_DATE_USER_GROUP_REGEX, String.format("%s %s$3$4 %s $6%s",
+            HDFS_USER_MASK, HDFS_GROUP_MASK, HDFS_DATE_MASK, HDFS_MASK)));
+        add(toPatternPair(PATH_HDFS_REGEX, String.format("$1%s", HDFS_MASK)));
+      }
+    };
+  }
+
+  /* This list may be modified by specific cli drivers to mask strings that change on every test */
+  private List<Pair<Pattern, String>> patternsWithMaskComments = initPatternWithMaskComments();
 
   private Pair<Pattern, String> toPatternPair(String patternStr, String maskComment) {
     return ImmutablePair.of(Pattern.compile(patternStr), maskComment);
@@ -334,4 +338,8 @@ public class QOutProcessor {
     return false;
   }
 
+  public void resetPatternwithMaskComments() {
+    patternsWithMaskComments = initPatternWithMaskComments();
+  }
+
 }
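
Turning the final, anonymously-initialized list into initPatternWithMaskComments() plus a mutable field lets resetPatternwithMaskComments() discard masks added for one test by re-creating the three default entries, so per-test patterns do not carry over into later qtests. How a registered pair ends up masking an output line is roughly the following (a sketch only, assuming a matching line is replaced wholesale by its mask comment, which is what the masked 'dfs -ls -R' output in the new .q.out shows; the sample path and listing format are made up):

    import java.util.regex.Pattern;

    public class LineMaskSketch {
      public static void main(String[] args) {
        // Pair registered by the driver: compiled pattern plus "### ... ###" mask comment.
        Pattern pattern = Pattern.compile(".*acidparttable/p=(100|200)/base.*");
        String maskComment = "### ACID BASE DIR ###";
        // A line as it might appear in raw 'dfs -ls -R' output (illustrative only).
        String line = "drwxr-xr-x   - user group 0 2019-04-26 18:31 pfile:///tmp/acidparttable/p=100/base_0000002";
        String masked = pattern.matcher(line).matches() ? maskComment : line;
        System.out.println(masked);  // prints: ### ACID BASE DIR ###
      }
    }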
diff --git a/ql/src/test/queries/clientpositive/acid_table_directories_test.q b/ql/src/test/queries/clientpositive/acid_table_directories_test.q
new file mode 100644
index 0000000..9bb8e10
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/acid_table_directories_test.q
@@ -0,0 +1,34 @@
+set hive.mapred.mode=nonstrict;
+set hive.support.concurrency=true;
+set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager;
+set hive.exec.dynamic.partition.mode=nonstrict;
+
+set hive.qtest.additional.partial.mask.pattern=.*acidparttable\/p=(100|200)\/base.*,.*acidparttable/p=(100|200)/delta.*;
+set hive.qtest.additional.partial.mask.replacement.text=ACID BASE DIR,ACID DELTA DIR;
+
+-- create a source table where the IOW data select from
+create table srctbl (key char(1), value int);
+insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10);
+select * from srctbl;
+
+-- insert overwrite on partitioned acid table
+drop table if exists acidparttbl;
+create table acidparttbl (key char(1), value int) partitioned by (p int) clustered by (value) into 2 buckets stored as orc location 'pfile://${system:test.tmp.dir}/acidparttable' TBLPROPERTIES ("transactional"="true");
+
+insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3);
+select p, key, value from acidparttbl order by p, key;
+
+insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f');
+select p, key, value from acidparttbl order by p, key;
+
+insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200);
+select p, key, value from acidparttbl order by p, key;
+
+insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200);
+select p, key, value from acidparttbl order by p, key;
+
+-- check directories of the table
+dfs -ls -R 'pfile://${system:test.tmp.dir}/acidparttable';
+
+drop table acidparttbl;
+drop table srctbl;
\ No newline at end of file
diff --git a/ql/src/test/results/clientpositive/acid_table_directories_test.q.out b/ql/src/test/results/clientpositive/acid_table_directories_test.q.out
new file mode 100644
index 0000000..1fec361
--- /dev/null
+++ b/ql/src/test/results/clientpositive/acid_table_directories_test.q.out
@@ -0,0 +1,186 @@
+PREHOOK: query: create table srctbl (key char(1), value int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@srctbl
+POSTHOOK: query: create table srctbl (key char(1), value int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@srctbl
+PREHOOK: query: insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@srctbl
+POSTHOOK: query: insert into table srctbl values ('d', 4), ('e', 5), ('f', 6), ('i', 9), ('j', 10)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@srctbl
+POSTHOOK: Lineage: srctbl.key SCRIPT []
+POSTHOOK: Lineage: srctbl.value SCRIPT []
+PREHOOK: query: select * from srctbl
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srctbl
+#### A masked pattern was here ####
+POSTHOOK: query: select * from srctbl
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srctbl
+#### A masked pattern was here ####
+d	4
+e	5
+f	6
+i	9
+j	10
+PREHOOK: query: drop table if exists acidparttbl
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table if exists acidparttbl
+POSTHOOK: type: DROPTABLE
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@acidparttbl
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@acidparttbl
+PREHOOK: query: insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: query: insert into table acidparttbl partition(p=100) values ('a', 1), ('b', 2), ('c', 3)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT []
+PREHOOK: query: select p, key, value from acidparttbl order by p, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidparttbl
+PREHOOK: Input: default@acidparttbl@p=100
+#### A masked pattern was here ####
+POSTHOOK: query: select p, key, value from acidparttbl order by p, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidparttbl
+POSTHOOK: Input: default@acidparttbl@p=100
+#### A masked pattern was here ####
+100	a	1
+100	b	2
+100	c	3
+PREHOOK: query: insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srctbl
+PREHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: query: insert overwrite table acidparttbl partition(p=100) select key, value from srctbl where key in ('d', 'e', 'f')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srctbl
+POSTHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SIMPLE [(srctbl)srctbl.FieldSchema(name:key, type:char(1), comment:null), ]
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SIMPLE [(srctbl)srctbl.FieldSchema(name:value, type:int, comment:null), ]
+PREHOOK: query: select p, key, value from acidparttbl order by p, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidparttbl
+PREHOOK: Input: default@acidparttbl@p=100
+#### A masked pattern was here ####
+POSTHOOK: query: select p, key, value from acidparttbl order by p, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidparttbl
+POSTHOOK: Input: default@acidparttbl@p=100
+#### A masked pattern was here ####
+100	d	4
+100	e	5
+100	f	6
+PREHOOK: query: insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acidparttbl
+POSTHOOK: query: insert into table acidparttbl partition(p) values ('g', 7, 100), ('h', 8, 200)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: Output: default@acidparttbl@p=200
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=200).key SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=200).value SCRIPT []
+PREHOOK: query: select p, key, value from acidparttbl order by p, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidparttbl
+PREHOOK: Input: default@acidparttbl@p=100
+PREHOOK: Input: default@acidparttbl@p=200
+#### A masked pattern was here ####
+POSTHOOK: query: select p, key, value from acidparttbl order by p, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidparttbl
+POSTHOOK: Input: default@acidparttbl@p=100
+POSTHOOK: Input: default@acidparttbl@p=200
+#### A masked pattern was here ####
+100	d	4
+100	e	5
+100	f	6
+100	g	7
+200	h	8
+PREHOOK: query: insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@acidparttbl
+POSTHOOK: query: insert overwrite table acidparttbl partition(p) values ('i', 9, 100), ('j', 10, 200)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@acidparttbl@p=100
+POSTHOOK: Output: default@acidparttbl@p=200
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).key SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=100).value SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=200).key SCRIPT []
+POSTHOOK: Lineage: acidparttbl PARTITION(p=200).value SCRIPT []
+PREHOOK: query: select p, key, value from acidparttbl order by p, key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@acidparttbl
+PREHOOK: Input: default@acidparttbl@p=100
+PREHOOK: Input: default@acidparttbl@p=200
+#### A masked pattern was here ####
+POSTHOOK: query: select p, key, value from acidparttbl order by p, key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@acidparttbl
+POSTHOOK: Input: default@acidparttbl@p=100
+POSTHOOK: Input: default@acidparttbl@p=200
+#### A masked pattern was here ####
+100	i	9
+200	j	10
+#### A masked pattern was here ####
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+#### A masked pattern was here ####
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID BASE DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+### ACID DELTA DIR ###
+PREHOOK: query: drop table acidparttbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@acidparttbl
+PREHOOK: Output: default@acidparttbl
+POSTHOOK: query: drop table acidparttbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@acidparttbl
+POSTHOOK: Output: default@acidparttbl
+PREHOOK: query: drop table srctbl
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@srctbl
+PREHOOK: Output: default@srctbl
+POSTHOOK: query: drop table srctbl
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@srctbl
+POSTHOOK: Output: default@srctbl