You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by px...@apache.org on 2016/03/20 18:51:29 UTC
[5/5] hive git commit: HIVE-13125: Support masking and filtering of
rows/columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
HIVE-13125: Support masking and filtering of rows/columns (Pengcheng Xiong, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a0a53713
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a0a53713
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a0a53713
Branch: refs/heads/master
Commit: a0a53713aa982a39f6ccb26c752473a0cf37286d
Parents: c0c08a3
Author: Pengcheng Xiong <px...@apache.org>
Authored: Sun Mar 20 10:50:47 2016 -0700
Committer: Pengcheng Xiong <px...@apache.org>
Committed: Sun Mar 20 10:50:47 2016 -0700
----------------------------------------------------------------------
...SQLStdHiveAuthorizationValidatorForTest.java | 29 +
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 161 +
.../apache/hadoop/hive/ql/parse/TableMask.java | 127 +
.../plugin/HiveAuthorizationValidator.java | 11 +
.../authorization/plugin/HiveAuthorizer.java | 69 +
.../plugin/HiveAuthorizerImpl.java | 23 +
.../authorization/plugin/HiveV1Authorizer.java | 25 +
.../sqlstd/DummyHiveAuthorizationValidator.java | 23 +
.../SQLStdHiveAuthorizationValidator.java | 22 +
ql/src/test/queries/clientpositive/masking_1.q | 27 +
ql/src/test/queries/clientpositive/masking_2.q | 17 +
ql/src/test/queries/clientpositive/masking_3.q | 27 +
ql/src/test/queries/clientpositive/masking_4.q | 30 +
ql/src/test/queries/clientpositive/masking_5.q | 22 +
.../clientpositive/masking_disablecbo_1.q | 28 +
.../clientpositive/masking_disablecbo_2.q | 18 +
.../clientpositive/masking_disablecbo_3.q | 28 +
.../clientpositive/masking_disablecbo_4.q | 31 +
.../test/results/clientpositive/masking_1.q.out | 466 ++
.../test/results/clientpositive/masking_2.q.out | 321 +
.../test/results/clientpositive/masking_3.q.out | 7765 ++++++++++++++++++
.../test/results/clientpositive/masking_4.q.out | 233 +
.../test/results/clientpositive/masking_5.q.out | 189 +
.../clientpositive/masking_disablecbo_1.q.out | 462 ++
.../clientpositive/masking_disablecbo_2.q.out | 355 +
.../clientpositive/masking_disablecbo_3.q.out | 7737 +++++++++++++++++
.../clientpositive/masking_disablecbo_4.q.out | 229 +
27 files changed, 18475 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
----------------------------------------------------------------------
diff --git a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
index fd39c67..c0387e2 100644
--- a/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
+++ b/itests/util/src/main/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidatorForTest.java
@@ -25,6 +25,7 @@ import javax.annotation.Nullable;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext;
@@ -104,4 +105,32 @@ public class SQLStdHiveAuthorizationValidatorForTest extends SQLStdHiveAuthoriza
}
+ /**
+  * Test-only row filter policy: supplies a fixed predicate for the two masking test tables.
+  *
+  * @param database name of the database that holds the table (not consulted here)
+  * @param table name of the table being queried
+  * @return the predicate for {@code masking_test} or {@code masking_test_subq};
+  *         {@code null} for any other table, meaning no row filter
+  * @throws SemanticException declared by the overridden method; not thrown by this body
+  */
+ @Override
+ public String getRowFilterExpression(String database, String table) throws SemanticException {
+   // Constant-first equals keeps a null table name from raising NullPointerException and
+   // matches the comparison style used by needTransform(String, String).
+   if ("masking_test".equals(table)) {
+     return "key % 2 = 0 and key < 10";
+   }
+   if ("masking_test_subq".equals(table)) {
+     return "key in (select key from src where src.key = masking_test_subq.key)";
+   }
+   return null;
+ }
+
+ /**
+  * Global switch of the test policy: masking is considered for every caller.
+  *
+  * @return always {@code true}
+  */
+ public boolean needTransform() {
+   // A later refinement could look at the user name, group name, etc. exposed by
+   // HiveAuthenticationProvider, for example
+   // "hive_test_user".equals(context.getUserName()).
+   final boolean transformForEveryone = true;
+   return transformForEveryone;
+ }
+
+ /**
+  * Reports whether the named table is one of the two tables the test policy masks.
+  *
+  * @param database name of the database that holds the table (not consulted here)
+  * @param table name of the table being queried
+  * @return {@code true} for {@code masking_test} and {@code masking_test_subq}
+  */
+ public boolean needTransform(String database, String table) {
+   if ("masking_test".equals(table)) {
+     return true;
+   }
+   return "masking_test_subq".equals(table);
+ }
+
+ /**
+  * Test-only column masking policy: reverses the {@code value} column of
+  * {@code masking_test} and leaves every other column untouched.
+  *
+  * @param database name of the database that holds the table (not consulted here)
+  * @param table name of the table being queried
+  * @param columnName name of the column being projected
+  * @return {@code "reverse(value)"} for {@code masking_test.value}; otherwise the
+  *         column name itself, i.e. no transformation
+  * @throws SemanticException declared by the overridden method; not thrown by this body
+  */
+ @Override
+ public String getCellValueTransformer(String database, String table, String columnName)
+     throws SemanticException {
+   // Constant-first equals: a null table or column name falls through to the
+   // pass-through return instead of raising NullPointerException.
+   if ("masking_test".equals(table) && "value".equals(columnName)) {
+     return "reverse(value)";
+   }
+   return columnName;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index 0845bc9..58887d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -32,8 +32,10 @@ import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
+import java.util.LinkedList;
import java.util.List;
import java.util.Map;
+import java.util.Queue;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeSet;
@@ -67,6 +69,7 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.MetaException;
import org.apache.hadoop.hive.metastore.api.Order;
import org.apache.hadoop.hive.ql.CompilationOpContext;
+import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.ErrorMsg;
import org.apache.hadoop.hive.ql.QueryProperties;
import org.apache.hadoop.hive.ql.exec.AbstractMapJoinOperator;
@@ -311,6 +314,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
*/
boolean rootTasksResolved;
+ private final TableMask tableMask;
+
CreateTableDesc tableDesc;
/** Not thread-safe. */
@@ -318,6 +323,14 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
protected AnalyzeRewriteContext analyzeRewrite;
+ // A mapping from a tableName to a table object in metastore.
+ Map<String, Table> tableNameToMetaDataTableObject;
+
+ // The tokens we should ignore when we are trying to do table masking.
+ private final Set<Integer> ignoredTokens = Sets.newHashSet(HiveParser.TOK_GROUPBY,
+ HiveParser.TOK_ORDERBY, HiveParser.TOK_WINDOWSPEC, HiveParser.TOK_CLUSTERBY,
+ HiveParser.TOK_DISTRIBUTEBY, HiveParser.TOK_SORTBY);
+
static class Phase1Ctx {
String dest;
int nextNum;
@@ -357,6 +370,8 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
globalLimitCtx = new GlobalLimitCtx();
viewAliasToInput = new HashMap<String, ReadEntity>();
noscan = partialscan = false;
+ tableMask = new TableMask(this, conf);
+ tableNameToMetaDataTableObject = new HashMap<>();
}
@Override
@@ -10307,6 +10322,145 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
}
}
+ private Table getMetaDataTableObjectByName(String tableName) throws HiveException {
+ if (!tableNameToMetaDataTableObject.containsKey(tableName)) {
+ Table table = db.getTable(tableName);
+ tableNameToMetaDataTableObject.put(tableName, table);
+ return table;
+ } else {
+ return tableNameToMetaDataTableObject.get(tableName);
+ }
+ }
+
+ /**
+  * Walks the tree breadth-first and, for every TOK_TABREF whose table needs masking or
+  * filtering according to tableMask, registers the replacement text for that node.
+  * Children of nodes whose token type is in ignoredTokens are not visited.
+  *
+  * @param ast root of the subtree to scan
+  * @param cteAlias CTE names known so far; a TOK_TABREF naming one of them is skipped
+  * @throws SemanticException if a referenced table cannot be loaded, or if building or
+  *         registering the replacement fails
+  */
+ private void walkASTMarkTABREF(ASTNode ast, Set<String> cteAlias)
+     throws SemanticException {
+   Queue<Node> queue = new LinkedList<>();
+   queue.add(ast);
+   while (!queue.isEmpty()) {
+     ASTNode astNode = (ASTNode) queue.poll();
+     if (astNode.getToken().getType() == HiveParser.TOK_TABREF) {
+       int aliasIndex = 0;
+       // Method-local buffer, so the unsynchronized StringBuilder is sufficient.
+       StringBuilder additionalTabInfo = new StringBuilder();
+       for (int index = 1; index < astNode.getChildCount(); index++) {
+         ASTNode ct = (ASTNode) astNode.getChild(index);
+         int childType = ct.getToken().getType();
+         // TODO: support TOK_TABLEBUCKETSAMPLE, TOK_TABLESPLITSAMPLE, and
+         // TOK_TABLEPROPERTIES
+         if (childType == HiveParser.TOK_TABLEBUCKETSAMPLE
+             || childType == HiveParser.TOK_TABLESPLITSAMPLE
+             || childType == HiveParser.TOK_TABLEPROPERTIES) {
+           // These clauses are copied verbatim from the query text and handed to
+           // tableMask.create together with the table.
+           additionalTabInfo.append(ctx.getTokenRewriteStream().toString(
+               ct.getTokenStartIndex(), ct.getTokenStopIndex()));
+         } else {
+           // Any other trailing child is taken to be the alias.
+           aliasIndex = index;
+         }
+       }
+
+       ASTNode tableTree = (ASTNode) (astNode.getChild(0));
+
+       String tabIdName = getUnescapedName(tableTree);
+
+       // Without an explicit alias, the unqualified table name serves as the alias.
+       String alias;
+       if (aliasIndex != 0) {
+         alias = unescapeIdentifier(astNode.getChild(aliasIndex).getText());
+       } else {
+         alias = getUnescapedUnqualifiedTableName(tableTree);
+       }
+
+       // We need to know if it is CTE or not.
+       // A CTE may have the same name as a table.
+       // For example,
+       // with select TAB1 [masking] as TAB2
+       // select * from TAB2 [no masking]
+       if (cteAlias.contains(tabIdName)) {
+         continue;
+       }
+
+       Table table;
+       try {
+         table = getMetaDataTableObjectByName(tabIdName);
+       } catch (HiveException e) {
+         // Keep the original failure as the cause so the real reason (missing table,
+         // metastore error, ...) is not lost.
+         throw new SemanticException("Table " + tabIdName + " is not found.", e);
+       }
+
+       String replacementText = null;
+       if (tableMask.needTransform(table.getDbName(), table.getTableName())) {
+         replacementText = tableMask.create(table, additionalTabInfo.toString(), alias);
+       }
+       if (replacementText != null) {
+         tableMask.setNeedsRewrite(true);
+         // we replace the tabref with replacementText here.
+         tableMask.addTableMasking(astNode, replacementText);
+       }
+     }
+     if (astNode.getChildCount() > 0 && !ignoredTokens.contains(astNode.getToken().getType())) {
+       for (Node child : astNode.getChildren()) {
+         queue.offer(child);
+       }
+     }
+   }
+ }
+
+ // We walk through the AST.
+ // We replace all the TOK_TABREF by adding additional masking and filter if
+ // the table needs to be masked or filtered.
+ // For the replacement, we leverage the methods that are used for
+ // unparseTranslator.
+ public ASTNode rewriteASTWithMaskAndFilter(ASTNode ast) throws SemanticException {
+ // 1. collect information about CTE if there is any.
+ // The base table of CTE should be masked.
+ // The CTE itself should not be masked in the references in the following main query.
+ Set<String> cteAlias = new HashSet<>();
+ if (ast.getChildCount() > 0
+ && HiveParser.TOK_CTE == ((ASTNode) ast.getChild(0)).getToken().getType()) {
+ // the structure inside CTE is like this
+ // TOK_CTE
+ // TOK_SUBQUERY
+ // sq1 (may refer to sq2)
+ // ...
+ // TOK_SUBQUERY
+ // sq2
+ ASTNode cte = (ASTNode) ast.getChild(0);
+ // we start from sq2, end up with sq1.
+ for (int index = cte.getChildCount() - 1; index >= 0; index--) {
+ ASTNode subq = (ASTNode) cte.getChild(index);
+ String alias = unescapeIdentifier(subq.getChild(1).getText());
+ if (cteAlias.contains(alias)) {
+ throw new SemanticException("Duplicate definition of " + alias);
+ } else {
+ cteAlias.add(alias);
+ walkASTMarkTABREF(subq, cteAlias);
+ }
+ }
+ // walk the other part of ast
+ for (int index = 1; index < ast.getChildCount(); index++) {
+ walkASTMarkTABREF((ASTNode) ast.getChild(index), cteAlias);
+ }
+ }
+ // there is no CTE, walk the whole AST
+ else {
+ walkASTMarkTABREF(ast, cteAlias);
+ }
+ // 2. rewrite the AST, replace TABREF with masking/filtering
+ if (tableMask.needsRewrite()) {
+ tableMask.applyTableMasking(ctx.getTokenRewriteStream());
+ String rewrittenQuery = ctx.getTokenRewriteStream().toString(ast.getTokenStartIndex(),
+ ast.getTokenStopIndex());
+ ASTNode rewrittenTree;
+ // Parse the rewritten query string
+ // check if we need to ctx.setCmd(rewrittenQuery);
+ ParseDriver pd = new ParseDriver();
+ try {
+ rewrittenTree = pd.parse(rewrittenQuery);
+ } catch (ParseException e) {
+ throw new SemanticException(e);
+ }
+ rewrittenTree = ParseUtils.findRootNonNullToken(rewrittenTree);
+ return rewrittenTree;
+ } else {
+ return ast;
+ }
+ }
+
boolean genResolvedParseTree(ASTNode ast, PlannerContext plannerCtx) throws SemanticException {
ASTNode child = ast;
this.ast = ast;
@@ -10362,6 +10516,13 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
SessionState.get().setCommandType(SemanticAnalyzerFactory.getOperation(ast.getToken().getType()));
return false;
}
+
+ // masking and filtering should be done here
+ // the basic idea is similar to unparseTranslator.
+ if (!unparseTranslator.isEnabled() && tableMask.isEnabled()) {
+ child = rewriteASTWithMaskAndFilter(ast);
+ }
+
// 4. continue analyzing from the child ASTNode.
Phase1Ctx ctx_1 = initPhase1Ctx();
preProcessForInsert(child, qb);
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
new file mode 100644
index 0000000..c47c2bd
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/TableMask.java
@@ -0,0 +1,127 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.parse;
+
+import java.util.List;
+
+import org.antlr.runtime.TokenRewriteStream;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizer;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Supports authorization-driven row filtering and column masking. For a table that
+ * needs it, {@link #create} renders a subquery string that applies the authorizer's
+ * column expressions and row filter; the caller then registers that string as the
+ * replacement for the table reference and applies all replacements to the token stream.
+ */
+public class TableMask {
+
+  // One logger for the class instead of one per instance; name and visibility are kept.
+  protected static final Logger LOG = LoggerFactory.getLogger(TableMask.class);
+  HiveAuthorizer authorizer;
+  private UnparseTranslator translator;
+  private boolean enable;
+  private boolean needsRewrite;
+
+  /**
+   * Looks up the session's V2 authorizer and enables masking when that authorizer
+   * reports that transforms are needed.
+   *
+   * @param analyzer the owning analyzer (not used by this constructor)
+   * @param conf configuration handed to the UnparseTranslator
+   * @throws SemanticException wrapping any failure during initialization
+   */
+  public TableMask(SemanticAnalyzer analyzer, HiveConf conf) throws SemanticException {
+    try {
+      authorizer = SessionState.get().getAuthorizerV2();
+      if (authorizer != null && needTransform()) {
+        enable = true;
+        translator = new UnparseTranslator(conf);
+        translator.enable();
+      }
+    } catch (Exception e) {
+      // Log the exception itself so the reason is visible in the log.
+      LOG.warn("Failed to initialize masking policy", e);
+      throw new SemanticException(e);
+    }
+  }
+
+  /** Asks the authorizer for the row filter of {@code db.name}; null means no filter. */
+  private String createRowMask(String db, String name) throws SemanticException {
+    return authorizer.getRowFilterExpression(db, name);
+  }
+
+  /** Asks the authorizer for the select expression that replaces {@code colName}. */
+  private String createExpressions(String db, String tbl, String colName) throws SemanticException {
+    return authorizer.getCellValueTransformer(db, tbl, colName);
+  }
+
+  /** @return true when the constructor found an authorizer that requested transforms */
+  public boolean isEnabled() throws SemanticException {
+    return enable;
+  }
+
+  /**
+   * @return the authorizer's global answer, or false when there is no authorizer
+   */
+  public boolean needTransform() throws SemanticException {
+    return authorizer != null && authorizer.needTransform();
+  }
+
+  /**
+   * @param database database of the table in question
+   * @param table name of the table in question
+   * @return the authorizer's per-table answer, or false when there is no authorizer
+   */
+  public boolean needTransform(String database, String table) throws SemanticException {
+    return authorizer != null && authorizer.needTransform(database, table);
+  }
+
+  /**
+   * Renders the replacement for a table reference in the form
+   * {@code (SELECT <exprs> FROM <tbl> <additionalTabInfo> [WHERE <filter>])<alias>}.
+   *
+   * @param table the table whose columns are listed in the select clause
+   * @param additionalTabInfo text placed after the table name (may be empty)
+   * @param alias alias appended after the closing parenthesis
+   * @return the subquery text
+   * @throws SemanticException if the authorizer fails to supply an expression or filter
+   */
+  public String create(Table table, String additionalTabInfo, String alias) throws SemanticException {
+    String db = table.getDbName();
+    String tbl = table.getTableName();
+    StringBuilder sb = new StringBuilder();
+    sb.append("(SELECT ");
+    List<FieldSchema> cols = table.getAllCols();
+    boolean firstOne = true;
+    for (FieldSchema fs : cols) {
+      if (!firstOne) {
+        sb.append(", ");
+      } else {
+        firstOne = false;
+      }
+      String colName = fs.getName();
+      String expr = createExpressions(db, tbl, colName);
+      if (expr == null) {
+        // No expression supplied: project the column unchanged.
+        sb.append(colName);
+      } else {
+        sb.append(expr).append(" AS ").append(colName);
+      }
+    }
+    // NOTE(review): the table name is emitted without its database; for a table outside
+    // the session's current database this may resolve differently - confirm.
+    sb.append(" FROM ").append(tbl);
+    sb.append(' ').append(additionalTabInfo);
+    String filter = createRowMask(db, tbl);
+    if (filter != null) {
+      sb.append(" WHERE ").append(filter);
+    }
+    sb.append(')').append(alias);
+    String result = sb.toString();
+    // Parameterized logging avoids building the message when debug is off.
+    LOG.debug("TableMask creates `{}`", result);
+    return result;
+  }
+
+  /** Registers {@code replacementText} as the replacement for {@code node}. */
+  void addTableMasking(ASTNode node, String replacementText) throws SemanticException {
+    translator.addTranslation(node, replacementText);
+  }
+
+  /** Applies all registered replacements to the given token stream. */
+  void applyTableMasking(TokenRewriteStream tokenRewriteStream) throws SemanticException {
+    translator.applyTranslations(tokenRewriteStream);
+  }
+
+  /** @return whether at least one replacement has been flagged via setNeedsRewrite */
+  public boolean needsRewrite() {
+    return needsRewrite;
+  }
+
+  /** @param needsRewrite the new value of the rewrite flag */
+  public void setNeedsRewrite(boolean needsRewrite) {
+    this.needsRewrite = needsRewrite;
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java
index 59aabe4..1b366c2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizationValidator.java
@@ -20,6 +20,8 @@ package org.apache.hadoop.hive.ql.security.authorization.plugin;
import java.util.List;
import org.apache.hadoop.classification.InterfaceAudience.Private;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
/**
* Interface used to check if user has privileges to perform certain action.
@@ -40,4 +42,13 @@ public interface HiveAuthorizationValidator {
List<HivePrivilegeObject> filterListCmdObjects(List<HivePrivilegeObject> listObjs,
HiveAuthzContext context);
+ /**
+ * Supplies the row filter condition for a table. HiveAuthorizerImpl forwards
+ * HiveAuthorizer.getRowFilterExpression to this method; the default
+ * implementations in this change return null.
+ */
+ public String getRowFilterExpression(String database, String table) throws SemanticException;
+
+ /**
+ * Supplies the select expression that stands in for a column.
+ * HiveAuthorizerImpl forwards HiveAuthorizer.getCellValueTransformer to this
+ * method.
+ */
+ public String getCellValueTransformer(String database, String table, String columnName)
+ throws SemanticException;
+
+ /**
+ * Global switch: whether row/column transforms should be considered at all.
+ * HiveAuthorizerImpl forwards HiveAuthorizer.needTransform() to this method.
+ */
+ public boolean needTransform();
+
+ /**
+ * Per-table switch: whether the given table needs row/column transforms.
+ * HiveAuthorizerImpl forwards HiveAuthorizer.needTransform(String, String)
+ * to this method.
+ */
+ public boolean needTransform(String database, String table);
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java
index c93e334..6e2ef8d 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizer.java
@@ -23,6 +23,8 @@ import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPri
import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.HiveAuthorizationProvider;
/**
@@ -234,5 +236,72 @@ public interface HiveAuthorizer {
*/
Object getHiveAuthorizationTranslator() throws HiveAuthzPluginException;
+ /*
+ * TableMaskingPolicy defines how users can access base tables. It defines a
+ * policy on what columns and rows are hidden, masked or redacted based on
+ * user, role or location. The four methods below make up that policy.
+ */
+ /**
+ * getRowFilterExpression is called once for each table in a query. It expects
+ * a valid filter condition to be returned. Null indicates no filtering is
+ * required.
+ *
+ * Example: table foo(c int) -> "c > 0 && c % 2 = 0"
+ *
+ * @param database
+ * the name of the database in which the table lives
+ * @param table
+ * the name of the table in question
+ * @return the filter condition for the table, or null when no filtering is
+ * required
+ * @throws SemanticException
+ * declared so that implementations can report a failure
+ */
+ public String getRowFilterExpression(String database, String table) throws SemanticException;
+
+ /**
+ * needTransform() is called once per user in a query. If the function returns
+ * true a call to needTransform(String database, String table) will happen.
+ * Returning false short-circuits the generation of row/column transforms.
+ *
+ * @return true if row/column transforms should be considered, false to skip
+ * them entirely
+ */
+ public boolean needTransform();
+
+ /**
+ * needTransform(String database, String table) is called once per table in a
+ * query. If the function returns true a call to getRowFilterExpression and
+ * getCellValueTransformer will happen. Returning false short-circuits the
+ * generation of row/column transforms.
+ *
+ * @param database
+ * the name of the database in which the table lives
+ * @param table
+ * the name of the table in question
+ * @return true if the table needs row/column transforms, false otherwise
+ */
+ public boolean needTransform(String database, String table);
+
+ /**
+ * getCellValueTransformer is called once per column in each table accessed by
+ * the query. It expects a valid expression as used in a select clause. Null
+ * is not a valid option. If no transformation is needed simply return the
+ * column name.
+ *
+ * Example: column a -> "a" (no transform)
+ *
+ * Example: column a -> "reverse(a)" (call the reverse function on a)
+ *
+ * Example: column a -> "5" (replace column a with the constant 5)
+ *
+ * @param database
+ * the name of the database in which the table lives
+ * @param table
+ * the name of the table in question
+ * @param columnName
+ * the name of the column in question
+ * @return the select-clause expression to use in place of the column
+ * @throws SemanticException
+ * declared so that implementations can report a failure
+ */
+ public String getCellValueTransformer(String database, String table, String columnName)
+ throws SemanticException;
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java
index 00fa8cf..c73d667 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveAuthorizerImpl.java
@@ -22,6 +22,8 @@ import java.util.List;
import org.apache.hadoop.hive.common.classification.InterfaceAudience.LimitedPrivate;
import org.apache.hadoop.hive.common.classification.InterfaceStability.Evolving;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
/**
* Convenience implementation of HiveAuthorizer.
@@ -135,4 +137,25 @@ public class HiveAuthorizerImpl extends AbstractHiveAuthorizer {
accessController.applyAuthorizationConfigPolicy(hiveConf);
}
+ /**
+  * Hands the row-filter lookup to the configured authorization validator.
+  *
+  * @param database the name of the database in which the table lives
+  * @param table the name of the table in question
+  * @return whatever the validator returns for this database and table
+  * @throws SemanticException if the validator throws it
+  */
+ @Override
+ public String getRowFilterExpression(String database, String table) throws SemanticException {
+   // The validator's signature is (database, table); pass the database name first
+   // rather than the table name twice.
+   return authValidator.getRowFilterExpression(database, table);
+ }
+
+ /**
+  * Hands the column-expression lookup to the configured authorization validator.
+  */
+ @Override
+ public String getCellValueTransformer(String database, String table, String columnName)
+     throws SemanticException {
+   final String transformer =
+       authValidator.getCellValueTransformer(database, table, columnName);
+   return transformer;
+ }
+
+ /**
+  * Hands the global "is any transform needed" question to the validator.
+  */
+ @Override
+ public boolean needTransform() {
+   final boolean validatorAnswer = authValidator.needTransform();
+   return validatorAnswer;
+ }
+
+ /**
+  * Hands the per-table "is a transform needed" question to the validator.
+  */
+ @Override
+ public boolean needTransform(String database, String table) {
+   final boolean validatorAnswer = authValidator.needTransform(database, table);
+   return validatorAnswer;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java
index 8a03989..c8aa9db 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveV1Authorizer.java
@@ -37,6 +37,8 @@ import org.apache.hadoop.hive.metastore.api.RolePrincipalGrant;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.AuthorizationUtils;
import org.apache.hadoop.hive.ql.security.authorization.PrivilegeScope;
import org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAccessController;
@@ -394,4 +396,27 @@ public class HiveV1Authorizer extends AbstractHiveAuthorizer {
return listObjs;
}
+
+ /**
+  * This authorizer defines no row filters.
+  *
+  * @return always {@code null}, i.e. no filtering
+  */
+ @Override
+ public String getRowFilterExpression(String database, String table) throws SemanticException {
+   return null;
+ }
+
+ /**
+  * This authorizer never requests row/column transforms.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform() {
+   return false;
+ }
+
+ /**
+  * This authorizer never requests row/column transforms for any table.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform(String database, String table) {
+   return false;
+ }
+
+ /**
+  * This authorizer defines no column masking.
+  *
+  * @return the column name unchanged; the HiveAuthorizer contract states that null is
+  *         not a valid result and that "no transform" is expressed by the column name
+  */
+ @Override
+ public String getCellValueTransformer(String database, String table, String columnName)
+     throws SemanticException {
+   return columnName;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java
index 26e3a2c..e4ddc9b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/DummyHiveAuthorizationValidator.java
@@ -21,6 +21,8 @@ import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthzContext;
@@ -48,4 +50,25 @@ public class DummyHiveAuthorizationValidator implements HiveAuthorizationValidat
return listObjs;
}
+ /**
+  * The dummy validator defines no row filters.
+  *
+  * @return always {@code null}, i.e. no filtering
+  */
+ @Override
+ public String getRowFilterExpression(String database, String table) throws SemanticException {
+   return null;
+ }
+
+ /**
+  * The dummy validator defines no column masking.
+  *
+  * @return the column name unchanged; HiveAuthorizer documents that null is not a
+  *         valid result and that "no transform" is expressed by the column name
+  */
+ @Override
+ public String getCellValueTransformer(String database, String table, String columnName)
+     throws SemanticException {
+   return columnName;
+ }
+
+ /**
+  * The dummy validator never requests row/column transforms.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform() {
+   return false;
+ }
+
+ /**
+  * The dummy validator never requests row/column transforms for any table.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform(String database, String table) {
+   return false;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java
index 9f586be..c5d60b3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/SQLStdHiveAuthorizationValidator.java
@@ -27,6 +27,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.IMetaStoreClient;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.security.HiveAuthenticationProvider;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAccessControlException;
import org.apache.hadoop.hive.ql.security.authorization.plugin.HiveAuthorizationValidator;
@@ -149,4 +150,25 @@ public class SQLStdHiveAuthorizationValidator implements HiveAuthorizationValida
return listObjs;
}
+ /**
+  * The SQL standard validator defines no row filters.
+  *
+  * @return always {@code null}, i.e. no filtering
+  */
+ @Override
+ public String getRowFilterExpression(String database, String table) throws SemanticException {
+   return null;
+ }
+
+ /**
+  * The SQL standard validator defines no column masking.
+  *
+  * @return the column name unchanged; HiveAuthorizer documents that null is not a
+  *         valid result and that "no transform" is expressed by the column name
+  */
+ @Override
+ public String getCellValueTransformer(String database, String table, String columnName)
+     throws SemanticException {
+   return columnName;
+ }
+
+ /**
+  * The SQL standard validator never requests row/column transforms.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform() {
+   return false;
+ }
+
+ /**
+  * The SQL standard validator never requests row/column transforms for any table.
+  *
+  * @return always {@code false}
+  */
+ @Override
+ public boolean needTransform(String database, String table) {
+   return false;
+ }
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_1.q b/ql/src/test/queries/clientpositive/masking_1.q
new file mode 100644
index 0000000..90f009d
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_1.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+explain select * from masking_test where key > 0;
+select * from masking_test where key > 0;
+
+explain select key from masking_test where key > 0;
+select key from masking_test where key > 0;
+
+explain select value from masking_test where key > 0;
+select value from masking_test where key > 0;
+
+explain select * from masking_test join srcpart on (masking_test.key = srcpart.key);
+select * from masking_test join srcpart on (masking_test.key = srcpart.key);
+
+explain select * from default.masking_test where key > 0;
+select * from default.masking_test where key > 0;
+
+explain select * from masking_test where masking_test.key > 0;
+select * from masking_test where masking_test.key > 0;
+
+explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_2.q b/ql/src/test/queries/clientpositive/masking_2.q
new file mode 100644
index 0000000..fc4a71e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_2.q
@@ -0,0 +1,17 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create view masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+explain select * from masking_test where key > 0;
+select * from masking_test where key > 0;
+
+explain select * from src a join masking_test b on a.key = b.value where b.key > 0;
+
+explain select a.*, b.key from masking_test a join masking_test b on a.key = b.value where b.key > 0;
+
+explain select * from masking_test a union select b.* from masking_test b where b.key > 0;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_3.q b/ql/src/test/queries/clientpositive/masking_3.q
new file mode 100644
index 0000000..584dac1
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_3.q
@@ -0,0 +1,27 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test_subq as select cast(key as int) as key, value from src;
+
+explain select * from masking_test_subq;
+select * from masking_test_subq;
+
+explain select * from masking_test_subq where key > 0;
+select * from masking_test_subq where key > 0;
+
+explain select key from masking_test_subq where key > 0;
+select key from masking_test_subq where key > 0;
+
+explain select value from masking_test_subq where key > 0;
+select value from masking_test_subq where key > 0;
+
+explain select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key);
+select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key);
+
+explain select * from default.masking_test_subq where key > 0;
+select * from default.masking_test_subq where key > 0;
+
+explain select * from masking_test_subq where masking_test_subq.key > 0;
+select * from masking_test_subq where masking_test_subq.key > 0;
+
+explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_4.q b/ql/src/test/queries/clientpositive/masking_4.q
new file mode 100644
index 0000000..04dd185
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_4.q
@@ -0,0 +1,30 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+create table masking_test_subq as select cast(key as int) as key, value from src;
+
+
+explain
+with q1 as ( select key from q2 where key = '5'),
+q2 as ( select key from src where key = '5')
+select * from (select key from q1) a;
+
+
+--should mask masking_test
+
+explain
+with q1 as ( select * from masking_test where key = '5')
+select * from q1;
+
+--should not mask masking_test_subq
+
+explain
+with masking_test_subq as ( select * from masking_test where key = '5')
+select * from masking_test_subq;
+
+--should mask masking_test_subq
+
+explain
+with q1 as ( select * from masking_test where key = '5')
+select * from masking_test_subq;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_5.q b/ql/src/test/queries/clientpositive/masking_5.q
new file mode 100644
index 0000000..a2f7d18
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_5.q
@@ -0,0 +1,22 @@
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test tablesample (10 rows);
+select * from masking_test tablesample (10 rows);
+
+explain
+select * from masking_test tablesample(1 percent);
+select * from masking_test tablesample(1 percent);
+
+drop table masking_test;
+
+CREATE TABLE masking_test(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
+
+insert overwrite table masking_test
+select * from src;
+
+explain
+select * from masking_test tablesample (bucket 1 out of 2) s;
+select * from masking_test tablesample (bucket 1 out of 2) s;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_disablecbo_1.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_disablecbo_1.q b/ql/src/test/queries/clientpositive/masking_disablecbo_1.q
new file mode 100644
index 0000000..8fefbcf
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_disablecbo_1.q
@@ -0,0 +1,28 @@
+set hive.cbo.enable=false;
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+explain select * from masking_test where key > 0;
+select * from masking_test where key > 0;
+
+explain select key from masking_test where key > 0;
+select key from masking_test where key > 0;
+
+explain select value from masking_test where key > 0;
+select value from masking_test where key > 0;
+
+explain select * from masking_test join srcpart on (masking_test.key = srcpart.key);
+select * from masking_test join srcpart on (masking_test.key = srcpart.key);
+
+explain select * from default.masking_test where key > 0;
+select * from default.masking_test where key > 0;
+
+explain select * from masking_test where masking_test.key > 0;
+select * from masking_test where masking_test.key > 0;
+
+explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_disablecbo_2.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_disablecbo_2.q b/ql/src/test/queries/clientpositive/masking_disablecbo_2.q
new file mode 100644
index 0000000..8375c52
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_disablecbo_2.q
@@ -0,0 +1,18 @@
+set hive.cbo.enable=false;
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create view masking_test as select cast(key as int) as key, value from src;
+
+explain select * from masking_test;
+select * from masking_test;
+
+explain select * from masking_test where key > 0;
+select * from masking_test where key > 0;
+
+explain select * from src a join masking_test b on a.key = b.value where b.key > 0;
+
+explain select a.*, b.key from masking_test a join masking_test b on a.key = b.value where b.key > 0;
+
+explain select * from masking_test a union select b.* from masking_test b where b.key > 0;
+
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_disablecbo_3.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_disablecbo_3.q b/ql/src/test/queries/clientpositive/masking_disablecbo_3.q
new file mode 100644
index 0000000..9501edd
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_disablecbo_3.q
@@ -0,0 +1,28 @@
+set hive.cbo.enable=false;
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test_subq as select cast(key as int) as key, value from src;
+
+explain select * from masking_test_subq;
+select * from masking_test_subq;
+
+explain select * from masking_test_subq where key > 0;
+select * from masking_test_subq where key > 0;
+
+explain select key from masking_test_subq where key > 0;
+select key from masking_test_subq where key > 0;
+
+explain select value from masking_test_subq where key > 0;
+select value from masking_test_subq where key > 0;
+
+explain select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key);
+select * from masking_test_subq join srcpart on (masking_test_subq.key = srcpart.key);
+
+explain select * from default.masking_test_subq where key > 0;
+select * from default.masking_test_subq where key > 0;
+
+explain select * from masking_test_subq where masking_test_subq.key > 0;
+select * from masking_test_subq where masking_test_subq.key > 0;
+
+explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/queries/clientpositive/masking_disablecbo_4.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/masking_disablecbo_4.q b/ql/src/test/queries/clientpositive/masking_disablecbo_4.q
new file mode 100644
index 0000000..7279114
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/masking_disablecbo_4.q
@@ -0,0 +1,31 @@
+set hive.cbo.enable=false;
+set hive.mapred.mode=nonstrict;
+set hive.security.authorization.manager=org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactoryForTest;
+
+create table masking_test as select cast(key as int) as key, value from src;
+create table masking_test_subq as select cast(key as int) as key, value from src;
+
+
+explain
+with q1 as ( select key from q2 where key = '5'),
+q2 as ( select key from src where key = '5')
+select * from (select key from q1) a;
+
+
+--should mask masking_test
+
+explain
+with q1 as ( select * from masking_test where key = '5')
+select * from q1;
+
+--should not mask masking_test_subq
+
+explain
+with masking_test_subq as ( select * from masking_test where key = '5')
+select * from masking_test_subq;
+
+--should mask masking_test_subq
+
+explain
+with q1 as ( select * from masking_test where key = '5')
+select * from masking_test_subq;
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/results/clientpositive/masking_1.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_1.q.out b/ql/src/test/results/clientpositive/masking_1.q.out
new file mode 100644
index 0000000..ba2297e
--- /dev/null
+++ b/ql/src/test/results/clientpositive/masking_1.q.out
@@ -0,0 +1,466 @@
+PREHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create table masking_test as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+POSTHOOK: Lineage: masking_test.key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: masking_test.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: explain select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+0 0_lav
+4 4_lav
+8 8_lav
+0 0_lav
+0 0_lav
+2 2_lav
+PREHOOK: query: explain select * from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((key % 2) = 0) and (key < 10)) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+4 4_lav
+8 8_lav
+2 2_lav
+PREHOOK: query: explain select key from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((key % 2) = 0) and (key < 10)) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int)
+ outputColumnNames: _col0
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select key from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select key from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+4
+8
+2
+PREHOOK: query: explain select value from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((key % 2) = 0) and (key < 10)) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: reverse(value) (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select value from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select value from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+4_lav
+8_lav
+2_lav
+PREHOOK: query: explain select * from masking_test join srcpart on (masking_test.key = srcpart.key)
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test join srcpart on (masking_test.key = srcpart.key)
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((key % 2) = 0) and (key < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ TableScan
+ alias: srcpart
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string), ds (type: string), hr (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col0) (type: double)
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test join srcpart on (masking_test.key = srcpart.key)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@srcpart
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test join srcpart on (masking_test.key = srcpart.key)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@srcpart
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+#### A masked pattern was here ####
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-08 11
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-08 12
+0 0_lav 0 val_0 2008-04-09 11
+0 0_lav 0 val_0 2008-04-09 12
+2 2_lav 2 val_2 2008-04-09 11
+2 2_lav 2 val_2 2008-04-08 11
+2 2_lav 2 val_2 2008-04-09 12
+2 2_lav 2 val_2 2008-04-08 12
+4 4_lav 4 val_4 2008-04-08 12
+4 4_lav 4 val_4 2008-04-09 12
+4 4_lav 4 val_4 2008-04-08 11
+4 4_lav 4 val_4 2008-04-09 11
+8 8_lav 8 val_8 2008-04-08 11
+8 8_lav 8 val_8 2008-04-09 11
+8 8_lav 8 val_8 2008-04-08 12
+8 8_lav 8 val_8 2008-04-09 12
+PREHOOK: query: explain select * from default.masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from default.masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((key % 2) = 0) and (key < 10)) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from default.masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from default.masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+4 4_lav
+8 8_lav
+2 2_lav
+PREHOOK: query: explain select * from masking_test where masking_test.key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test where masking_test.key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: masking_test
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((key % 2) = 0) and (key < 10)) and (key > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test where masking_test.key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test where masking_test.key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+#### A masked pattern was here ####
+4 4_lav
+8 8_lav
+2 2_lav
+PREHOOK: query: explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select key, value from (select key, value from (select key, upper(value) as value from src where key > 0) t where key < 10) t2 where key % 2 = 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((UDFToDouble(key) > 0.0) and (UDFToDouble(key) < 10.0)) and ((UDFToDouble(key) % 2.0) = 0.0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), upper(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
http://git-wip-us.apache.org/repos/asf/hive/blob/a0a53713/ql/src/test/results/clientpositive/masking_2.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/masking_2.q.out b/ql/src/test/results/clientpositive/masking_2.q.out
new file mode 100644
index 0000000..3feaa24
--- /dev/null
+++ b/ql/src/test/results/clientpositive/masking_2.q.out
@@ -0,0 +1,321 @@
+PREHOOK: query: create view masking_test as select cast(key as int) as key, value from src
+PREHOOK: type: CREATEVIEW
+PREHOOK: Input: default@src
+PREHOOK: Output: database:default
+PREHOOK: Output: default@masking_test
+POSTHOOK: query: create view masking_test as select cast(key as int) as key, value from src
+POSTHOOK: type: CREATEVIEW
+POSTHOOK: Input: default@src
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@masking_test
+PREHOOK: query: explain select * from masking_test
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0 0_lav
+4 4_lav
+8 8_lav
+0 0_lav
+0 0_lav
+2 2_lav
+PREHOOK: query: explain select * from masking_test where key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test where key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) and (UDFToInteger(key) > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select * from masking_test where key > 0
+PREHOOK: type: QUERY
+PREHOOK: Input: default@masking_test
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select * from masking_test where key > 0
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@masking_test
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+4 4_lav
+8 8_lav
+2 2_lav
+PREHOOK: query: explain select * from src a join masking_test b on a.key = b.value where b.key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from src a join masking_test b on a.key = b.value where b.key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: a
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: key is not null (type: boolean)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: key (type: string), value (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: string)
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) and (UDFToInteger(key) > 0)) and reverse(value) is not null) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col1 (type: string)
+ sort order: +
+ Map-reduce partition columns: _col1 (type: string)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 _col0 (type: string)
+ 1 _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 550 Data size: 5843 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select a.*, b.key from masking_test a join masking_test b on a.key = b.value where b.key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select a.*, b.key from masking_test a join masking_test b on a.key = b.value where b.key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) and UDFToInteger(key) is not null) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col0) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col0) (type: double)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int), _col1 (type: string)
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) and (UDFToInteger(key) > 0)) and reverse(value) is not null) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: UDFToDouble(_col1) (type: double)
+ sort order: +
+ Map-reduce partition columns: UDFToDouble(_col1) (type: double)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col0 (type: int)
+ Reduce Operator Tree:
+ Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0 UDFToDouble(_col0) (type: double)
+ 1 UDFToDouble(_col1) (type: double)
+ outputColumnNames: _col0, _col1, _col2
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 91 Data size: 969 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: explain select * from masking_test a union select b.* from masking_test b where b.key > 0
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select * from masking_test a union select b.* from masking_test b where b.key > 0
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: (((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) (type: boolean)
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 83 Data size: 881 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ TableScan
+ alias: src
+ Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
+ Filter Operator
+ predicate: ((((UDFToInteger(key) % 2) = 0) and (UDFToInteger(key) < 10)) and (UDFToInteger(key) > 0)) (type: boolean)
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Select Operator
+ expressions: UDFToInteger(key) (type: int), reverse(value) (type: string)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 27 Data size: 286 Basic stats: COMPLETE Column stats: NONE
+ Union
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ Group By Operator
+ keys: _col0 (type: int), _col1 (type: string)
+ mode: hash
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: int), _col1 (type: string)
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: int), _col1 (type: string)
+ Statistics: Num rows: 110 Data size: 1167 Basic stats: COMPLETE Column stats: NONE
+ Reduce Operator Tree:
+ Group By Operator
+ keys: KEY._col0 (type: int), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 55 Data size: 583 Basic stats: COMPLETE Column stats: NONE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 55 Data size: 583 Basic stats: COMPLETE Column stats: NONE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+