You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by jc...@apache.org on 2019/11/24 16:09:36 UTC
[hive] branch master updated: HIVE-22486: Send only accessed columns for masking policies request (Jesus Camacho Rodriguez reviewed by Zoltan Haindrich)
This is an automated email from the ASF dual-hosted git repository.
jcamacho pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a70408a HIVE-22486: Send only accessed columns for masking policies request (Jesus Camacho Rodriguez reviewed by Zoltan Haindrich)
a70408a is described below
commit a70408acecea5754c6cb05d65af6dbadc22502da
Author: Jesus Camacho Rodriguez <jc...@apache.org>
AuthorDate: Thu Nov 21 14:04:02 2019 -0800
HIVE-22486: Send only accessed columns for masking policies request (Jesus Camacho Rodriguez reviewed by Zoltan Haindrich)
Close apache/hive#848
---
.../calcite/rules/HiveRelFieldTrimmer.java | 29 ++++--
.../hadoop/hive/ql/parse/CalcitePlanner.java | 5 +-
.../hadoop/hive/ql/parse/ColumnAccessInfo.java | 112 +++++++++++++++++----
.../hadoop/hive/ql/parse/SemanticAnalyzer.java | 21 ++--
.../clientpositive/llap/column_access_stats.q.out | 4 +-
.../clientpositive/spark/column_access_stats.q.out | 4 +-
6 files changed, 135 insertions(+), 40 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
index c570356..9bf42ed 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/rules/HiveRelFieldTrimmer.java
@@ -61,6 +61,7 @@ import org.apache.calcite.util.mapping.IntPair;
import org.apache.calcite.util.mapping.Mapping;
import org.apache.calcite.util.mapping.MappingType;
import org.apache.calcite.util.mapping.Mappings;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.optimizer.calcite.HiveCalciteUtil;
import org.apache.hadoop.hive.ql.optimizer.calcite.RelOptHiveTable;
@@ -672,10 +673,10 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
public TrimResult trimFields(Project project, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
// set columnAccessInfo for ViewColumnAuthorization
- for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
- if (fieldsUsed.get(ord.i)) {
- if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null
- && this.viewProjectToTableSchema.containsKey(project)) {
+ if (this.columnAccessInfo != null && this.viewProjectToTableSchema != null
+ && this.viewProjectToTableSchema.containsKey(project)) {
+ for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+ if (fieldsUsed.get(ord.i)) {
Table tab = this.viewProjectToTableSchema.get(project);
this.columnAccessInfo.add(tab.getCompleteName(), tab.getAllCols().get(ord.i).getName());
}
@@ -684,10 +685,26 @@ public class HiveRelFieldTrimmer extends RelFieldTrimmer {
return super.trimFields(project, fieldsUsed, extraFields);
}
- @Override
- public TrimResult trimFields(TableScan tableAccessRel, ImmutableBitSet fieldsUsed,
+ public TrimResult trimFields(HiveTableScan tableAccessRel, ImmutableBitSet fieldsUsed,
Set<RelDataTypeField> extraFields) {
final TrimResult result = super.trimFields(tableAccessRel, fieldsUsed, extraFields);
+ if (this.columnAccessInfo != null) {
+ // Record which columns of this table are accessed so that only those
+ // columns are sent in the column masking policies request
+ final RelOptHiveTable tab = (RelOptHiveTable) tableAccessRel.getTable();
+ final String qualifiedName = tab.getHiveTableMD().getCompleteName();
+ final List<FieldSchema> allCols = tab.getHiveTableMD().getAllCols();
+ final boolean insideView = tableAccessRel.isInsideView();
+ fieldsUsed.asList().stream()
+ .filter(idx -> idx < tab.getNoOfNonVirtualCols())
+ .forEach(idx -> {
+ if (insideView) {
+ columnAccessInfo.addIndirect(qualifiedName, allCols.get(idx).getName());
+ } else {
+ columnAccessInfo.add(qualifiedName, allCols.get(idx).getName());
+ }
+ });
+ }
if (fetchStats) {
fetchColStats(result.getKey(), tableAccessRel, fieldsUsed, extraFields);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 4762335..2b9caac 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -307,6 +307,7 @@ public class CalcitePlanner extends SemanticAnalyzer {
private SemanticException semanticException;
private boolean runCBO = true;
private boolean disableSemJoinReordering = true;
+
private EnumSet<ExtendedCBOProfile> profilesCBO;
private static final CommonToken FROM_TOKEN =
@@ -1780,8 +1781,8 @@ public class CalcitePlanner extends SemanticAnalyzer {
// We need to get the ColumnAccessInfo and viewToTableSchema for views.
HiveRelFieldTrimmer fieldTrimmer = new HiveRelFieldTrimmer(null,
- HiveRelFactories.HIVE_BUILDER.create(optCluster, null), this.columnAccessInfo,
- this.viewProjectToTableSchema);
+ HiveRelFactories.HIVE_BUILDER.create(optCluster, null),
+ this.columnAccessInfo, this.viewProjectToTableSchema);
fieldTrimmer.trim(calciteGenPlan);
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
index 9fb6a4e..ca1c391 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnAccessInfo.java
@@ -20,42 +20,74 @@ package org.apache.hadoop.hive.ql.parse;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
-import java.util.ArrayList;
-import java.util.Collections;
+import com.google.common.collect.LinkedHashMultimap;
+import com.google.common.collect.SetMultimap;
+
+import java.util.Collection;
import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
-import java.util.Set;
+import java.util.Objects;
+import java.util.stream.Collectors;
public class ColumnAccessInfo {
/**
- * Map of table name to names of accessed columns
+ * Map of table name to names of accessed columns (directly, or indirectly through views).
*/
- private final Map<String, Set<String>> tableToColumnAccessMap;
+ private final SetMultimap<String, ColumnAccess> tableToColumnAccessMap;
public ColumnAccessInfo() {
// Must be deterministic order map for consistent q-test output across Java versions
- tableToColumnAccessMap = new LinkedHashMap<String, Set<String>>();
+ tableToColumnAccessMap = LinkedHashMultimap.create();
}
+ /**
+ * Adds direct access to a column.
+ */
public void add(String table, String col) {
- Set<String> tableColumns = tableToColumnAccessMap.get(table);
- if (tableColumns == null) {
- // Must be deterministic order set for consistent q-test output across Java versions
- tableColumns = new LinkedHashSet<String>();
- tableToColumnAccessMap.put(table, tableColumns);
- }
- tableColumns.add(col);
+ tableToColumnAccessMap.put(table, new ColumnAccess(col, Access.DIRECT));
+ }
+
+ /**
+ * Adds indirect access to a column (through a view).
+ */
+ public void addIndirect(String table, String col) {
+ tableToColumnAccessMap.put(table, new ColumnAccess(col, Access.INDIRECT));
}
+ /**
+ * Returns, per table, the sorted names of the columns that were accessed directly.
+ */
public Map<String, List<String>> getTableToColumnAccessMap() {
// Must be deterministic order map for consistent q-test output across Java versions
Map<String, List<String>> mapping = new LinkedHashMap<String, List<String>>();
- for (Map.Entry<String, Set<String>> entry : tableToColumnAccessMap.entrySet()) {
- List<String> sortedCols = new ArrayList<String>(entry.getValue());
- Collections.sort(sortedCols);
- mapping.put(entry.getKey(), sortedCols);
+ for (Map.Entry<String, Collection<ColumnAccess>> entry : tableToColumnAccessMap.asMap().entrySet()) {
+ List<String> sortedCols = entry.getValue().stream()
+ .filter(ca -> ca.access == Access.DIRECT)
+ .map(ca -> ca.columnName)
+ .sorted()
+ .collect(Collectors.toList());
+ if (!sortedCols.isEmpty()) {
+ mapping.put(entry.getKey(), sortedCols);
+ }
+ }
+ return mapping;
+ }
+
+ /**
+ * Returns, per table, the sorted names of all accessed columns (direct and indirect).
+ */
+ public Map<String, List<String>> getTableToColumnAllAccessMap() {
+ // Must be deterministic order map for consistent q-test output across Java versions
+ Map<String, List<String>> mapping = new LinkedHashMap<String, List<String>>();
+ for (Map.Entry<String, Collection<ColumnAccess>> entry : tableToColumnAccessMap.asMap().entrySet()) {
+ mapping.put(
+ entry.getKey(),
+ entry.getValue().stream()
+ .map(ca -> ca.columnName)
+ .distinct()
+ .sorted()
+ .collect(Collectors.toList()));
}
return mapping;
}
@@ -66,14 +98,50 @@ public class ColumnAccessInfo {
* @param vc
*/
public void stripVirtualColumn(VirtualColumn vc) {
- for (Map.Entry<String, Set<String>> e : tableToColumnAccessMap.entrySet()) {
- for (String columnName : e.getValue()) {
- if (vc.getName().equalsIgnoreCase(columnName)) {
- e.getValue().remove(columnName);
+ for (Map.Entry<String, Collection<ColumnAccess>> e : tableToColumnAccessMap.asMap().entrySet()) {
+ for (ColumnAccess columnAccess : e.getValue()) {
+ if (vc.getName().equalsIgnoreCase(columnAccess.columnName)) {
+ e.getValue().remove(columnAccess);
break;
}
}
}
+ }
+ /**
+ * Column access information.
+ */
+ private static class ColumnAccess {
+ private final String columnName;
+ private final Access access;
+
+ private ColumnAccess (String columnName, Access access) {
+ this.columnName = Objects.requireNonNull(columnName);
+ this.access = Objects.requireNonNull(access);
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o instanceof ColumnAccess) {
+ ColumnAccess other = (ColumnAccess) o;
+ return columnName.equals(other.columnName)
+ && access == other.access;
+ }
+ return false;
+ }
+
+ @Override
+ public int hashCode() {
+ return Objects.hash(columnName, access);
+ }
+
+ }
+
+ private enum Access {
+ DIRECT,
+ INDIRECT
}
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
index c6c2219..ff60ba5 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
@@ -21,8 +21,6 @@ package org.apache.hadoop.hive.ql.parse;
import static java.util.Objects.nonNull;
import static org.apache.hadoop.hive.conf.HiveConf.ConfVars.HIVESTATSDBCLASS;
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.Multimap;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.security.AccessControlException;
@@ -52,7 +50,6 @@ import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;
-import com.google.common.collect.Lists;
import org.antlr.runtime.ClassicToken;
import org.antlr.runtime.CommonToken;
import org.antlr.runtime.Token;
@@ -290,7 +287,10 @@ import org.apache.hadoop.security.UserGroupInformation;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
+import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
+import com.google.common.collect.Multimap;
import com.google.common.collect.Sets;
import com.google.common.math.IntMath;
import com.google.common.math.LongMath;
@@ -12146,9 +12146,18 @@ public class SemanticAnalyzer extends BaseSemanticAnalyzer {
basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames), null);
}
} else {
- List<String> colNames = new ArrayList<>();
- List<String> colTypes = new ArrayList<>();
- extractColumnInfos(table, colNames, colTypes);
+ List<String> colNames;
+ List<String> colTypes;
+ if (isCBOExecuted() && this.columnAccessInfo != null &&
+ (colNames = this.columnAccessInfo.getTableToColumnAllAccessMap().get(table.getCompleteName())) != null) {
+ Map<String, String> colNameToType = table.getAllCols().stream()
+ .collect(Collectors.toMap(FieldSchema::getName, FieldSchema::getType));
+ colTypes = colNames.stream().map(colNameToType::get).collect(Collectors.toList());
+ } else {
+ colNames = new ArrayList<>();
+ colTypes = new ArrayList<>();
+ extractColumnInfos(table, colNames, colTypes);
+ }
basicInfos.put(new HivePrivilegeObject(table.getDbName(), table.getTableName(), colNames),
new MaskAndFilterInfo(colTypes, additionalTabInfo.toString(), alias, astNode, table.isView(), table.isNonNative()));
diff --git a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
index e418823..d3e9a10 100644
--- a/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
+++ b/ql/src/test/results/clientpositive/llap/column_access_stats.q.out
@@ -884,10 +884,10 @@ PREHOOK: Input: default@t1_n127
PREHOOK: Input: default@t2_n75
PREHOOK: Input: default@t3_n29
#### A masked pattern was here ####
-Table:default@t2_n75
+Table:default@t1_n127
Columns:key
-Table:default@t1_n127
+Table:default@t2_n75
Columns:key
Table:default@t3_n29
diff --git a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
index 5db89da..f37d13e 100644
--- a/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
+++ b/ql/src/test/results/clientpositive/spark/column_access_stats.q.out
@@ -836,10 +836,10 @@ PREHOOK: Input: default@t1_n127
PREHOOK: Input: default@t2_n75
PREHOOK: Input: default@t3_n29
#### A masked pattern was here ####
-Table:default@t2_n75
+Table:default@t1_n127
Columns:key
-Table:default@t1_n127
+Table:default@t2_n75
Columns:key
Table:default@t3_n29