You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by ni...@apache.org on 2019/01/29 10:26:44 UTC
[atlas] branch branch-0.8 updated: ATLAS-3033 :- Skip hive temp table while getting Table object details from hiveContext.
This is an automated email from the ASF dual-hosted git repository.
nixon pushed a commit to branch branch-0.8
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/branch-0.8 by this push:
new b5472ce ATLAS-3033 :- Skip hive temp table while getting Table object details from hiveContext.
b5472ce is described below
commit b5472ce313dcf24b0bffbe5201f0c95eec025848
Author: nixonrodrigues <ni...@apache.org>
AuthorDate: Wed Jan 23 16:39:17 2019 +0530
ATLAS-3033 :- Skip hive temp table while getting Table object details from hiveContext.
Change-Id: I314847c53dda47b3ed7907eb7fc2cf7277c69aca
---
.../atlas/hive/hook/AtlasHiveHookContext.java | 13 ++++++++++
.../java/org/apache/atlas/hive/hook/HiveHook.java | 29 +++++++++++++++++++++-
.../atlas/hive/hook/events/BaseHiveEvent.java | 29 ++++++++++++++++------
3 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java
index 249f48b..65e6224 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/AtlasHiveHookContext.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hive.ql.session.SessionState;
import java.util.Collection;
import java.util.HashMap;
+import java.util.List;
import java.util.Map;
@@ -101,6 +102,18 @@ public class AtlasHiveHookContext {
public PreprocessAction getPreprocessActionForHiveTable(String qualifiedName) {
return hook.getPreprocessActionForHiveTable(qualifiedName);
}
+
+ /**
+  * Returns the dummy database names to ignore, as reported by the owning hook.
+  *
+  * @return the hook's list of dummy database names; elements are presumably
+  *         Strings (they are matched against database names) - confirm against the hook's configuration
+  */
+ @SuppressWarnings("unchecked") // the hook may expose this list as a raw List
+ public List<String> getIgnoreDummyDatabaseName() {
+     return hook.getIgnoreDummyDatabaseName();
+ }
+
+ /**
+  * Returns the dummy table names to ignore, as reported by the owning hook.
+  *
+  * @return the hook's list of dummy table names; elements are presumably
+  *         Strings (they are matched against table names) - confirm against the hook's configuration
+  */
+ @SuppressWarnings("unchecked") // the hook may expose this list as a raw List
+ public List<String> getIgnoreDummyTableName() {
+     return hook.getIgnoreDummyTableName();
+ }
+
+ /**
+  * Returns the table-name prefix used to recognise temporary "values" tables,
+  * as reported by the owning hook.
+  *
+  * @return the prefix obtained from the hook
+  */
+ public String getIgnoreValuesTmpTableNamePrefix() {
+     final String tmpTablePrefix = hook.getIgnoreValuesTmpTableNamePrefix();
+
+     return tmpTablePrefix;
+ }
public String getQualifiedName(Database db) {
return (db.getName() + QNAME_SEP_CLUSTER_NAME).toLowerCase() + getClusterName();
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 4a6b417..e431e7a 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -26,6 +26,7 @@ import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.hive.shims.Utils;
import org.apache.hadoop.security.UserGroupInformation;
@@ -41,7 +42,8 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
-
+import java.util.List;
+import java.util.ArrayList;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.ATTRIBUTE_QUALIFIED_NAME;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_DB;
import static org.apache.atlas.hive.hook.events.BaseHiveEvent.HIVE_TYPE_TABLE;
@@ -81,6 +83,9 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
private static final List<Pattern> hiveTablesToIgnore = new ArrayList<>();
private static final List<Pattern> hiveTablesToPrune = new ArrayList<>();
private static final Map<String, PreprocessAction> hiveTablesCache;
+ // Dummy database names to ignore; read from "atlas.hook.hive.ignore.dummy.database.name".
+ private static final List ignoreDummyDatabaseName;
+ // Dummy table names to ignore; read from "atlas.hook.hive.ignore.dummy.table.name".
+ private static final List ignoreDummyTableName;
+ // Table-name prefix of temporary "values" tables to ignore; read from "atlas.hook.hive.ignore.values.tmp.table.name.prefix".
+ private static final String ignoreValuesTmpTableNamePrefix;
private static HiveHookObjectNamesCache knownObjects = null;
@@ -134,6 +139,17 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}
knownObjects = nameCacheEnabled ? new HiveHookObjectNamesCache(nameCacheDatabaseMaxCount, nameCacheTableMaxCount, nameCacheRebuildIntervalSeconds) : null;
+
+ List<String> defaultDummyDatabase = new ArrayList<>();
+ defaultDummyDatabase.add(SemanticAnalyzer.DUMMY_DATABASE);
+
+ List<String> defaultDummyTable = new ArrayList<>();
+ defaultDummyTable.add(SemanticAnalyzer.DUMMY_TABLE);
+
+ ignoreDummyDatabaseName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.database.name", defaultDummyDatabase);
+ ignoreDummyTableName = atlasProperties.getList("atlas.hook.hive.ignore.dummy.table.name", defaultDummyTable);
+ ignoreValuesTmpTableNamePrefix = atlasProperties.getString("atlas.hook.hive.ignore.values.tmp.table.name.prefix", "Values__Tmp__Table__" );
+
}
@@ -287,6 +303,17 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return ret;
}
+ /**
+  * Returns the dummy database names that should be ignored by the hook.
+  *
+  * @return the list loaded during static initialization; elements are presumably
+  *         Strings (the default list is a List of String) - confirm for configured values
+  */
+ @SuppressWarnings("unchecked") // backing field is declared as a raw List
+ public List<String> getIgnoreDummyDatabaseName() {
+     return ignoreDummyDatabaseName;
+ }
+
+ /**
+  * Returns the dummy table names that should be ignored by the hook.
+  *
+  * @return the list loaded during static initialization; elements are presumably
+  *         Strings (the default list is a List of String) - confirm for configured values
+  */
+ @SuppressWarnings("unchecked") // backing field is declared as a raw List
+ public List<String> getIgnoreDummyTableName() {
+     return ignoreDummyTableName;
+ }
+
+ /**
+  * Returns the table-name prefix of temporary "values" tables that the hook ignores.
+  *
+  * @return the prefix loaded during static initialization
+  */
+ public String getIgnoreValuesTmpTableNamePrefix() {
+     return HiveHook.ignoreValuesTmpTableNamePrefix;
+ }
public static class HiveHookObjectNamesCache {
private final int dbMaxCacheCount;
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
index 5c52cf4..eae88bc 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
@@ -42,6 +42,7 @@ import org.apache.hadoop.hive.ql.hooks.LineageInfo.DependencyKey;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.SemanticAnalyzer;
import org.apache.hadoop.hive.ql.plan.HiveOperation;
import org.apache.hadoop.security.UserGroupInformation;
import org.slf4j.Logger;
@@ -215,19 +216,31 @@ public abstract class BaseHiveEvent {
switch (entity.getType()) {
case DATABASE: {
- Database db = getHive().getDatabase(entity.getDatabase().getName());
-
- ret = toDbEntity(db);
+ if (!context.getIgnoreDummyDatabaseName().contains(entity.getDatabase().getName())) {
+ Database db = getHive().getDatabase(entity.getDatabase().getName());
+ ret = toDbEntity(db);
+ }
}
break;
case TABLE:
case PARTITION: {
- Table table = getHive().getTable(entity.getTable().getDbName(), entity.getTable().getTableName());
- ret = toTableEntity(table, entityExtInfo);
- }
- break;
+ String dbName = entity.getTable().getDbName();
+ String tableName = entity.getTable().getTableName();
+ String tmpTablePrefix = context.getIgnoreValuesTmpTableNamePrefix();
+
+ // The table name is lower-cased before matching, so the prefix must be lower-cased as well;
+ // otherwise a mixed-case prefix such as "Values__Tmp__Table__" can never match.
+ boolean skipTable = StringUtils.isNotEmpty(tmpTablePrefix) && tableName.toLowerCase().startsWith(tmpTablePrefix.toLowerCase());
+
+ // Also skip a dummy table when it belongs to one of the dummy databases.
+ if (!skipTable) {
+     skipTable = context.getIgnoreDummyTableName().contains(tableName) && context.getIgnoreDummyDatabaseName().contains(dbName);
+ }
+
+ // Only look the table up in Hive when it was not filtered out above;
+ // for a skipped table, ret is left unchanged.
+ if (!skipTable) {
+     Table table = getHive().getTable(dbName, tableName);
+
+     ret = toTableEntity(table, entityExtInfo);
+ }
+ }
+ break;
case DFS_DIR: {
URI location = entity.getLocation();
@@ -239,7 +252,7 @@ public abstract class BaseHiveEvent {
break;
default:
- break;
+ break;
}
return ret;