You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2021/06/03 16:33:52 UTC
[atlas] branch master updated: ATLAS-4324: FS entity created for load data inpath is created as shell entity
This is an automated email from the ASF dual-hosted git repository.
sarath pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/master by this push:
new a9e528b ATLAS-4324: FS entity created for load data inpath is created as shell entity
a9e528b is described below
commit a9e528bca9277707a1847d110c33288188a8fff5
Author: Radhika Kundam <rk...@cloudera.com>
AuthorDate: Thu Jun 3 00:10:12 2021 -0700
ATLAS-4324: FS entity created for load data inpath is created as shell entity
Signed-off-by: Sarath Subramanian <sa...@apache.org>
---
.../java/org/apache/atlas/hive/hook/HiveHook.java | 1 +
.../atlas/hive/hook/utils/HiveDDLEntityFilter.java | 47 +++++++++++---
.../hive/hook/utils/ActiveEntityFilterTest.java | 34 +++++++++-
.../src/test/resources/json/hs2-create-db-v2.json | 24 +++++++
...eate-db-with-no-pathentities-to-retain-v2.json} | 0
...2-create-db-with-no-pathentities-to-retain.json | 73 ++++++++++++++++++++++
.../test/resources/json/hs2-create-table-v2.json | 17 ++++-
.../test/resources/json/hs2-load-inpath-v2.json | 18 ++++++
...d-inpath-with-no-pathentities-to-retain-v2.json | 4 ++
...load-inpath-with-no-pathentities-to-retain.json | 18 ++++++
.../src/test/resources/json/hs2-load-inpath.json | 18 ++++++
11 files changed, 240 insertions(+), 14 deletions(-)
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index cf918ef..94ef225 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -70,6 +70,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
public static final String HOOK_HIVE_TABLE_PRUNE_PATTERN = CONF_PREFIX + "hive_table.prune.pattern";
public static final String HOOK_HIVE_TABLE_CACHE_SIZE = CONF_PREFIX + "hive_table.cache.size";
public static final String HOOK_HIVE_IGNORE_DDL_OPERATIONS = CONF_PREFIX + "hs2.ignore.ddl.operations";
+ public static final String HOOK_HIVE_FILTER_ENTITY_TYPES_TO_RETAIN = CONF_PREFIX + "hs2.filter.entity.types.to.retain";
public static final String DEFAULT_HOST_NAME = "localhost";
private static final Map<String, HiveOperation> OPERATION_MAP = new HashMap<>();
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java
index 2b39e81..9163c47 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/utils/HiveDDLEntityFilter.java
@@ -18,15 +18,20 @@
package org.apache.atlas.hive.hook.utils;
import com.google.common.annotations.VisibleForTesting;
+import org.apache.atlas.ApplicationProperties;
+import org.apache.atlas.hive.hook.HiveHook;
import org.apache.atlas.hive.hook.events.BaseHiveEvent;
import org.apache.atlas.model.instance.AtlasEntity;
import org.apache.atlas.model.instance.AtlasObjectId;
-import org.apache.atlas.model.instance.AtlasRelatedObjectId;
import org.apache.atlas.model.notification.HookNotification;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
@@ -36,14 +41,38 @@ import java.util.function.Function;
import java.util.stream.Collectors;
public class HiveDDLEntityFilter implements EntityFilter {
+ private static final Logger LOG = LoggerFactory.getLogger(HiveDDLEntityFilter.class);
+
+ private static final Set<String> defaultPathTypes = new HashSet<String>() {{
+ add(AtlasPathExtractorUtil.HDFS_TYPE_PATH);
+ add(AtlasPathExtractorUtil.ADLS_GEN2_DIRECTORY);
+ add(AtlasPathExtractorUtil.GCS_VIRTUAL_DIR);
+ add(AtlasPathExtractorUtil.AWS_S3_V2_PSEUDO_DIR);
+ add(AtlasPathExtractorUtil.AWS_S3_PSEUDO_DIR);
+ }};
+
private static final Set<String> typesToRetain = new HashSet<String>() {{
add(BaseHiveEvent.HIVE_TYPE_PROCESS);
add(BaseHiveEvent.HIVE_TYPE_PROCESS_EXECUTION);
add(BaseHiveEvent.HIVE_TYPE_COLUMN_LINEAGE);
add(BaseHiveEvent.HIVE_DB_DDL);
add(BaseHiveEvent.HIVE_TABLE_DDL);
+ addAll(defaultPathTypes);
+ addAll(getConfiguredTypesToRetain());
}};
+ private static List<String> getConfiguredTypesToRetain() {
+ String[] configuredTypesToRetain = null;
+
+ try {
+ configuredTypesToRetain = ApplicationProperties.get().getStringArray(HiveHook.HOOK_HIVE_FILTER_ENTITY_TYPES_TO_RETAIN);
+ } catch (Exception e) {
+ LOG.error("Failed to load application properties", e);
+ }
+
+ return configuredTypesToRetain != null ? Arrays.asList(configuredTypesToRetain) : new ArrayList<>();
+ }
+
public List<HookNotification> apply(List<HookNotification> incoming) {
if (CollectionUtils.isEmpty(incoming)) {
return incoming;
@@ -164,15 +193,11 @@ public class HiveDDLEntityFilter implements EntityFilter {
}
private static void inferObjectTypeResetGuid(Object o) {
- if (o instanceof AtlasRelatedObjectId) {
- AtlasRelatedObjectId oid = (AtlasRelatedObjectId) o;
- if (oid.getUniqueAttributes() != null) {
- oid.setGuid(null);
- }
- }
if (o instanceof AtlasObjectId) {
- AtlasObjectId oid = (AtlasObjectId) o;
- if (oid.getUniqueAttributes() != null) {
+ AtlasObjectId oid = (AtlasObjectId) o;
+ String typeName = oid.getTypeName();
+
+ if (oid.getUniqueAttributes() != null && !defaultPathTypes.contains(typeName)) {
oid.setGuid(null);
}
} else {
@@ -181,7 +206,9 @@ public class HiveDDLEntityFilter implements EntityFilter {
return;
}
- if (hm.containsKey(BaseHiveEvent.ATTRIBUTE_UNIQUE_ATTRIBUTES)) {
+ String typeName = hm.containsKey(AtlasObjectId.KEY_TYPENAME) ? (String) hm.get(AtlasObjectId.KEY_TYPENAME) : null;
+
+ if (hm.containsKey(BaseHiveEvent.ATTRIBUTE_UNIQUE_ATTRIBUTES) && !defaultPathTypes.contains(typeName)) {
hm.put(BaseHiveEvent.ATTRIBUTE_GUID, null);
}
}
diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java
index 6a7af5e..4dde1dc 100644
--- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java
+++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/utils/ActiveEntityFilterTest.java
@@ -35,6 +35,7 @@ import java.util.Map;
import java.util.Set;
import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNotEquals;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;
@@ -55,6 +56,9 @@ public class ActiveEntityFilterTest {
assertMessageFromFile("hs2-alter-view");
assertMessageFromFile("hs2-drop-table");
assertAtlasEntitiesWithExtInfoFromFile("hs2-create-process");
+ assertAtlasEntitiesWithExtInfoFromFile("hs2-load-inpath");
+ assertAtlasEntitiesWithExtInfoFromFile("hs2-create-db-with-no-pathentities-to-retain", false);
+ assertAtlasEntitiesWithExtInfoFromFile("hs2-load-inpath-with-no-pathentities-to-retain", false);
}
private void assertMessageFromFile(String msgFile) throws IOException {
@@ -135,20 +139,31 @@ public class ActiveEntityFilterTest {
}
private void assertAtlasEntitiesWithExtInfoFromFile(String entityFile) throws IOException {
+ assertAtlasEntitiesWithExtInfoFromFile(entityFile, true);
+ }
+
+ private void assertAtlasEntitiesWithExtInfoFromFile(String entityFile, boolean retainPathEntities) throws IOException {
AtlasEntity.AtlasEntitiesWithExtInfo incoming = TestResourceFileUtils.readObjectFromJson("", entityFile, AtlasEntity.AtlasEntitiesWithExtInfo.class);
AtlasEntity.AtlasEntitiesWithExtInfo expected = TestResourceFileUtils.readObjectFromJson("", entityFile + FILE_SUFFIX, AtlasEntity.AtlasEntitiesWithExtInfo.class);
HiveDDLEntityFilter hiveLineageEntityFilter = new HiveDDLEntityFilter();
AtlasEntity.AtlasEntitiesWithExtInfo actual = hiveLineageEntityFilter.apply(incoming);
- assertAtlasEntitiesWithExtInfo(actual, expected);
+
+ if (retainPathEntities) {
+ assertAtlasEntitiesWithExtInfo(actual, expected);
+ } else {
+ assertAtlasEntitiesWithNoPathEntitiesToRetain(actual, expected);
+ }
}
private void assertAtlasEntitiesWithExtInfo(AtlasEntity.AtlasEntitiesWithExtInfo actual, AtlasEntity.AtlasEntitiesWithExtInfo expected) {
assertNotNull(actual);
assertNotNull(expected);
- assertEquals(actual.getEntities().size(), expected.getEntities().size());
- assertEntity(actual.getEntities(), expected.getEntities());
+ if (expected.getEntities() != null && actual.getEntities() != null) {
+ assertEquals(actual.getEntities().size(), expected.getEntities().size());
+ assertEntity(actual.getEntities(), expected.getEntities());
+ }
assertEquals(MapUtils.isEmpty(actual.getReferredEntities()), MapUtils.isEmpty(expected.getReferredEntities()));
if (expected.getReferredEntities() != null && actual.getReferredEntities() != null) {
@@ -156,6 +171,19 @@ public class ActiveEntityFilterTest {
}
}
+ private void assertAtlasEntitiesWithNoPathEntitiesToRetain(AtlasEntity.AtlasEntitiesWithExtInfo actual, AtlasEntity.AtlasEntitiesWithExtInfo expected) {
+ assertNotNull(actual);
+ assertNotNull(expected);
+
+ if (expected.getEntities() != null && actual.getEntities() != null) {
+ assertNotEquals(actual.getEntities().size(), expected.getEntities().size());
+ }
+
+ if (expected.getReferredEntities() != null && actual.getReferredEntities() != null) {
+ assertNotEquals(actual.getReferredEntities().size(), expected.getReferredEntities().size());
+ }
+ }
+
private void assertEntity(Map<String, AtlasEntity> actual, Map<String, AtlasEntity> expected) {
assertEquals(actual.size(), expected.size());
}
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json
index 42553b5..881ee10 100644
--- a/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json
+++ b/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json
@@ -25,6 +25,30 @@
}
},
"proxy": false
+ },
+ {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db@cm",
+ "clusterName": "cm",
+ "name": "/warehouse/tablespace/external/hive/cadb02.db"
+ },
+ "guid": "-14529329955589450",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "relationshipAttributes": {
+ "hiveDb": {
+ "guid": "-14529329955589448",
+ "typeName": "hive_db",
+ "uniqueAttributes": {
+ "qualifiedName": "cadb02@cm"
+ },
+ "relationshipType": "hive_db_location"
+ }
+ },
+ "proxy": false
}
]
}
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-db-with-no-pathentities-to-retain-v2.json
similarity index 100%
copy from addons/hive-bridge/src/test/resources/json/hs2-create-db-v2.json
copy to addons/hive-bridge/src/test/resources/json/hs2-create-db-with-no-pathentities-to-retain-v2.json
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-db-with-no-pathentities-to-retain.json b/addons/hive-bridge/src/test/resources/json/hs2-create-db-with-no-pathentities-to-retain.json
new file mode 100644
index 0000000..a5b810f
--- /dev/null
+++ b/addons/hive-bridge/src/test/resources/json/hs2-create-db-with-no-pathentities-to-retain.json
@@ -0,0 +1,73 @@
+{
+ "referredEntities": {},
+ "entities": [
+ {
+ "typeName": "hive_db",
+ "attributes": {
+ "owner": "hive",
+ "ownerType": "USER",
+ "managedLocation": null,
+ "qualifiedName": "cadb02@cm",
+ "clusterName": "cm",
+ "name": "cadb02",
+ "location": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db",
+ "parameters": {}
+ },
+ "guid": "-14529329955589448",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "proxy": false
+ },
+ {
+ "typeName": "hive_db_ddl",
+ "attributes": {
+ "serviceType": "hive",
+ "qualifiedName": "cadb02@cm:1616450673617",
+ "execTime": 1616450673617,
+ "queryText": "create database cadb02",
+ "name": "create database cadb02",
+ "userName": "hive"
+ },
+ "guid": "-14529329955589449",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "relationshipAttributes": {
+ "db": {
+ "guid": "-14529329955589448",
+ "typeName": "hive_db",
+ "uniqueAttributes": {
+ "qualifiedName": "cadb02@cm"
+ },
+ "relationshipType": "hive_db_ddl_queries"
+ }
+ },
+ "proxy": false
+ },
+ {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/warehouse/tablespace/external/hive/cadb02.db@cm",
+ "clusterName": "cm",
+ "name": "/warehouse/tablespace/external/hive/cadb02.db"
+ },
+ "guid": "-14529329955589450",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "relationshipAttributes": {
+ "hiveDb": {
+ "guid": "-14529329955589448",
+ "typeName": "hive_db",
+ "uniqueAttributes": {
+ "qualifiedName": "cadb02@cm"
+ },
+ "relationshipType": "hive_db_location"
+ }
+ },
+ "proxy": false
+ }
+ ]
+}
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json
index b67f2ff..801918e 100644
--- a/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json
+++ b/addons/hive-bridge/src/test/resources/json/hs2-create-table-v2.json
@@ -1,5 +1,20 @@
{
- "referredEntities": {},
+ "referredEntities": {
+ "-14529329955589455": {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm",
+ "clusterName": "cm",
+ "name": "/tmp/external/hh6.csv"
+ },
+ "guid": "-14529329955589455",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "proxy": false
+ }
+ },
"entities": [
{
"typeName": "hive_process",
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json
new file mode 100644
index 0000000..dd31aa0
--- /dev/null
+++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-v2.json
@@ -0,0 +1,18 @@
+{
+ "referredEntities": {
+ "-14529329955589455": {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm",
+ "clusterName": "cm",
+ "name": "/tmp/external/hh6.csv"
+ },
+ "guid": "-14529329955589455",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "proxy": false
+ }
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain-v2.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain-v2.json
new file mode 100644
index 0000000..7f90d19
--- /dev/null
+++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain-v2.json
@@ -0,0 +1,4 @@
+{
+ "referredEntities": {
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain.json
new file mode 100644
index 0000000..dd31aa0
--- /dev/null
+++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath-with-no-pathentities-to-retain.json
@@ -0,0 +1,18 @@
+{
+ "referredEntities": {
+ "-14529329955589455": {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm",
+ "clusterName": "cm",
+ "name": "/tmp/external/hh6.csv"
+ },
+ "guid": "-14529329955589455",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "proxy": false
+ }
+ }
+}
\ No newline at end of file
diff --git a/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json
new file mode 100644
index 0000000..dd31aa0
--- /dev/null
+++ b/addons/hive-bridge/src/test/resources/json/hs2-load-inpath.json
@@ -0,0 +1,18 @@
+{
+ "referredEntities": {
+ "-14529329955589455": {
+ "typeName": "hdfs_path",
+ "attributes": {
+ "path": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv",
+ "qualifiedName": "hdfs://ve0126.halxg.cloudera.com:8020/tmp/external/hh6.csv@cm",
+ "clusterName": "cm",
+ "name": "/tmp/external/hh6.csv"
+ },
+ "guid": "-14529329955589455",
+ "isIncomplete": false,
+ "provenanceType": 0,
+ "version": 0,
+ "proxy": false
+ }
+ }
+}
\ No newline at end of file