You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@nifi.apache.org by pv...@apache.org on 2022/03/10 12:16:52 UTC
[nifi] branch main updated: NIFI-9764: Atlas reporting task sends 'unknown' hive_table when table name is not available
This is an automated email from the ASF dual-hosted git repository.
pvillard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/nifi.git
The following commit(s) were added to refs/heads/main by this push:
new 92202a5 NIFI-9764: Atlas reporting task sends 'unknown' hive_table when table name is not available
92202a5 is described below
commit 92202a5b9502609c6d26f7b5ee299bfc36441fb1
Author: Peter Turcsanyi <tu...@apache.org>
AuthorDate: Fri Mar 4 21:42:34 2022 +0100
NIFI-9764: Atlas reporting task sends 'unknown' hive_table when table name is not available
Signed-off-by: Pierre Villard <pi...@gmail.com>
This closes #5839.
---
.../nifi/atlas/provenance/analyzer/Hive2JDBC.java | 27 ++++++++++++----------
.../atlas/provenance/analyzer/TestHive2JDBC.java | 11 +++++----
2 files changed, 21 insertions(+), 17 deletions(-)
diff --git a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/provenance/analyzer/Hive2JDBC.java b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/provenance/analyzer/Hive2JDBC.java
index 51babe8..886cdd0 100644
--- a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/provenance/analyzer/Hive2JDBC.java
+++ b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/main/java/org/apache/nifi/atlas/provenance/analyzer/Hive2JDBC.java
@@ -26,6 +26,7 @@ import org.apache.nifi.util.Tuple;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import java.util.Collections;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -61,6 +62,8 @@ public class Hive2JDBC extends AbstractHiveAnalyzer {
private static final String URI_PATTERN_STR = "jdbc:hive2://([^/]+)/?(.*)$";
private static final Pattern URI_PATTERN = Pattern.compile(URI_PATTERN_STR);
+ private static final String UNKNOWN_TABLE = "unknown";
+
@Override
public DataSetRefs analyze(AnalysisContext context, ProvenanceEventRecord event) {
@@ -72,7 +75,7 @@ public class Hive2JDBC extends AbstractHiveAnalyzer {
final Matcher uriMatcher = URI_PATTERN.matcher(transitUri);
if (!uriMatcher.matches()) {
- logger.warn("Unexpected transit URI: {}", new Object[]{transitUri});
+ logger.warn("Unexpected transit URI: {}", transitUri);
return null;
}
@@ -89,13 +92,20 @@ public class Hive2JDBC extends AbstractHiveAnalyzer {
connectedDatabaseName = "default";
}
- final Set<Tuple<String, String>> inputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_INPUT_TABLES));
- final Set<Tuple<String, String>> outputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_OUTPUT_TABLES));
+ Set<Tuple<String, String>> inputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_INPUT_TABLES));
+ Set<Tuple<String, String>> outputTables = parseTableNames(connectedDatabaseName, event.getAttribute(ATTR_OUTPUT_TABLES));
if (inputTables.isEmpty() && outputTables.isEmpty()) {
- // If input/output tables are unknown, create database level lineage.
+ // If input/output tables are unknown, create hive_table entity with name 'unknown' (hive_db is not a DataSet entity and therefore it cannot be used in the lineage).
// Handle case insensitivity of database and table names in Hive: send names uniformly in lower case
- return getDatabaseRef(event.getComponentId(), event.getEventType(), namespace, connectedDatabaseName.toLowerCase());
+ final ProvenanceEventType eventType = event.getEventType();
+ if (eventType == ProvenanceEventType.RECEIVE || eventType == ProvenanceEventType.FETCH) {
+ logger.warn("Input table name is missing, defaults to '{}'. Transit URI: {}", UNKNOWN_TABLE, transitUri);
+ inputTables = Collections.singleton(new Tuple<>(connectedDatabaseName.toLowerCase(), UNKNOWN_TABLE));
+ } else if (eventType == ProvenanceEventType.SEND) {
+ logger.warn("Output table name is missing, defaults to '{}'. Transit URI: {}", UNKNOWN_TABLE, transitUri);
+ outputTables = Collections.singleton(new Tuple<>(connectedDatabaseName.toLowerCase(), UNKNOWN_TABLE));
+ }
}
final DataSetRefs refs = new DataSetRefs(event.getComponentId());
@@ -104,13 +114,6 @@ public class Hive2JDBC extends AbstractHiveAnalyzer {
return refs;
}
- private DataSetRefs getDatabaseRef(String componentId, ProvenanceEventType eventType,
- String namespace, String databaseName) {
- final Referenceable ref = createDatabaseRef(namespace, databaseName);
-
- return singleDataSetRef(componentId, eventType, ref);
- }
-
private void addRefs(DataSetRefs refs, boolean isInput, String namespace,
Set<Tuple<String, String>> tableNames) {
tableNames.forEach(tableName -> {
diff --git a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/test/java/org/apache/nifi/atlas/provenance/analyzer/TestHive2JDBC.java b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/test/java/org/apache/nifi/atlas/provenance/analyzer/TestHive2JDBC.java
index f6f32a8..7e647c6 100644
--- a/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/test/java/org/apache/nifi/atlas/provenance/analyzer/TestHive2JDBC.java
+++ b/nifi-nar-bundles/nifi-atlas-bundle/nifi-atlas-reporting-task/src/test/java/org/apache/nifi/atlas/provenance/analyzer/TestHive2JDBC.java
@@ -45,10 +45,11 @@ public class TestHive2JDBC {
/**
* If a provenance event does not have table name attributes,
- * then a database lineage should be created.
+ * then a table lineage is created with table name 'unknown'.
+ * Database lineage cannot be sent to Atlas because hive_db is not a DataSet entity.
*/
@Test
- public void testDatabaseLineage() {
+ public void testUnknownTableLineage() {
final String processorName = "PutHiveQL";
final String transitUri = "jdbc:hive2://0.example.com:10000/database_A";
final ProvenanceEventRecord record = Mockito.mock(ProvenanceEventRecord.class);
@@ -69,9 +70,9 @@ public class TestHive2JDBC {
assertEquals(0, refs.getInputs().size());
assertEquals(1, refs.getOutputs().size());
Referenceable ref = refs.getOutputs().iterator().next();
- assertEquals("hive_db", ref.getTypeName());
- assertEquals("database_a", ref.get(ATTR_NAME));
- assertEquals("database_a@namespace1", ref.get(ATTR_QUALIFIED_NAME));
+ assertEquals("hive_table", ref.getTypeName());
+ assertEquals("unknown", ref.get(ATTR_NAME));
+ assertEquals("database_a.unknown@namespace1", ref.get(ATTR_QUALIFIED_NAME));
}
/**