You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2019/10/23 20:52:07 UTC
[atlas] branch branch-2.0 updated: ATLAS-3461: Changed from
hardcoded match to pattern bases regex match ATLAS-3461: UT Added
This is an automated email from the ASF dual-hosted git repository.
sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 25c8d2d ATLAS-3461: Changed from hardcoded match to pattern bases regex match ATLAS-3461: UT Added
25c8d2d is described below
commit 25c8d2d553c317b06395e015b3e3aa55b61eb8af
Author: Sid <si...@gmail.com>
AuthorDate: Sun Oct 13 12:23:14 2019 -0700
ATLAS-3461: Changed from hardcoded match to pattern bases regex match ATLAS-3461: UT Added
Signed-off-by: Sarath Subramanian <sa...@apache.org>
(cherry picked from commit f7df0f1b8173a509c0342928710ad2036759fe5c)
---
.../atlas/impala/hook/ImpalaOperationParser.java | 37 +++--
.../apache/atlas/impala/ImpalaLineageToolIT.java | 176 +++++++++++++++++++++
.../impalaAlterViewAsSelectWithCommentSpaces.json | 66 ++++++++
...impalaCreateTableAsSelectWithCommentSpaces.json | 66 ++++++++
.../impalaCreateViewWithCommentSpaces.json | 66 ++++++++
5 files changed, 398 insertions(+), 13 deletions(-)
diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
index b9dd894..98f3eed 100644
--- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
+++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
import org.apache.atlas.impala.model.ImpalaOperationType;
import org.apache.commons.lang.StringUtils;
+import java.util.regex.Pattern;
/**
* Parse an Impala query text and output the impala operation type
*/
public class ImpalaOperationParser {
+ private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL); // block comments, DOTALL so they may span lines
+ // Statement patterns accept any leading whitespace (spaces, tabs, line breaks), e.g. what is left once a leading comment is stripped.
+ private static final Pattern CREATE_VIEW_PATTERN =
+ Pattern.compile("^\\s*\\bcreate\\b.*\\bview\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+ // create ... table ... as ... select ...
+ private static final Pattern CREATE_TABLE_AS_SELECT_PATTERN =
+ Pattern.compile("^\\s*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+ // alter ... view ... as ... select ...; "as" must be a whole word, consistent with the create-table-as-select pattern
+ private static final Pattern ALTER_VIEW_AS_SELECT_PATTERN =
+ Pattern.compile("^\\s*\\balter\\b.*\\bview\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+ // insert ... (into|overwrite) ... select ... from ...
+ private static final Pattern INSERT_SELECT_FROM_PATTERN =
+ Pattern.compile("^\\s*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+
public ImpalaOperationParser() {
}
public static ImpalaOperationType getImpalaOperationType(String queryText) {
- // Impala does no generate lineage record for command "LOAD DATA INPATH"
- if (StringUtils.startsWithIgnoreCase(queryText, "create view")) {
+ // Impala does not generate a lineage record for the "LOAD DATA INPATH" command
+ String queryTextWithNoComments = COMMENT_PATTERN.matcher(queryText).replaceAll("");
+ if (doesMatch(queryTextWithNoComments, CREATE_VIEW_PATTERN)) {
return ImpalaOperationType.CREATEVIEW;
- } else if (StringUtils.startsWithIgnoreCase(queryText, "create table") &&
- StringUtils.containsIgnoreCase(queryText, "as select")) {
+ } else if (doesMatch(queryTextWithNoComments, CREATE_TABLE_AS_SELECT_PATTERN)) {
return ImpalaOperationType.CREATETABLE_AS_SELECT;
- } else if (StringUtils.startsWithIgnoreCase(queryText, "alter view") &&
- StringUtils.containsIgnoreCase(queryText, "as select")) {
+ } else if (doesMatch(queryTextWithNoComments, ALTER_VIEW_AS_SELECT_PATTERN)) {
return ImpalaOperationType.ALTERVIEW_AS;
- } else if (StringUtils.containsIgnoreCase(queryText, "insert into") &&
- StringUtils.containsIgnoreCase(queryText, "select") &&
- StringUtils.containsIgnoreCase(queryText, "from")) {
- return ImpalaOperationType.QUERY;
- } else if (StringUtils.containsIgnoreCase(queryText,"insert overwrite") &&
- StringUtils.containsIgnoreCase(queryText, "select") &&
- StringUtils.containsIgnoreCase(queryText, "from")) {
+ } else if (doesMatch(queryTextWithNoComments, INSERT_SELECT_FROM_PATTERN)) {
return ImpalaOperationType.QUERY;
}
@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
return ImpalaOperationType.UNKNOWN;
}
+ private static boolean doesMatch(final String queryText, final Pattern pattern) {
+ return pattern.matcher(queryText).matches(); // matches() succeeds only when the entire query text fits the pattern
+ }
}
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
index dc4d133..53e9b12 100644
--- a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
+++ b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
@@ -102,6 +102,68 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
+ * This test covers a create view query with extra comments and spaces added in between:
+ * 1) ImpalaLineageTool can parse one lineage file that contains " create view" command lineage
+ * 2) Lineage is sent to Atlas
+ * 3) The lineage can be retrieved from Atlas
+ */
+ @Test
+ public void testCreateViewWithCommentSpacesFromFile() {
+ // this file contains a single lineage record for "create view".
+ // It has table vertex with createTime
+ String IMPALA = dir + "impalaCreateViewWithCommentSpaces.json";
+ String IMPALA_WAL = dir + "WALimpala.wal";
+
+ List<ImpalaQuery> lineageList = new ArrayList<>();
+ ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+ try {
+ // create database and tables to simulate Impala behavior that Impala updates metadata
+ // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+ // Atlas can handle lineage notification
+ String dbName = "db_8";
+ createDatabase(dbName);
+
+ String sourceTableName = "table_1";
+ createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+ String targetTableName = "view_1";
+ createTable(dbName, targetTableName,"(count int, id string)", false);
+
+ // process lineage record, and send corresponding notification to Atlas
+ String[] args = new String[]{"-d", "./", "-p", "impala"};
+ ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+ toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+ // verify the process is saved in Atlas
+ // the value is from info in IMPALA_3
+ String createTime = new Long((long)(1554750072)*1000).toString();
+ String processQFName =
+ "db_8.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+ CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+ processQFName = processQFName.toLowerCase();
+
+ String queryString = " create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1";
+ AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
+ AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+ AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+ BaseImpalaEvent.ATTRIBUTE_PROCESS));
+ Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+ Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
+
+ String guid = assertTableIsRegistered(dbName, targetTableName);
+ AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
+ List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+ assertNotNull(ddlQueries);
+ assertEquals(ddlQueries.size(), 1);
+ } catch (Exception e) {
+ System.out.print("Appending file error");
+ }
+ }
+
+ /**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage,
* but there is no table vertex with createTime.
@@ -232,6 +294,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
+ * This test covers a create table as select query with extra comments and spaces added:
+ * 1) ImpalaLineageTool can parse one lineage file that contains "create table as select" command lineage,
+ * and there is a table vertex with createTime.
+ * 2) Lineage is sent to Atlas
+ * 3) The lineage can be retrieved from Atlas
+ */
+ @Test
+ public void testCreateTableAsSelectWithCommentSpacesFromFile() throws Exception {
+ String IMPALA = dir + "impalaCreateTableAsSelectWithCommentSpaces.json";
+ String IMPALA_WAL = dir + "WALimpala.wal";
+
+ ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+ // create database and tables to simulate Impala behavior that Impala updates metadata
+ // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+ // Atlas can handle lineage notification
+ String dbName = "db_9";
+ createDatabase(dbName);
+
+ String sourceTableName = "table_1";
+ createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+ String targetTableName = "table_2";
+ createTable(dbName, targetTableName,"(count int, id string)", false);
+
+ // process lineage record, and send corresponding notification to Atlas
+ String[] args = new String[]{"-d", "./", "-p", "impala"};
+ ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+ toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+ // verify the process is saved in Atlas
+ // NOTE(review): TABLE_CREATE_TIME presumably equals the target's tableCreateTime in the IMPALA lineage file - confirm
+ String createTime = new Long(TABLE_CREATE_TIME*1000).toString();
+ String processQFName =
+ dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+ CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+ processQFName = processQFName.toLowerCase();
+
+ String queryString = "create /* Test */ table " + dbName + "."
+ + targetTableName + " as /* Test */ select count, id from " + dbName + "." + sourceTableName;
+ AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
+ AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+ AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+ BaseImpalaEvent.ATTRIBUTE_PROCESS));
+ Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); // the execution must point back to the validated process
+ Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); // exactly one execution is expected
+
+ String guid = assertTableIsRegistered(dbName, targetTableName);
+ AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
+ List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+ assertNotNull(ddlQueries);
+ assertEquals(ddlQueries.size(), 1); // the target table must carry exactly one DDL query
+ }
+
+ /**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
* there is table vertex with createTime.
@@ -288,6 +407,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
}
/**
+ * This test covers an alter view as select query with extra comments and spaces present:
+ * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
+ * and there is a table vertex with createTime.
+ * 2) Lineage is sent to Atlas
+ * 3) The lineage can be retrieved from Atlas
+ */
+ @Test
+ public void testAlterViewAsSelectWithCommentSpacesFromFile() throws Exception {
+ String IMPALA = dir + "impalaAlterViewAsSelectWithCommentSpaces.json";
+ String IMPALA_WAL = dir + "WALimpala.wal";
+
+ ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+ // create database and tables to simulate Impala behavior that Impala updates metadata
+ // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+ // Atlas can handle lineage notification
+ String dbName = "db_10";
+ createDatabase(dbName);
+
+ String sourceTableName = "table_1";
+ createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+ String targetTableName = "view_1";
+ createTable(dbName, targetTableName,"(count int, id string)", false);
+
+ // process lineage record, and send corresponding notification to Atlas
+ String[] args = new String[]{"-d", "./", "-p", "impala"};
+ ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+ toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+ // verify the process is saved in Atlas
+ // NOTE(review): TABLE_CREATE_TIME presumably equals the target's tableCreateTime in the IMPALA lineage file - confirm
+ String createTime = new Long(TABLE_CREATE_TIME*1000).toString();
+ String processQFName =
+ dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+ CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+ processQFName = processQFName.toLowerCase();
+
+ String queryString = "alter /* comment1 */ view " + dbName + "." + targetTableName
+ + " as select /* comment1 */ count, id from " + dbName + "." + sourceTableName;
+ AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
+ AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+ AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+ BaseImpalaEvent.ATTRIBUTE_PROCESS));
+ Assert.assertEquals(process1.getGuid(), processEntity1.getGuid()); // the execution must point back to the validated process
+ Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1); // exactly one execution is expected
+
+ String guid = assertTableIsRegistered(dbName, targetTableName);
+ AtlasEntity entity = atlasClientV2.getEntityByGuid(guid).getEntity();
+ List ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+ assertNotNull(ddlQueries);
+ assertEquals(ddlQueries.size(), 1); // the target view must carry exactly one DDL query
+ }
+
+ /**
* This tests
* 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage,
* there is table vertex with createTime.
diff --git a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json
new file mode 100644
index 0000000..322abb5
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+ "queryText":"alter /* comment1 */ view db_10.view_1 as select /* comment1 */ count, id from db_10.table_1",
+ "queryId":"3a441d0c130962f8:7f634aec00000000",
+ "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+ "user":"admin",
+ "timestamp":1554750072,
+ "endTime":1554750554,
+ "edges":[
+ {
+ "sources":[
+ 1
+ ],
+ "targets":[
+ 0
+ ],
+ "edgeType":"PROJECTION"
+ },
+ {
+ "sources":[
+ 3
+ ],
+ "targets":[
+ 2
+ ],
+ "edgeType":"PROJECTION"
+ }
+ ],
+ "vertices":[
+ {
+ "id":0,
+ "vertexType":"COLUMN",
+ "vertexId":"db_10.view_1.count",
+ "metadata": {
+ "tableName": "db_10.view_1",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":1,
+ "vertexType":"COLUMN",
+ "vertexId":"db_10.table_1.count",
+ "metadata": {
+ "tableName": "db_10.table_1",
+ "tableCreateTime": 1554750070
+ }
+ },
+ {
+ "id":2,
+ "vertexType":"COLUMN",
+ "vertexId":"db_10.view_1.id",
+ "metadata": {
+ "tableName": "db_10.view_1",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":3,
+ "vertexType":"COLUMN",
+ "vertexId":"db_10.table_1.id",
+ "metadata": {
+ "tableName": "db_10.table_1",
+ "tableCreateTime": 1554750070
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json
new file mode 100644
index 0000000..f588190
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+ "queryText":"create /* Test */ table db_9.table_2 as /* Test */ select count, id from db_9.table_1",
+ "queryId":"3a441d0c130962f8:7f634aec00000000",
+ "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+ "user":"admin",
+ "timestamp":1554750072,
+ "endTime":1554750554,
+ "edges":[
+ {
+ "sources":[
+ 1
+ ],
+ "targets":[
+ 0
+ ],
+ "edgeType":"PROJECTION"
+ },
+ {
+ "sources":[
+ 3
+ ],
+ "targets":[
+ 2
+ ],
+ "edgeType":"PROJECTION"
+ }
+ ],
+ "vertices":[
+ {
+ "id":0,
+ "vertexType":"COLUMN",
+ "vertexId":"db_9.table_2.count",
+ "metadata": {
+ "tableName": "db_9.table_2",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":1,
+ "vertexType":"COLUMN",
+ "vertexId":"db_9.table_1.count",
+ "metadata": {
+ "tableName": "db_9.table_1",
+ "tableCreateTime": 1554750070
+ }
+ },
+ {
+ "id":2,
+ "vertexType":"COLUMN",
+ "vertexId":"db_9.table_2.id",
+ "metadata": {
+ "tableName": "db_9.table_2",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":3,
+ "vertexType":"COLUMN",
+ "vertexId":"db_9.table_1.id",
+ "metadata": {
+ "tableName": "db_9.table_1",
+ "tableCreateTime": 1554750070
+ }
+ }
+ ]
+}
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json
new file mode 100644
index 0000000..e49b6b7
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+ "queryText":" create /* comment1 */ view db_8.view_1 as select /* comment2 */ count, id from db_8.table_1",
+ "queryId":"3a441d0c130962f8:7f634aec00000000",
+ "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+ "user":"admin",
+ "timestamp":1554750072,
+ "endTime":1554750554,
+ "edges":[
+ {
+ "sources":[
+ 1
+ ],
+ "targets":[
+ 0
+ ],
+ "edgeType":"PROJECTION"
+ },
+ {
+ "sources":[
+ 3
+ ],
+ "targets":[
+ 2
+ ],
+ "edgeType":"PROJECTION"
+ }
+ ],
+ "vertices":[
+ {
+ "id":0,
+ "vertexType":"COLUMN",
+ "vertexId":"db_8.view_1.count",
+ "metadata": {
+ "tableName": "db_8.view_1",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":1,
+ "vertexType":"COLUMN",
+ "vertexId":"db_8.table_1.count",
+ "metadata": {
+ "tableName": "db_8.table_1",
+ "tableCreateTime": 1554750070
+ }
+ },
+ {
+ "id":2,
+ "vertexType":"COLUMN",
+ "vertexId":"db_8.view_1.id",
+ "metadata": {
+ "tableName": "db_8.view_1",
+ "tableCreateTime": 1554750072
+ }
+ },
+ {
+ "id":3,
+ "vertexType":"COLUMN",
+ "vertexId":"db_8.table_1.id",
+ "metadata": {
+ "tableName": "db_8.table_1",
+ "tableCreateTime": 1554750070
+ }
+ }
+ ]
+}
\ No newline at end of file