You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2019/10/23 20:51:36 UTC

[atlas] branch master updated: ATLAS-3461: Changed from hardcoded match to pattern bases regex match ATLAS-3461: UT Added

This is an automated email from the ASF dual-hosted git repository.

sarath pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git


The following commit(s) were added to refs/heads/master by this push:
     new f7df0f1  ATLAS-3461: Changed from hardcoded match to pattern bases regex match ATLAS-3461: UT Added
f7df0f1 is described below

commit f7df0f1b8173a509c0342928710ad2036759fe5c
Author: Sid <si...@gmail.com>
AuthorDate: Sun Oct 13 12:23:14 2019 -0700

    ATLAS-3461: Changed from hardcoded match to pattern bases regex match ATLAS-3461: UT Added
    
    Signed-off-by: Sarath Subramanian <sa...@apache.org>
---
 .../atlas/impala/hook/ImpalaOperationParser.java   |  37 +++--
 .../apache/atlas/impala/ImpalaLineageToolIT.java   | 176 +++++++++++++++++++++
 .../impalaAlterViewAsSelectWithCommentSpaces.json  |  66 ++++++++
 ...impalaCreateTableAsSelectWithCommentSpaces.json |  66 ++++++++
 .../impalaCreateViewWithCommentSpaces.json         |  66 ++++++++
 5 files changed, 398 insertions(+), 13 deletions(-)

diff --git a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
index b9dd894..98f3eed 100644
--- a/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
+++ b/addons/impala-bridge/src/main/java/org/apache/atlas/impala/hook/ImpalaOperationParser.java
@@ -20,32 +20,40 @@ package org.apache.atlas.impala.hook;
 
 import org.apache.atlas.impala.model.ImpalaOperationType;
 import org.apache.commons.lang.StringUtils;
+import java.util.regex.Pattern;
 
 /**
  * Parse an Impala query text and output the impala operation type
  */
 public class ImpalaOperationParser {
 
+    private static final Pattern COMMENT_PATTERN = Pattern.compile("/\\*.*?\\*/", Pattern.DOTALL);
+
+    private static final Pattern CREATE_VIEW_PATTERN =
+            Pattern.compile("^[ ]*\\bcreate\\b.*\\bview\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern CREATE_TABLE_AS_SELECT_PATTERN =
+            Pattern.compile("^[ ]*\\bcreate\\b.*\\btable\\b.*\\bas\\b.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern ALTER_VIEW_AS_SELECT_PATTERN =
+            Pattern.compile("^[ ]*\\balter\\b.*\\bview\\b.*\\bas.*\\bselect\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+
+    private static final Pattern INSERT_SELECT_FROM_PATTERN =
+            Pattern.compile("^[ ]*\\binsert\\b.*\\b(into|overwrite)\\b.*\\bselect\\b.*\\bfrom\\b.*", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
+
     public ImpalaOperationParser() {
     }
 
     public static ImpalaOperationType getImpalaOperationType(String queryText) {
-        // Impala does no generate lineage record for command "LOAD DATA INPATH"
-        if (StringUtils.startsWithIgnoreCase(queryText, "create view")) {
+        // Impala does not generate a lineage record for the command "LOAD DATA INPATH"
+        String queryTextWithNoComments = COMMENT_PATTERN.matcher(queryText).replaceAll("");
+        if (doesMatch(queryTextWithNoComments, CREATE_VIEW_PATTERN)) {
             return ImpalaOperationType.CREATEVIEW;
-        } else if (StringUtils.startsWithIgnoreCase(queryText, "create table") &&
-        StringUtils.containsIgnoreCase(queryText, "as select")) {
+        } else if (doesMatch(queryTextWithNoComments, CREATE_TABLE_AS_SELECT_PATTERN)) {
             return ImpalaOperationType.CREATETABLE_AS_SELECT;
-        } else if (StringUtils.startsWithIgnoreCase(queryText, "alter view") &&
-            StringUtils.containsIgnoreCase(queryText, "as select")) {
+        } else if (doesMatch(queryTextWithNoComments, ALTER_VIEW_AS_SELECT_PATTERN)) {
             return ImpalaOperationType.ALTERVIEW_AS;
-        } else if (StringUtils.containsIgnoreCase(queryText, "insert into") &&
-            StringUtils.containsIgnoreCase(queryText, "select") &&
-            StringUtils.containsIgnoreCase(queryText, "from")) {
-            return ImpalaOperationType.QUERY;
-        } else if (StringUtils.containsIgnoreCase(queryText,"insert overwrite") &&
-            StringUtils.containsIgnoreCase(queryText, "select") &&
-            StringUtils.containsIgnoreCase(queryText, "from")) {
+        } else if (doesMatch(queryTextWithNoComments, INSERT_SELECT_FROM_PATTERN)) {
             return ImpalaOperationType.QUERY;
         }
 
@@ -64,5 +72,8 @@ public class ImpalaOperationParser {
         return ImpalaOperationType.UNKNOWN;
     }
 
+    private static boolean doesMatch(final String queryText, final Pattern pattern) {
+        return pattern.matcher(queryText).matches();
+    }
 
 }
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
index dc4d133..53e9b12 100644
--- a/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
+++ b/addons/impala-bridge/src/test/java/org/apache/atlas/impala/ImpalaLineageToolIT.java
@@ -102,6 +102,68 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
     }
 
     /**
+     * This test covers a create view query with extra comments and spaces added in between:
+     * 1) ImpalaLineageTool can parse one lineage file that contains " create   view" command lineage
+     * 2) Lineage is sent to Atlas
+     * 3) The lineage can be retrieved from Atlas
+     */
+    @Test
+    public void testCreateViewWithCommentSpacesFromFile() {
+        // this file contains a single lineage record for "create view".
+        // It has table vertex with createTime
+        String IMPALA = dir + "impalaCreateViewWithCommentSpaces.json";
+        String IMPALA_WAL = dir + "WALimpala.wal";
+
+        List<ImpalaQuery> lineageList = new ArrayList<>();
+        ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+        try {
+            // create database and tables to simulate Impala behavior that Impala updates metadata
+            // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+            // Atlas can handle lineage notification
+            String dbName = "db_8";
+            createDatabase(dbName);
+
+            String sourceTableName = "table_1";
+            createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+            String targetTableName = "view_1";
+            createTable(dbName, targetTableName,"(count int, id string)", false);
+
+            // process lineage record, and send corresponding notification to Atlas
+            String[] args = new String[]{"-d", "./", "-p", "impala"};
+            ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+            toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+            // verify the process is saved in Atlas
+            // the value is the tableCreateTime of db_8.view_1 in the IMPALA lineage file
+            String createTime = new Long((long)(1554750072)*1000).toString();
+            String processQFName =
+                    "db_8.view_1" + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+                            CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+            processQFName = processQFName.toLowerCase();
+
+            String      queryString             = " create   /* comment1 */ view db_8.view_1 as   select /* comment2 */ count, id from db_8.table_1";
+            AtlasEntity processEntity1          = validateProcess(processQFName, queryString);
+            AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+            AtlasObjectId process1              = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+                    BaseImpalaEvent.ATTRIBUTE_PROCESS));
+            Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+            Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
+
+            String      guid       = assertTableIsRegistered(dbName, targetTableName);
+            AtlasEntity entity     = atlasClientV2.getEntityByGuid(guid).getEntity();
+            List        ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+            assertNotNull(ddlQueries);
+            assertEquals(ddlQueries.size(), 1);
+        } catch (Exception e) {
+            System.out.print("Appending file error");
+        }
+    }
+
+    /**
      * This tests
      * 1) ImpalaLineageTool can parse one lineage file that contains "create view" command lineage,
      *    but there is no table vertex with createTime.
@@ -232,6 +294,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
     }
 
     /**
+     * This test covers a create table as select query with extra comments and spaces added:
+     * 1) ImpalaLineageTool can parse one lineage file that contains "create   table   as   select" command lineage,
+     *    and there is a table vertex with createTime.
+     * 2) Lineage is sent to Atlas
+     * 3) The lineage can be retrieved from Atlas
+     */
+    @Test
+    public void testCreateTableAsSelectWithCommentSpacesFromFile() throws Exception {
+        String IMPALA = dir + "impalaCreateTableAsSelectWithCommentSpaces.json";
+        String IMPALA_WAL = dir + "WALimpala.wal";
+
+        ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+        // create database and tables to simulate Impala behavior that Impala updates metadata
+        // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+        // Atlas can handle lineage notification
+        String dbName = "db_9";
+        createDatabase(dbName);
+
+        String sourceTableName = "table_1";
+        createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+        String targetTableName = "table_2";
+        createTable(dbName, targetTableName,"(count int, id string)", false);
+
+        // process lineage record, and send corresponding notification to Atlas
+        String[] args = new String[]{"-d", "./", "-p", "impala"};
+        ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+        toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+        // verify the process is saved in Atlas
+        // the value is expected to match the table create time recorded in the IMPALA lineage file.
+        String createTime = new Long(TABLE_CREATE_TIME*1000).toString();
+        String processQFName =
+                dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+                        CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+        processQFName = processQFName.toLowerCase();
+
+        String queryString = "create   /* Test */   table " + dbName + "."
+                + targetTableName + "   as /* Test */ select count, id from " + dbName + "." + sourceTableName;
+        AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+        AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+                BaseImpalaEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
+
+        String      guid       = assertTableIsRegistered(dbName, targetTableName);
+        AtlasEntity entity     = atlasClientV2.getEntityByGuid(guid).getEntity();
+        List        ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        assertNotNull(ddlQueries);
+        assertEquals(ddlQueries.size(), 1);
+    }
+
+    /**
      * This tests
      * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
      *    there is table vertex with createTime.
@@ -288,6 +407,63 @@ public class ImpalaLineageToolIT extends ImpalaLineageITBase {
     }
 
     /**
+     * This test covers an alter view as select query with extra comments and spaces added:
+     * 1) ImpalaLineageTool can parse one lineage file that contains "alter view as select" command lineage,
+     *    and there is a table vertex with createTime.
+     * 2) Lineage is sent to Atlas
+     * 3) The lineage can be retrieved from Atlas
+     */
+    @Test
+    public void testAlterViewAsSelectWithCommentSpacesFromFile() throws Exception {
+        String IMPALA = dir + "impalaAlterViewAsSelectWithCommentSpaces.json";
+        String IMPALA_WAL = dir + "WALimpala.wal";
+
+        ImpalaLineageHook impalaLineageHook = new ImpalaLineageHook();
+
+        // create database and tables to simulate Impala behavior that Impala updates metadata
+        // to HMS and HMSHook sends the metadata to Atlas, which has to happen before
+        // Atlas can handle lineage notification
+        String dbName = "db_10";
+        createDatabase(dbName);
+
+        String sourceTableName = "table_1";
+        createTable(dbName, sourceTableName,"(id string, count int)", false);
+
+        String targetTableName = "view_1";
+        createTable(dbName, targetTableName,"(count int, id string)", false);
+
+        // process lineage record, and send corresponding notification to Atlas
+        String[] args = new String[]{"-d", "./", "-p", "impala"};
+        ImpalaLineageTool toolInstance = new ImpalaLineageTool(args);
+        toolInstance.importHImpalaEntities(impalaLineageHook, IMPALA, IMPALA_WAL);
+
+        // verify the process is saved in Atlas
+        // the value is expected to match the table create time recorded in the IMPALA lineage file.
+        String createTime = new Long(TABLE_CREATE_TIME*1000).toString();
+        String processQFName =
+                dbName + "." + targetTableName + AtlasImpalaHookContext.QNAME_SEP_METADATA_NAMESPACE +
+                        CLUSTER_NAME + AtlasImpalaHookContext.QNAME_SEP_PROCESS + createTime;
+
+        processQFName = processQFName.toLowerCase();
+
+        String queryString = "alter   /* comment1 */ view " + dbName + "." + targetTableName
+                + " as   select /* comment1 */ count, id from " + dbName + "." + sourceTableName;
+        AtlasEntity processEntity1 = validateProcess(processQFName, queryString);
+        AtlasEntity processExecutionEntity1 = validateProcessExecution(processEntity1, queryString);
+        AtlasObjectId process1 = toAtlasObjectId(processExecutionEntity1.getRelationshipAttribute(
+                BaseImpalaEvent.ATTRIBUTE_PROCESS));
+        Assert.assertEquals(process1.getGuid(), processEntity1.getGuid());
+        Assert.assertEquals(numberOfProcessExecutions(processEntity1), 1);
+
+        String      guid       = assertTableIsRegistered(dbName, targetTableName);
+        AtlasEntity entity     = atlasClientV2.getEntityByGuid(guid).getEntity();
+        List        ddlQueries = (List) entity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        assertNotNull(ddlQueries);
+        assertEquals(ddlQueries.size(), 1);
+    }
+
+    /**
      * This tests
      * 1) ImpalaLineageTool can parse one lineage file that contains "insert into" command lineage,
      *    there is table vertex with createTime.
diff --git a/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json
new file mode 100644
index 0000000..322abb5
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaAlterViewAsSelectWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+  "queryText":"alter   /* comment1 */ view db_10.view_1 as   select /* comment1 */ count, id from db_10.table_1",
+  "queryId":"3a441d0c130962f8:7f634aec00000000",
+  "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+  "user":"admin",
+  "timestamp":1554750072,
+  "endTime":1554750554,
+  "edges":[
+    {
+      "sources":[
+        1
+      ],
+      "targets":[
+        0
+      ],
+      "edgeType":"PROJECTION"
+    },
+    {
+      "sources":[
+        3
+      ],
+      "targets":[
+        2
+      ],
+      "edgeType":"PROJECTION"
+    }
+  ],
+  "vertices":[
+    {
+      "id":0,
+      "vertexType":"COLUMN",
+      "vertexId":"db_10.view_1.count",
+      "metadata": {
+        "tableName": "db_10.view_1",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":1,
+      "vertexType":"COLUMN",
+      "vertexId":"db_10.table_1.count",
+      "metadata": {
+        "tableName": "db_10.table_1",
+        "tableCreateTime": 1554750070
+      }
+    },
+    {
+      "id":2,
+      "vertexType":"COLUMN",
+      "vertexId":"db_10.view_1.id",
+      "metadata": {
+        "tableName": "db_10.view_1",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":3,
+      "vertexType":"COLUMN",
+      "vertexId":"db_10.table_1.id",
+      "metadata": {
+        "tableName": "db_10.table_1",
+        "tableCreateTime": 1554750070
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json
new file mode 100644
index 0000000..f588190
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaCreateTableAsSelectWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+  "queryText":"create   /* Test */   table db_9.table_2   as /* Test */ select count, id from db_9.table_1",
+  "queryId":"3a441d0c130962f8:7f634aec00000000",
+  "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+  "user":"admin",
+  "timestamp":1554750072,
+  "endTime":1554750554,
+  "edges":[
+    {
+      "sources":[
+        1
+      ],
+      "targets":[
+        0
+      ],
+      "edgeType":"PROJECTION"
+    },
+    {
+      "sources":[
+        3
+      ],
+      "targets":[
+        2
+      ],
+      "edgeType":"PROJECTION"
+    }
+  ],
+  "vertices":[
+    {
+      "id":0,
+      "vertexType":"COLUMN",
+      "vertexId":"db_9.table_2.count",
+      "metadata": {
+        "tableName": "db_9.table_2",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":1,
+      "vertexType":"COLUMN",
+      "vertexId":"db_9.table_1.count",
+      "metadata": {
+        "tableName": "db_9.table_1",
+        "tableCreateTime": 1554750070
+      }
+    },
+    {
+      "id":2,
+      "vertexType":"COLUMN",
+      "vertexId":"db_9.table_2.id",
+      "metadata": {
+        "tableName": "db_9.table_2",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":3,
+      "vertexType":"COLUMN",
+      "vertexId":"db_9.table_1.id",
+      "metadata": {
+        "tableName": "db_9.table_1",
+        "tableCreateTime": 1554750070
+      }
+    }
+  ]
+}
\ No newline at end of file
diff --git a/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json
new file mode 100644
index 0000000..e49b6b7
--- /dev/null
+++ b/addons/impala-bridge/src/test/resources/impalaCreateViewWithCommentSpaces.json
@@ -0,0 +1,66 @@
+{
+  "queryText":" create   /* comment1 */ view db_8.view_1 as   select /* comment2 */ count, id from db_8.table_1",
+  "queryId":"3a441d0c130962f8:7f634aec00000000",
+  "hash":"64ff0425ccdfaada53e3f2fd76f566f7",
+  "user":"admin",
+  "timestamp":1554750072,
+  "endTime":1554750554,
+  "edges":[
+    {
+      "sources":[
+        1
+      ],
+      "targets":[
+        0
+      ],
+      "edgeType":"PROJECTION"
+    },
+    {
+      "sources":[
+        3
+      ],
+      "targets":[
+        2
+      ],
+      "edgeType":"PROJECTION"
+    }
+  ],
+  "vertices":[
+    {
+      "id":0,
+      "vertexType":"COLUMN",
+      "vertexId":"db_8.view_1.count",
+      "metadata": {
+        "tableName": "db_8.view_1",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":1,
+      "vertexType":"COLUMN",
+      "vertexId":"db_8.table_1.count",
+      "metadata": {
+        "tableName": "db_8.table_1",
+        "tableCreateTime": 1554750070
+      }
+    },
+    {
+      "id":2,
+      "vertexType":"COLUMN",
+      "vertexId":"db_8.view_1.id",
+      "metadata": {
+        "tableName": "db_8.view_1",
+        "tableCreateTime": 1554750072
+      }
+    },
+    {
+      "id":3,
+      "vertexType":"COLUMN",
+      "vertexId":"db_8.table_1.id",
+      "metadata": {
+        "tableName": "db_8.table_1",
+        "tableCreateTime": 1554750070
+      }
+    }
+  ]
+}
\ No newline at end of file