You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by ma...@apache.org on 2019/05/09 01:10:48 UTC

[atlas] 02/02: ATLAS-3197: capture DDL statements for hive_table and hive_db operations

This is an automated email from the ASF dual-hosted git repository.

madhan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/atlas.git

commit d5437734fa15de763dc33bcd83e22f61ae172d51
Author: Le Ma <lm...@cloudera.com>
AuthorDate: Tue May 7 10:38:07 2019 -0700

    ATLAS-3197: capture DDL statements for hive_table and hive_db operations
    
    Signed-off-by: Madhan Neethiraj <ma...@apache.org>
---
 .../atlas/hive/hook/events/AlterTableRename.java   |  7 ++
 .../atlas/hive/hook/events/BaseHiveEvent.java      | 33 ++++++++
 .../atlas/hive/hook/events/CreateDatabase.java     |  7 +-
 .../apache/atlas/hive/hook/events/CreateTable.java |  6 ++
 .../org/apache/atlas/hive/hook/HiveHookIT.java     | 67 ++++++++++++++--
 addons/models/1000-Hadoop/1030-hive_model.json     | 90 ++++++++++++++++++++++
 6 files changed, 203 insertions(+), 7 deletions(-)

diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java
index d3d8349..67ea527 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/AlterTableRename.java
@@ -147,6 +147,13 @@ public class AlterTableRename extends BaseHiveEvent {
         // update qualifiedName and other attributes (like params - which include lastModifiedTime, lastModifiedBy) of the table
         ret.add(new EntityPartialUpdateRequestV2(getUserName(), oldTableId, renamedTableEntity));
 
+        // record the rename DDL: create a DDL entity that references the renamed table without its guid
+        AtlasEntity ddlEntity = createHiveDDLEntity(renamedTableEntity.getEntity(), true);
+
+        if (ddlEntity != null) {
+            ret.add(new HookNotification.EntityCreateRequestV2(getUserName(), new AtlasEntitiesWithExtInfo(ddlEntity)));
+        }
+
         context.removeFromKnownTable(oldTableQualifiedName);
     }
 
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
index de8de63..593c413 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/BaseHiveEvent.java
@@ -77,6 +77,8 @@ public abstract class BaseHiveEvent {
     public static final String HIVE_TYPE_SERDE                     = "hive_serde";
     public static final String HIVE_TYPE_ORDER                     = "hive_order";
     public static final String HIVE_TYPE_PROCESS_EXECUTION         = "hive_process_execution";
+    public static final String HIVE_DB_DDL                         = "hive_db_ddl";
+    public static final String HIVE_TABLE_DDL                      = "hive_table_ddl";
     public static final String HDFS_TYPE_PATH                      = "hdfs_path";
     public static final String HBASE_TYPE_TABLE                    = "hbase_table";
     public static final String HBASE_TYPE_NAMESPACE                = "hbase_namespace";
@@ -144,6 +146,8 @@ public abstract class BaseHiveEvent {
     public static final String ATTRIBUTE_OBJECT_PREFIX             = "objectPrefix";
     public static final String ATTRIBUTE_BUCKET                    = "bucket";
     public static final String ATTRIBUTE_HOSTNAME                  = "hostName";
+    public static final String ATTRIBUTE_EXEC_TIME                 = "execTime";
+    public static final String ATTRIBUTE_DDL_QUERIES               = "ddlQueries";
 
     public static final String HBASE_STORAGE_HANDLER_CLASS         = "org.apache.hadoop.hive.hbase.HBaseStorageHandler";
     public static final String HBASE_DEFAULT_NAMESPACE             = "default";
@@ -663,6 +667,35 @@ public abstract class BaseHiveEvent {
         return ret;
     }
 
+    protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable) {
+        return createHiveDDLEntity(dbOrTable, false); // false: do not clear the guid on the db/table reference
+    }
+
+    protected AtlasEntity createHiveDDLEntity(AtlasEntity dbOrTable, boolean excludeEntityGuid) {
+        AtlasObjectId objId   = BaseHiveEvent.getObjectId(dbOrTable); // id of the db or table the DDL entity will point at
+        AtlasEntity   hiveDDL = null; // stays null (and is returned as null) unless the type is HIVE_TYPE_DB or HIVE_TYPE_TABLE
+
+        if (excludeEntityGuid) {
+            objId.setGuid(null); // caller asked for a reference that carries no guid
+        }
+
+        if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_DB)) {
+            hiveDDL = new AtlasEntity(HIVE_DB_DDL, ATTRIBUTE_DB, objId); // HIVE_DB_DDL entity; objId passed as its ATTRIBUTE_DB value
+        } else if (StringUtils.equals(objId.getTypeName(), HIVE_TYPE_TABLE)) {
+            hiveDDL = new AtlasEntity(HIVE_TABLE_DDL, ATTRIBUTE_TABLE, objId); // HIVE_TABLE_DDL entity; objId passed as its ATTRIBUTE_TABLE value
+        }
+
+        if (hiveDDL != null) {
+            hiveDDL.setAttribute(ATTRIBUTE_EXEC_TIME, getQueryStartTime());
+            hiveDDL.setAttribute(ATTRIBUTE_QUERY_TEXT, getQueryString());
+            hiveDDL.setAttribute(ATTRIBUTE_USER_NAME, getUserName());
+            hiveDDL.setAttribute(ATTRIBUTE_NAME, getQueryString() + QNAME_SEP_PROCESS + getQueryStartTime().toString()); // name = query text + separator + query start time
+            hiveDDL.setAttribute(ATTRIBUTE_QUALIFIED_NAME, hiveDDL.getAttribute(ATTRIBUTE_NAME)); // qualifiedName reuses the name set on the previous line
+        }
+
+        return hiveDDL;
+    }
+
     protected String getClusterName() {
         return context.getClusterName();
     }
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java
index b01f61f..f4f933e 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateDatabase.java
@@ -84,9 +84,14 @@ public class CreateDatabase extends BaseHiveEvent {
                 }
 
                 if (db != null) {
-                    AtlasEntity dbEntity = toDbEntity(db);
+                    AtlasEntity dbEntity    = toDbEntity(db);
+                    AtlasEntity dbDDLEntity = createHiveDDLEntity(dbEntity);
 
                     ret.addEntity(dbEntity);
+
+                    if (dbDDLEntity != null) {
+                        ret.addEntity(dbDDLEntity);
+                    }
                 } else {
                     LOG.error("CreateDatabase.getEntities(): failed to retrieve db");
                 }
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
index b5ce093..ef4e687 100644
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/events/CreateTable.java
@@ -149,6 +149,12 @@ public class CreateTable extends BaseHiveEvent {
                     }
                 }
             }
+
+            AtlasEntity tableDDLEntity = createHiveDDLEntity(tblEntity);
+
+            if (tableDDLEntity != null) {
+                ret.addEntity(tableDDLEntity);
+            }
         }
     }
 
diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
index 24de30f..28c600e 100755
--- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
@@ -92,9 +92,13 @@ public class HiveHookIT extends HiveITBase {
 
         runCommand("create database " + dbName + " WITH DBPROPERTIES ('p1'='v1', 'p2'='v2')");
 
-        String      dbId     = assertDatabaseIsRegistered(dbName);
-        AtlasEntity dbEntity = atlasClientV2.getEntityByGuid(dbId).getEntity();
-        Map         params   = (Map) dbEntity.getAttribute(ATTRIBUTE_PARAMETERS);
+        String      dbId       = assertDatabaseIsRegistered(dbName);
+        AtlasEntity dbEntity   = atlasClientV2.getEntityByGuid(dbId).getEntity();
+        Map         params     = (Map) dbEntity.getAttribute(ATTRIBUTE_PARAMETERS);
+        List        ddlQueries = (List) dbEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries);
+        Assert.assertEquals(ddlQueries.size(),1);
 
         Assert.assertNotNull(params);
         Assert.assertEquals(params.size(), 2);
@@ -124,10 +128,14 @@ public class HiveHookIT extends HiveITBase {
         String      tableId   = assertTableIsRegistered(dbName, tableName);
         String      colId     = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, dbName, tableName), colName)); //there is only one instance of column registered
         AtlasEntity colEntity = atlasClientV2.getEntityByGuid(colId).getEntity();
+        AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tableId).getEntity(); // load the table (not the column): ddlQueries is asserted on it below
 
         Assert.assertEquals(colEntity.getAttribute(ATTRIBUTE_QUALIFIED_NAME), String.format("%s.%s.%s@%s", dbName.toLowerCase(), tableName.toLowerCase(), colName.toLowerCase(), CLUSTER_NAME));
         Assert.assertNotNull(colEntity.getAttribute(ATTRIBUTE_TABLE));
 
+        Assert.assertNotNull(tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES));
+        Assert.assertEquals(((List)tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES)).size(), 1);
+
         AtlasObjectId tblObjId = toAtlasObjectId(colEntity.getAttribute(ATTRIBUTE_TABLE));
 
         Assert.assertEquals(tblObjId.getGuid(), tableId);
@@ -1204,7 +1212,7 @@ public class HiveHookIT extends HiveITBase {
         assertTrait(partColumnGuid, partColTraitDetails);
         assertTableIsNotRegistered(DEFAULT_DB, tableName);
 
-        assertTableIsRegistered(newDBName, newTableName, new AssertPredicate() {
+        String renamedTableId = assertTableIsRegistered(newDBName, newTableName, new AssertPredicate() {
             @Override
             public void assertOnEntity(final AtlasEntity entity) throws Exception {
                 AtlasObjectId sd = toAtlasObjectId(entity.getAttribute(ATTRIBUTE_STORAGEDESC));
@@ -1212,6 +1220,13 @@ public class HiveHookIT extends HiveITBase {
                 assertNotNull(sd);
             }
         });
+
+        AtlasEntity renamedTableEntity = atlasClientV2.getEntityByGuid(renamedTableId).getEntity();
+        List        ddlQueries         = (List) renamedTableEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries);
+        Assert.assertEquals(ddlQueries.size(), 2);
+
     }
 
     private List<AtlasEntity> getColumns(String dbName, String tableName) throws Exception {
@@ -1266,6 +1281,14 @@ public class HiveHookIT extends HiveITBase {
         List<AtlasEntity> columns = getColumns(DEFAULT_DB, tableName);
 
         Assert.assertEquals(columns.size(), 3);
+
+        String      tblId      = assertTableIsRegistered(DEFAULT_DB, tableName);
+        AtlasEntity tblEntity  = atlasClientV2.getEntityByGuid(tblId).getEntity();
+        List        ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries);
+        Assert.assertEquals(ddlQueries.size(), 2);
+
     }
 
     //ATLAS-1321: Disable problematic tests. Need to revisit and fix them later
@@ -1284,6 +1307,13 @@ public class HiveHookIT extends HiveITBase {
 
         assertEquals(columns.size(), 1);
         assertEquals(columns.get(0).getAttribute(NAME), "name");
+
+        String tblId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity();
+        List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries);
+        Assert.assertEquals(ddlQueries.size(), 2);
     }
 
     @Test
@@ -1304,6 +1334,13 @@ public class HiveHookIT extends HiveITBase {
 
         Assert.assertEquals(columns.size(), 2);
 
+        String tblId = assertTableIsRegistered(DEFAULT_DB, tableName);
+        AtlasEntity tblEntity = atlasClientV2.getEntityByGuid(tblId).getEntity();
+        List ddlQueries = (List) tblEntity.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries);
+        Assert.assertEquals(ddlQueries.size(), 2);
+
         //Change column type
         oldColName = "name1";
         newColName = "name2";
@@ -1329,6 +1366,12 @@ public class HiveHookIT extends HiveITBase {
 
         assertColumnIsNotRegistered(HiveMetaStoreBridge.getColumnQualifiedName(HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName), oldColName));
 
+        AtlasEntity tblEntity2  = atlasClientV2.getEntityByGuid(tblId).getEntity();
+        List        ddlQueries2 = (List) tblEntity2.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries2);
+        Assert.assertEquals(ddlQueries2.size(), 3);
+
         //Change name and add comment
         oldColName = "name2";
         newColName = "name3";
@@ -1373,7 +1416,7 @@ public class HiveHookIT extends HiveITBase {
 
         String finalNewColName = newColName;
 
-        assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
+        String tblId3 = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
                     @Override
                     public void assertOnEntity(AtlasEntity entity) throws Exception {
                         List<AtlasObjectId> columns = toAtlasObjectIdList(entity.getAttribute(ATTRIBUTE_COLUMNS));
@@ -1383,6 +1426,12 @@ public class HiveHookIT extends HiveITBase {
                 }
         );
 
+        AtlasEntity tblEntity3  = atlasClientV2.getEntityByGuid(tblId3).getEntity();
+        List        ddlQueries3 = (List) tblEntity3.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries3);
+        Assert.assertEquals(ddlQueries3.size(), 4);
+
         //Change col position again
         oldColName = "name4";
         newColName = "name5";
@@ -1403,7 +1452,7 @@ public class HiveHookIT extends HiveITBase {
         //Check col position
         String finalNewColName2 = newColName;
 
-        assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
+        String tblId4 = assertTableIsRegistered(DEFAULT_DB, tableName, new AssertPredicate() {
                     @Override
                     public void assertOnEntity(AtlasEntity entity) throws Exception {
                         List<AtlasObjectId> columns = toAtlasObjectIdList(entity.getAttribute(ATTRIBUTE_COLUMNS));
@@ -1412,6 +1461,12 @@ public class HiveHookIT extends HiveITBase {
                     }
                 }
         );
+
+        AtlasEntity tblEntity4  = atlasClientV2.getEntityByGuid(tblId4).getEntity();
+        List        ddlQueries4 = (List) tblEntity4.getRelationshipAttribute(ATTRIBUTE_DDL_QUERIES);
+
+        Assert.assertNotNull(ddlQueries4);
+        Assert.assertEquals(ddlQueries4.size(), 5);
     }
 
     /**
diff --git a/addons/models/1000-Hadoop/1030-hive_model.json b/addons/models/1000-Hadoop/1030-hive_model.json
index ee6945a..417c4c5 100644
--- a/addons/models/1000-Hadoop/1030-hive_model.json
+++ b/addons/models/1000-Hadoop/1030-hive_model.json
@@ -531,6 +531,58 @@
                     "isUnique": false
                 }
             ]
+        },
+        {
+            "name": "hive_ddl",
+            "superTypes": [
+                "Referenceable"
+            ],
+            "serviceType": "hive",
+            "typeVersion": "1.0",
+            "attributeDefs": [
+                {
+                    "name": "queryText",
+                    "typeName": "string",
+                    "cardinality": "SINGLE",
+                    "isIndexable": true,
+                    "isOptional": false,
+                    "isUnique": false
+                },
+                {
+                    "name": "execTime",
+                    "typeName": "date",
+                    "cardinality": "SINGLE",
+                    "isIndexable": false,
+                    "isOptional": false,
+                    "isUnique": false
+                },
+                {
+                    "name": "userName",
+                    "typeName": "string",
+                    "cardinality": "SINGLE",
+                    "isIndexable": true,
+                    "isOptional": false,
+                    "isUnique": false
+                }
+            ]
+        },
+        {
+            "name": "hive_db_ddl",
+            "superTypes": [
+                "hive_ddl"
+            ],
+            "serviceType": "hive",
+            "typeVersion": "1.0",
+            "attributeDefs": []
+        },
+        {
+            "name": "hive_table_ddl",
+            "superTypes": [
+                "hive_ddl"
+            ],
+            "serviceType": "hive",
+            "typeVersion": "1.0",
+            "attributeDefs": []
         }
     ],
     "relationshipDefs": [
@@ -659,6 +711,44 @@
                 "cardinality": "SINGLE"
             },
             "propagateTags": "NONE"
+        },
+        {
+            "name": "hive_table_ddl_queries",
+            "serviceType": "hive",
+            "typeVersion": "1.0",
+            "relationshipCategory": "COMPOSITION",
+            "endDef1": {
+                "type": "hive_table",
+                "name": "ddlQueries",
+                "isContainer": true,
+                "cardinality": "SET"
+            },
+            "endDef2": {
+                "type": "hive_table_ddl",
+                "name": "table",
+                "isContainer": false,
+                "cardinality": "SINGLE"
+            },
+            "propagateTags": "NONE"
+        },
+        {
+            "name": "hive_db_ddl_queries",
+            "serviceType": "hive",
+            "typeVersion": "1.0",
+            "relationshipCategory": "COMPOSITION",
+            "endDef1": {
+                "type": "hive_db",
+                "name": "ddlQueries",
+                "isContainer": true,
+                "cardinality": "SET"
+            },
+            "endDef2": {
+                "type": "hive_db_ddl",
+                "name": "db",
+                "isContainer": false,
+                "cardinality": "SINGLE"
+            },
+            "propagateTags": "NONE"
         }
     ]
 }