You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2020/07/13 19:30:22 UTC

[atlas] branch branch-2.0 updated: ATLAS-3885: import-hive.sh: Hive entities with Ozone location created by import-hive.sh creates hdfs_path entity instead of ozone in Atlas

This is an automated email from the ASF dual-hosted git repository.

sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 87602b6  ATLAS-3885: import-hive.sh: Hive entities with Ozone location created by import-hive.sh creates hdfs_path entity instead of ozone in Atlas
87602b6 is described below

commit 87602b693951f76d2af836cd74db4b1b2a3f2be6
Author: Nikhil Bonte <ni...@freestoneinfotech.com>
AuthorDate: Wed Jul 8 20:31:27 2020 +0530

    ATLAS-3885: import-hive.sh: Hive entities with Ozone location created by import-hive.sh creates hdfs_path entity instead of ozone in Atlas
    
    Signed-off-by: Sarath Subramanian <sa...@apache.org>
    (cherry picked from commit cec7aa851e6befa6d32f3dfd80cc79cc8c3c799a)
---
 .../atlas/hive/bridge/HiveMetaStoreBridge.java     | 62 +++++++++-------------
 1 file changed, 26 insertions(+), 36 deletions(-)

diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
index eb691dc..534cdaf 100755
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
@@ -20,6 +20,7 @@ package org.apache.atlas.hive.bridge;
 
 import com.google.common.annotations.VisibleForTesting;
 import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.type.AtlasType;
 import org.apache.atlas.type.AtlasTypeUtil;
 import org.apache.atlas.ApplicationProperties;
 import org.apache.atlas.AtlasClientV2;
@@ -30,6 +31,7 @@ import org.apache.atlas.hook.AtlasHookException;
 import org.apache.atlas.model.instance.AtlasEntityHeader;
 import org.apache.atlas.model.instance.EntityMutationResponse;
 import org.apache.atlas.model.instance.EntityMutations;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
 import org.apache.atlas.utils.AuthenticationUtil;
 import org.apache.atlas.utils.HdfsNameServiceResolver;
 import org.apache.atlas.model.instance.AtlasEntity;
@@ -37,6 +39,7 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
 import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
 import org.apache.atlas.model.instance.AtlasObjectId;
 import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.utils.PathExtractorContext;
 import org.apache.commons.cli.ParseException;
 import org.apache.commons.collections.CollectionUtils;
 
@@ -87,6 +90,7 @@ public class HiveMetaStoreBridge {
     public static final String CLUSTER_NAME_KEY                = "atlas.cluster.name";
     public static final String HIVE_METADATA_NAMESPACE         = "atlas.metadata.namespace";
     public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
+    public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version";
     public static final String DEFAULT_CLUSTER_NAME            = "primary";
     public static final String TEMP_TABLE_PREFIX               = "_temp-";
     public static final String ATLAS_ENDPOINT                  = "atlas.rest.address";
@@ -94,6 +98,8 @@ public class HiveMetaStoreBridge {
     public static final String HDFS_PATH                       = "hdfs_path";
     public static final String DEFAULT_METASTORE_CATALOG       = "hive";
 
+    public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2  = "v2";
+
     private static final int    EXIT_CODE_SUCCESS = 0;
     private static final int    EXIT_CODE_FAILED  = 1;
     private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
@@ -103,6 +109,8 @@ public class HiveMetaStoreBridge {
     private final AtlasClientV2 atlasClientV2;
     private final boolean       convertHdfsPathToLowerCase;
 
+    private String awsS3AtlasModelVersion = null;
+
 
     public static void main(String[] args) {
         int exitCode = EXIT_CODE_FAILED;
@@ -216,6 +224,7 @@ public class HiveMetaStoreBridge {
         this.hiveClient                 = Hive.get(hiveConf);
         this.atlasClientV2              = atlasClientV2;
         this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false);
+        this.awsS3AtlasModelVersion     = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
     }
 
     /**
@@ -355,12 +364,17 @@ public class HiveMetaStoreBridge {
                 AtlasEntityWithExtInfo processEntity        = findProcessEntity(processQualifiedName);
 
                 if (processEntity == null) {
-                    String      tableLocation = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
-                    String      query         = getCreateTableString(table, tableLocation);
-                    AtlasEntity pathInst      = toHdfsPathEntity(tableLocation);
-                    AtlasEntity tableInst     = tableEntity.getEntity();
-                    AtlasEntity processInst   = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
-                    long        now           = System.currentTimeMillis();
+                    String tableLocationString = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
+                    Path   location            = table.getDataLocation();
+                    String query               = getCreateTableString(table, tableLocationString);
+
+                    PathExtractorContext   pathExtractorCtx  = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion);
+                    AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx);
+                    AtlasEntity            pathInst          = entityWithExtInfo.getEntity();
+                    AtlasEntity            tableInst         = tableEntity.getEntity();
+                    AtlasEntity            processInst       = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
+
+                    long now = System.currentTimeMillis();
 
                     processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName);
                     processInst.setAttribute(ATTRIBUTE_NAME, query);
@@ -379,7 +393,12 @@ public class HiveMetaStoreBridge {
                     AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo();
 
                     createTableProcess.addEntity(processInst);
-                    createTableProcess.addEntity(pathInst);
+
+                    if (pathExtractorCtx.getKnownEntities() != null) {
+                        pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity));
+                    } else {
+                        createTableProcess.addEntity(pathInst);
+                    }
 
                     registerInstances(createTableProcess);
                 } else {
@@ -725,35 +744,6 @@ public class HiveMetaStoreBridge {
         return ret;
     }
 
-    private AtlasEntity toHdfsPathEntity(String pathUri) {
-        AtlasEntity ret           = new AtlasEntity(HDFS_PATH);
-        String      nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(pathUri);
-        Path        path          = new Path(pathUri);
-
-        ret.setAttribute(ATTRIBUTE_NAME, Path.getPathWithoutSchemeAndAuthority(path).toString());
-        ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
-
-        if (StringUtils.isNotEmpty(nameServiceID)) {
-            // Name service resolution is successful, now get updated HDFS path where the host port info is replaced by resolved name service
-            String updatedHdfsPath = HdfsNameServiceResolver.getPathWithNameServiceID(pathUri);
-
-            ret.setAttribute(ATTRIBUTE_PATH, updatedHdfsPath);
-            ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(updatedHdfsPath));
-            ret.setAttribute(ATTRIBUTE_NAMESERVICE_ID, nameServiceID);
-        } else {
-            ret.setAttribute(ATTRIBUTE_PATH, pathUri);
-
-            // Only append metadataNamespace for the HDFS path
-            if (pathUri.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) {
-                ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(pathUri));
-            } else {
-                ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, pathUri);
-            }
-        }
-
-        return ret;
-    }
-
     /**
      * Gets the atlas entity for the database
      * @param databaseName  database Name