You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sa...@apache.org on 2020/07/13 19:30:22 UTC
[atlas] branch branch-2.0 updated: ATLAS-3885: import-hive.sh: Hive
entities with Ozone location created by import-hive.sh creates hdfs_path
entity instead of ozone in Atlas
This is an automated email from the ASF dual-hosted git repository.
sarath pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/atlas.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 87602b6 ATLAS-3885: import-hive.sh: Hive entities with Ozone location created by import-hive.sh creates hdfs_path entity instead of ozone in Atlas
87602b6 is described below
commit 87602b693951f76d2af836cd74db4b1b2a3f2be6
Author: Nikhil Bonte <ni...@freestoneinfotech.com>
AuthorDate: Wed Jul 8 20:31:27 2020 +0530
ATLAS-3885: import-hive.sh: Hive entities with Ozone location created by import-hive.sh creates hdfs_path entity instead of ozone in Atlas
Signed-off-by: Sarath Subramanian <sa...@apache.org>
(cherry picked from commit cec7aa851e6befa6d32f3dfd80cc79cc8c3c799a)
---
.../atlas/hive/bridge/HiveMetaStoreBridge.java | 62 +++++++++-------------
1 file changed, 26 insertions(+), 36 deletions(-)
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
index eb691dc..534cdaf 100755
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/bridge/HiveMetaStoreBridge.java
@@ -20,6 +20,7 @@ package org.apache.atlas.hive.bridge;
import com.google.common.annotations.VisibleForTesting;
import com.sun.jersey.api.client.ClientResponse;
+import org.apache.atlas.type.AtlasType;
import org.apache.atlas.type.AtlasTypeUtil;
import org.apache.atlas.ApplicationProperties;
import org.apache.atlas.AtlasClientV2;
@@ -30,6 +31,7 @@ import org.apache.atlas.hook.AtlasHookException;
import org.apache.atlas.model.instance.AtlasEntityHeader;
import org.apache.atlas.model.instance.EntityMutationResponse;
import org.apache.atlas.model.instance.EntityMutations;
+import org.apache.atlas.utils.AtlasPathExtractorUtil;
import org.apache.atlas.utils.AuthenticationUtil;
import org.apache.atlas.utils.HdfsNameServiceResolver;
import org.apache.atlas.model.instance.AtlasEntity;
@@ -37,6 +39,7 @@ import org.apache.atlas.model.instance.AtlasEntity.AtlasEntityWithExtInfo;
import org.apache.atlas.model.instance.AtlasEntity.AtlasEntitiesWithExtInfo;
import org.apache.atlas.model.instance.AtlasObjectId;
import org.apache.atlas.model.instance.AtlasStruct;
+import org.apache.atlas.utils.PathExtractorContext;
import org.apache.commons.cli.ParseException;
import org.apache.commons.collections.CollectionUtils;
@@ -87,6 +90,7 @@ public class HiveMetaStoreBridge {
public static final String CLUSTER_NAME_KEY = "atlas.cluster.name";
public static final String HIVE_METADATA_NAMESPACE = "atlas.metadata.namespace";
public static final String HDFS_PATH_CONVERT_TO_LOWER_CASE = CONF_PREFIX + "hdfs_path.convert_to_lowercase";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION = CONF_PREFIX + "aws_s3.atlas.model.version";
public static final String DEFAULT_CLUSTER_NAME = "primary";
public static final String TEMP_TABLE_PREFIX = "_temp-";
public static final String ATLAS_ENDPOINT = "atlas.rest.address";
@@ -94,6 +98,8 @@ public class HiveMetaStoreBridge {
public static final String HDFS_PATH = "hdfs_path";
public static final String DEFAULT_METASTORE_CATALOG = "hive";
+ public static final String HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2 = "v2";
+
private static final int EXIT_CODE_SUCCESS = 0;
private static final int EXIT_CODE_FAILED = 1;
private static final String DEFAULT_ATLAS_URL = "http://localhost:21000/";
@@ -103,6 +109,8 @@ public class HiveMetaStoreBridge {
private final AtlasClientV2 atlasClientV2;
private final boolean convertHdfsPathToLowerCase;
+ private String awsS3AtlasModelVersion = null;
+
public static void main(String[] args) {
int exitCode = EXIT_CODE_FAILED;
@@ -216,6 +224,7 @@ public class HiveMetaStoreBridge {
this.hiveClient = Hive.get(hiveConf);
this.atlasClientV2 = atlasClientV2;
this.convertHdfsPathToLowerCase = atlasProperties.getBoolean(HDFS_PATH_CONVERT_TO_LOWER_CASE, false);
+ this.awsS3AtlasModelVersion = atlasProperties.getString(HOOK_AWS_S3_ATLAS_MODEL_VERSION, HOOK_AWS_S3_ATLAS_MODEL_VERSION_V2);
}
/**
@@ -355,12 +364,17 @@ public class HiveMetaStoreBridge {
AtlasEntityWithExtInfo processEntity = findProcessEntity(processQualifiedName);
if (processEntity == null) {
- String tableLocation = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
- String query = getCreateTableString(table, tableLocation);
- AtlasEntity pathInst = toHdfsPathEntity(tableLocation);
- AtlasEntity tableInst = tableEntity.getEntity();
- AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
- long now = System.currentTimeMillis();
+ String tableLocationString = isConvertHdfsPathToLowerCase() ? lower(table.getDataLocation().toString()) : table.getDataLocation().toString();
+ Path location = table.getDataLocation();
+ String query = getCreateTableString(table, tableLocationString);
+
+ PathExtractorContext pathExtractorCtx = new PathExtractorContext(getMetadataNamespace(), isConvertHdfsPathToLowerCase(), awsS3AtlasModelVersion);
+ AtlasEntityWithExtInfo entityWithExtInfo = AtlasPathExtractorUtil.getPathEntity(location, pathExtractorCtx);
+ AtlasEntity pathInst = entityWithExtInfo.getEntity();
+ AtlasEntity tableInst = tableEntity.getEntity();
+ AtlasEntity processInst = new AtlasEntity(HiveDataTypes.HIVE_PROCESS.getName());
+
+ long now = System.currentTimeMillis();
processInst.setAttribute(ATTRIBUTE_QUALIFIED_NAME, processQualifiedName);
processInst.setAttribute(ATTRIBUTE_NAME, query);
@@ -379,7 +393,12 @@ public class HiveMetaStoreBridge {
AtlasEntitiesWithExtInfo createTableProcess = new AtlasEntitiesWithExtInfo();
createTableProcess.addEntity(processInst);
- createTableProcess.addEntity(pathInst);
+
+ if (pathExtractorCtx.getKnownEntities() != null) {
+ pathExtractorCtx.getKnownEntities().values().forEach(entity -> createTableProcess.addEntity(entity));
+ } else {
+ createTableProcess.addEntity(pathInst);
+ }
registerInstances(createTableProcess);
} else {
@@ -725,35 +744,6 @@ public class HiveMetaStoreBridge {
return ret;
}
- private AtlasEntity toHdfsPathEntity(String pathUri) {
- AtlasEntity ret = new AtlasEntity(HDFS_PATH);
- String nameServiceID = HdfsNameServiceResolver.getNameServiceIDForPath(pathUri);
- Path path = new Path(pathUri);
-
- ret.setAttribute(ATTRIBUTE_NAME, Path.getPathWithoutSchemeAndAuthority(path).toString());
- ret.setAttribute(ATTRIBUTE_CLUSTER_NAME, metadataNamespace);
-
- if (StringUtils.isNotEmpty(nameServiceID)) {
- // Name service resolution is successful, now get updated HDFS path where the host port info is replaced by resolved name service
- String updatedHdfsPath = HdfsNameServiceResolver.getPathWithNameServiceID(pathUri);
-
- ret.setAttribute(ATTRIBUTE_PATH, updatedHdfsPath);
- ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(updatedHdfsPath));
- ret.setAttribute(ATTRIBUTE_NAMESERVICE_ID, nameServiceID);
- } else {
- ret.setAttribute(ATTRIBUTE_PATH, pathUri);
-
- // Only append metadataNamespace for the HDFS path
- if (pathUri.startsWith(HdfsNameServiceResolver.HDFS_SCHEME)) {
- ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, getHdfsPathQualifiedName(pathUri));
- } else {
- ret.setAttribute(ATTRIBUTE_QUALIFIED_NAME, pathUri);
- }
- }
-
- return ret;
- }
-
/**
* Gets the atlas entity for the database
* @param databaseName database Name