You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@atlas.apache.org by sh...@apache.org on 2016/05/27 09:34:50 UTC
incubator-atlas git commit: ATLAS-752 Column renames should retain
traits/tags (svimal2106 via shwethags)
Repository: incubator-atlas
Updated Branches:
refs/heads/master 70f715705 -> 2e02ae628
ATLAS-752 Column renames should retain traits/tags (svimal2106 via shwethags)
Project: http://git-wip-us.apache.org/repos/asf/incubator-atlas/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-atlas/commit/2e02ae62
Tree: http://git-wip-us.apache.org/repos/asf/incubator-atlas/tree/2e02ae62
Diff: http://git-wip-us.apache.org/repos/asf/incubator-atlas/diff/2e02ae62
Branch: refs/heads/master
Commit: 2e02ae62850eef6b9fd65fb0c796748d0d348044
Parents: 70f7157
Author: Shwetha GS <ss...@hortonworks.com>
Authored: Fri May 27 15:04:39 2016 +0530
Committer: Shwetha GS <ss...@hortonworks.com>
Committed: Fri May 27 15:04:39 2016 +0530
----------------------------------------------------------------------
.../org/apache/atlas/hive/hook/HiveHook.java | 77 +++++++++++++++++++-
.../org/apache/atlas/hive/hook/HiveHookIT.java | 57 ++++++++++++---
release-log.txt | 1 +
3 files changed, 121 insertions(+), 14 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
----------------------------------------------------------------------
diff --git a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
index 418e755..4234664 100755
--- a/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
+++ b/addons/hive-bridge/src/main/java/org/apache/atlas/hive/hook/HiveHook.java
@@ -36,6 +36,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.Database;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.ExplainTask;
import org.apache.hadoop.hive.ql.exec.Task;
@@ -53,6 +54,7 @@ import org.apache.hadoop.security.UserGroupInformation;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import scala.tools.cmd.gen.AnyVals;
import java.net.MalformedURLException;
import java.util.ArrayList;
@@ -229,10 +231,12 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
case ALTERTABLE_SERIALIZER:
case ALTERTABLE_ADDCOLS:
case ALTERTABLE_REPLACECOLS:
- case ALTERTABLE_RENAMECOL:
case ALTERTABLE_PARTCOLTYPE:
handleEventOutputs(dgiBridge, event, Type.TABLE);
break;
+ case ALTERTABLE_RENAMECOL:
+ renameColumn(dgiBridge, event);
+ break;
case ALTERTABLE_LOCATION:
List<Pair<? extends Entity, Referenceable>> tablesUpdated = handleEventOutputs(dgiBridge, event, Type.TABLE);
if (tablesUpdated != null && tablesUpdated.size() > 0) {
@@ -297,6 +301,64 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
}
}
+ /**
+  * Finds the column that differs between the old and the new column list of a table.
+  *
+  * A column counts as changed when its {@link FieldSchema} appears in one list but has no
+  * equal entry in the other. Only the first such column of each list is reported.
+  *
+  * @param oldColList columns before the alter; must not be empty, because the name of its
+  *                   first column is the default for both results
+  * @param newColList columns after the alter
+  * @return pair of (old column name, new column name); both are the name of the first old
+  *         column when no difference is found
+  * @throws IndexOutOfBoundsException if {@code oldColList} is empty
+  */
+ private Pair<String, String> findChangedColNames(List<FieldSchema> oldColList, List<FieldSchema> newColList){
+     // Index each list over its OWN size. Sharing one loop bounded by oldColList.size()
+     // would throw when newColList is shorter and skip its tail when it is longer.
+     HashMap<FieldSchema, Integer> oldColHashMap = new HashMap<>();
+     for (int i = 0; i < oldColList.size(); i++){
+         oldColHashMap.put(oldColList.get(i), i);
+     }
+
+     HashMap<FieldSchema, Integer> newColHashMap = new HashMap<>();
+     for (int i = 0; i < newColList.size(); i++){
+         newColHashMap.put(newColList.get(i), i);
+     }
+
+     String changedColStringOldName = oldColList.get(0).getName();
+     String changedColStringNewName = changedColStringOldName;
+
+     // First old column that no longer exists in the new schema.
+     for (FieldSchema oldCol : oldColList){
+         if (!newColHashMap.containsKey(oldCol)){
+             changedColStringOldName = oldCol.getName();
+             break;
+         }
+     }
+
+     // First new column that did not exist in the old schema.
+     for (FieldSchema newCol : newColList){
+         if (!oldColHashMap.containsKey(newCol)){
+             changedColStringNewName = newCol.getName();
+             break;
+         }
+     }
+
+     return Pair.of(changedColStringOldName, changedColStringNewName);
+ }
+
+ /**
+  * Handles a column rename (ALTERTABLE_RENAMECOL) event.
+  *
+  * For every table output of the event, the table is first registered from the pre-alter
+  * definition, then a partial update is queued that is keyed on the old column's qualified
+  * name and sets the new qualified name. This updates the existing column entity instead of
+  * creating a new one (the stated goal of ATLAS-752 is to retain traits/tags on rename).
+  * Finally the regular table update is run for the event outputs.
+  *
+  * @param dgiBridge bridge used to read Hive metadata and build qualified names
+  * @param event     hook event; expected to carry exactly one input and at least one output
+  * @throws Exception if reading the table from Hive or building the entities fails
+  */
+ private void renameColumn(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception{
+     assert event.getInputs() != null && event.getInputs().size() == 1;
+     assert event.getOutputs() != null && event.getOutputs().size() > 0;
+     Table oldTable = event.getInputs().iterator().next().getTable();
+     List<FieldSchema> oldColList = oldTable.getAllCols();
+     // Look the altered table up by database AND table name; a lookup by bare table name
+     // depends on the session's current database and can resolve to a different table.
+     Table alteredTable = event.getOutputs().iterator().next().getTable();
+     List<FieldSchema> newColList = dgiBridge.hiveClient.getTable(alteredTable.getDbName(), alteredTable.getTableName()).getAllCols();
+     assert oldColList.size() == newColList.size();
+
+     Pair<String, String> changedColNamePair = findChangedColNames(oldColList, newColList);
+     String oldColName = changedColNamePair.getLeft();
+     String newColName = changedColNamePair.getRight();
+     for(WriteEntity writeEntity : event.getOutputs()){
+         if (writeEntity.getType() == Type.TABLE){
+             Table newTable = writeEntity.getTable();
+             // Pass the pre-alter table so the entities are built from the old definition.
+             createOrUpdateEntities(dgiBridge, event.getUser(), writeEntity, true, oldTable);
+             final String newQualifiedTableName = dgiBridge.getTableQualifiedName(dgiBridge.getClusterName(),
+                     newTable);
+             String oldColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, oldColName);
+             String newColumnQFName = HiveMetaStoreBridge.getColumnQualifiedName(newQualifiedTableName, newColName);
+             Referenceable newColEntity = new Referenceable(HiveDataTypes.HIVE_COLUMN.getName());
+             newColEntity.set(AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME, newColumnQFName);
+
+             // Partial update addressed by the old qualified name, carrying the new one.
+             messages.add(new HookNotification.EntityPartialUpdateRequest(event.getUser(),
+                     HiveDataTypes.HIVE_COLUMN.getName(), AtlasClient.REFERENCEABLE_ATTRIBUTE_NAME,
+                     oldColumnQFName, newColEntity));
+         }
+     }
+     handleEventOutputs(dgiBridge, event, Type.TABLE);
+ }
+
private void renameTable(HiveMetaStoreBridge dgiBridge, HiveEventContext event) throws Exception {
//crappy, no easy of getting new name
assert event.getInputs() != null && event.getInputs().size() == 1;
@@ -389,7 +451,7 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return newSDEntity;
}
- private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables) throws Exception {
+ private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables, Table existTable) throws Exception {
Database db = null;
Table table = null;
Partition partition = null;
@@ -419,13 +481,16 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
Referenceable tableEntity = null;
if (table != null) {
- table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
+ if (existTable != null) {
+ table = existTable;
+ } else {
+ table = dgiBridge.hiveClient.getTable(table.getDbName(), table.getTableName());
+ }
//If its an external table, even though the temp table skip flag is on,
// we create the table since we need the HDFS path to temp table lineage.
if (skipTempTables &&
table.isTemporary() &&
!TableType.EXTERNAL_TABLE.equals(table.getTableType())) {
-
LOG.debug("Skipping temporary table registration {} since it is not an external table {} ", table.getTableName(), table.getTableType().name());
} else {
@@ -438,6 +503,10 @@ public class HiveHook extends AtlasHook implements ExecuteWithHookContext {
return tableEntity;
}
+ private Referenceable createOrUpdateEntities(HiveMetaStoreBridge dgiBridge, String user, Entity entity, boolean skipTempTables) throws Exception{
+ return createOrUpdateEntities(dgiBridge, user, entity, skipTempTables, null);
+ }
+
private List<Pair<? extends Entity, Referenceable>> handleEventOutputs(HiveMetaStoreBridge dgiBridge, HiveEventContext event, Type entityType) throws Exception {
List<Pair<? extends Entity, Referenceable>> entitiesCreatedOrUpdated = new ArrayList<>();
for (Entity entity : event.getOutputs()) {
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
----------------------------------------------------------------------
diff --git a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
index 84d9a52..4223d90 100755
--- a/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
+++ b/addons/hive-bridge/src/test/java/org/apache/atlas/hive/hook/HiveHookIT.java
@@ -68,6 +68,7 @@ import static org.apache.atlas.hive.hook.HiveHook.normalize;
import static org.apache.atlas.hive.model.HiveDataModelGenerator.NAME;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
public class HiveHookIT {
@@ -327,8 +328,8 @@ public class HiveHookIT {
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName);
JSONObject response = atlasClient.getInputGraph(datasetName);
JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices");
- Assert.assertTrue(vertices.has(viewId));
- Assert.assertTrue(vertices.has(table1Id));
+ assertTrue(vertices.has(viewId));
+ assertTrue(vertices.has(table1Id));
//Alter the view from table2
String table2Name = createTable();
@@ -343,13 +344,13 @@ public class HiveHookIT {
datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, viewName);
response = atlasClient.getInputGraph(datasetName);
vertices = response.getJSONObject("values").getJSONObject("vertices");
- Assert.assertTrue(vertices.has(viewId));
+ assertTrue(vertices.has(viewId));
//This is through the alter view process
- Assert.assertTrue(vertices.has(table2Id));
+ assertTrue(vertices.has(table2Id));
//This is through the Create view process
- Assert.assertTrue(vertices.has(table1Id));
+ assertTrue(vertices.has(table1Id));
//Outputs dont exist
response = atlasClient.getOutputGraph(datasetName);
@@ -668,7 +669,7 @@ public class HiveHookIT {
public void assertOnEntity(final Referenceable entity) throws Exception {
Referenceable sd = ((Referenceable) entity.get(HiveDataModelGenerator.STORAGE_DESC));
String location = (String) sd.get(HiveDataModelGenerator.LOCATION);
- Assert.assertTrue(location.contains(newTableName));
+ assertTrue(location.contains(newTableName));
}
});
}
@@ -912,6 +913,42 @@ public class HiveHookIT {
}
@Test
+ public void testAlterTableWithoutHookConf() throws Exception {
+     // Create the table through a driver whose post-execution hooks are cleared,
+     // so the CREATE itself must not register anything.
+     HiveConf hooklessConf = new HiveConf();
+     hooklessConf.set("hive.exec.post.hooks", "");
+     SessionState session = SessionState.start(new SessionState(hooklessConf));
+     SessionState.setCurrentSessionState(session);
+     Driver hooklessDriver = new Driver(hooklessConf);
+
+     String tableName = tableName();
+     hooklessDriver.run("create table " + tableName + " (id int, name string)");
+     assertTableIsNotRegistered(DEFAULT_DB, tableName);
+
+     // The rename goes through runCommand; afterwards the table and the renamed
+     // column are expected to be registered.
+     runCommand("alter table " + tableName + " change id id_new string");
+     assertTableIsRegistered(DEFAULT_DB, tableName);
+
+     String qualifiedTableName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
+     String renamedColumn = HiveMetaStoreBridge.getColumnQualifiedName(qualifiedTableName, "id_new");
+     assertColumnIsRegistered(renamedColumn);
+ }
+
+ @Test
+ public void testTraitsPreservedOnColumnRename() throws Exception {
+     String oldColName = "id";
+     String newColName = "id_new";
+
+     // Tag the column before it is renamed.
+     String tableName = createTable();
+     String tbqn = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, tableName);
+     String guid = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, oldColName));
+     String trait = createTrait(guid);
+
+     runCommand(String.format("alter table %s change %s %s string", tableName, oldColName, newColName));
+
+     // The renamed column must be the same entity (same guid) and still carry the trait.
+     String guid2 = assertColumnIsRegistered(HiveMetaStoreBridge.getColumnQualifiedName(tbqn, newColName));
+     assertEquals(guid2, guid);
+
+     assertTrue(atlasClient.getEntity(guid2).getTraits().contains(trait));
+ }
+
+ @Test
public void testAlterViewRename() throws Exception {
String tableName = createTable();
String viewName = tableName();
@@ -1490,14 +1527,14 @@ public class HiveHookIT {
String datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, db2, table2);
JSONObject response = atlasClient.getInputGraph(datasetName);
JSONObject vertices = response.getJSONObject("values").getJSONObject("vertices");
- Assert.assertTrue(vertices.has(table1Id));
- Assert.assertTrue(vertices.has(table2Id));
+ assertTrue(vertices.has(table1Id));
+ assertTrue(vertices.has(table2Id));
datasetName = HiveMetaStoreBridge.getTableQualifiedName(CLUSTER_NAME, DEFAULT_DB, table1);
response = atlasClient.getOutputGraph(datasetName);
vertices = response.getJSONObject("values").getJSONObject("vertices");
- Assert.assertTrue(vertices.has(table1Id));
- Assert.assertTrue(vertices.has(table2Id));
+ assertTrue(vertices.has(table1Id));
+ assertTrue(vertices.has(table2Id));
}
//For ATLAS-448
http://git-wip-us.apache.org/repos/asf/incubator-atlas/blob/2e02ae62/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index bceb7e9..b6d1f9d 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -22,6 +22,7 @@ ATLAS-409 Atlas will not import avro tables with schema read from a file (dosset
ATLAS-379 Create sqoop and falcon metadata addons (venkatnrangan,bvellanki,sowmyaramesh via shwethags)
ALL CHANGES:
+ATLAS-752 Column renames should retain traits/tags (svimal2106 via shwethags)
ATLAS-821 Atlas UI - Add arrow to navigate to child term (kevalbhatt18 via yhemanth)
ATLAS-812 Atlas UI - Associate Terms with Assets (kevalbhatt18 via yhemanth)
ATLAS-809 JAAS configuration needed for Kafka interaction via Atlas config file (abhayk via shwethags)