You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by aa...@apache.org on 2021/08/09 10:46:10 UTC

[hive] branch master updated: HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma)

This is an automated email from the ASF dual-hosted git repository.

aasha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 8785dd2  HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma)
8785dd2 is described below

commit 8785dd23c72e7a3fb61da1bce34ae33ff8c7561d
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Mon Aug 9 16:15:51 2021 +0530

    HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma)
---
 .../TestReplicationScenariosExternalTables.java    | 45 ++++++++++++++---
 .../hadoop/hive/ql/exec/repl/DirCopyTask.java      | 59 ++++++++++++++--------
 2 files changed, 76 insertions(+), 28 deletions(-)

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
index 3e794ef..44fdd00 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
@@ -87,6 +87,7 @@ import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLI
 import static org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
 import static org.junit.Assert.assertTrue;
 
 public class TestReplicationScenariosExternalTables extends BaseReplicationAcrossInstances {
@@ -357,6 +358,7 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
         "/" + testName.getMethodName() + "/" + primaryDbName + "/" + "a/");
     DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
     fs.mkdirs(externalTableLocation, new FsPermission("777"));
+    fs.setOwner(externalTableLocation,"user1","group1");
 
     Path externalFileLoc = new Path(externalTableLocation, "file1.txt");
     try (FSDataOutputStream outputStream = fs.create(externalFileLoc)) {
@@ -382,11 +384,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
     fs.modifyAclEntries(externalTableLocation, aclEntries);
     fs.modifyAclEntries(externalFileLoc, aclEntries);
 
-    // Run bootstrap with distcp options to preserve ACL.
-    List<String> withClause = Arrays
-        .asList("'distcp.options.update'=''", "'distcp.options.puga'=''",
-            "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
-                + "'='true'");
+    // Run bootstrap without distcp options to preserve options.
+    List<String> withClause = Arrays.asList("'distcp.options.update'=''",
+        "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'");
 
     primary.run("use " + primaryDbName).run(
         "create external table a (i int, j int) "
@@ -400,13 +400,41 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
         .verifyResults(new String[] {"1", "13"}).run("select j from a")
         .verifyResults(new String[] {"2", "21"});
 
-    // Verify the ACL's of the destination table directory and data file are
-    // same as that of source.
+    // Verify the attributes of the destination table directory and data file are
+    // not same as that of source.
     Hive hiveForReplica = Hive.get(replica.hiveConf);
     org.apache.hadoop.hive.ql.metadata.Table replicaTable =
         hiveForReplica.getTable(replicatedDbName + ".a");
     Path dataLocation = replicaTable.getDataLocation();
 
+    assertNotEquals("ACL entries are same for the data file.",
+        fs.getAclStatus(externalFileLoc).getEntries().size(),
+        fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries()
+            .size());
+    assertNotEquals("ACL entries are same for the table directory.",
+        fs.getAclStatus(externalTableLocation).getEntries().size(),
+        fs.getAclStatus(dataLocation).getEntries().size());
+
+    assertNotEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner());
+    assertNotEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup());
+
+    // Dump & load with preserve attributes set.
+    withClause = Arrays
+        .asList("'distcp.options.update'=''", "'distcp.options.pugpa'=''",
+            "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
+                + "'='true'");
+
+    primary.run("use " + primaryDbName).dump(primaryDbName, withClause);
+
+    // Verify load is success and has the appropriate data.
+    replica.load(replicatedDbName, primaryDbName, withClause)
+        .run("use " + replicatedDbName).run("select i From a")
+        .verifyResults(new String[] {"1", "13"}).run("select j from a")
+        .verifyResults(new String[] {"2", "21"});
+
+    // Verify the ACL's of the destination table directory and data file are
+    // same as that of source.
+
     assertEquals("ACL entries are not same for the data file.",
         fs.getAclStatus(externalFileLoc).getEntries().size(),
         fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries()
@@ -414,6 +442,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
     assertEquals("ACL entries are not same for the table directory.",
         fs.getAclStatus(externalTableLocation).getEntries().size(),
         fs.getAclStatus(dataLocation).getEntries().size());
+
+    assertEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner());
+    assertEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup());
   }
 
   /**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
index 2e0154a..0890082 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
@@ -85,39 +85,56 @@ public class DirCopyTask extends Task<DirCopyWork> implements Serializable {
     }
     LOG.info("Setting permission for path dest {} from source {} owner {} : {} : {}",
             destPath, sourcePath, status.getOwner(), status.getGroup(), status.getPermission());
-    destPath.getFileSystem(clonedConf).setOwner(destPath, status.getOwner(), status.getGroup());
-    destPath.getFileSystem(clonedConf).setPermission(destPath, status.getPermission());
-    setAclsToTarget(status, sourcePath, destPath, clonedConf);
+    preserveDistCpAttributes(destPath, sourcePath, clonedConf, status);
     return createdDir;
   }
 
-  private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf) throws IOException {
+  private void preserveDistCpAttributes(Path destPath, Path sourcePath, HiveConf clonedConf, FileStatus status)
+      throws IOException {
+    String preserveString = getPreserveString(clonedConf);
+    LOG.info("Preserving DistCp Attributes: {}", preserveString);
+    FileSystem destFs = destPath.getFileSystem(clonedConf);
+    // If both preserve user and group are specified.
+    if (preserveString.contains("u") && preserveString.contains("g")) {
+      destFs.setOwner(destPath, status.getOwner(), status.getGroup());
+    } else if (preserveString.contains("u")) {
+      // If only preserve user is specified.
+      destFs.setOwner(destPath, status.getOwner(), null);
+    } else if (preserveString.contains("g")) {
+      // If only preserve group is specified.
+      destFs.setOwner(destPath, null, status.getGroup());
+    }
+    // Preserve permissions
+    if (preserveString.contains("p")) {
+      destFs.setPermission(destPath, status.getPermission());
+    }
+    // Preserve ACL
+    if (preserveString.contains("a")) {
+      setAclsToTarget(status, sourcePath, destPath, clonedConf);
+    }
+  }
+
+  private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf)
+      throws IOException {
     // Check if distCp options contains preserve ACL.
-    if (isPreserveAcl(clonedConf)) {
-      AclStatus sourceAcls =
-          sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath);
-      if (sourceAcls != null && sourceAcls.getEntries().size() > 0) {
-        destPath.getFileSystem(clonedConf).removeAcl(destPath);
-        List<AclEntry> effectiveAclEntries = AclUtil
-            .getAclFromPermAndEntries(sourceStatus.getPermission(),
-                sourceAcls.getEntries());
-        destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries);
-      }
+    AclStatus sourceAcls = sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath);
+    if (sourceAcls != null && sourceAcls.getEntries().size() > 0) {
+      destPath.getFileSystem(clonedConf).removeAcl(destPath);
+      List<AclEntry> effectiveAclEntries =
+          AclUtil.getAclFromPermAndEntries(sourceStatus.getPermission(), sourceAcls.getEntries());
+      destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries);
     }
   }
 
-  private boolean isPreserveAcl(HiveConf clonedConf) {
+  private String getPreserveString(HiveConf clonedConf) {
     List<String> distCpOptions = constructDistCpOptions(clonedConf);
     for (String option : distCpOptions) {
       if (option.startsWith("-p")) {
-        if (option.contains("a")) {
-          return true;
-        } else {
-          return false;
-        }
+
+        return option.replaceFirst("-p", "");
       }
     }
-    return false;
+    return "";
   }
 
   private boolean setTargetPathOwner(Path targetPath, Path sourcePath, UserGroupInformation proxyUser,