You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by aa...@apache.org on 2021/08/09 10:46:10 UTC
[hive] branch master updated: HIVE-25350. Replication fails for
external tables on setting owner/groups. (#2498)(Ayush Saxena,
reviewed by Arko Sharma)
This is an automated email from the ASF dual-hosted git repository.
aasha pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8785dd2 HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma)
8785dd2 is described below
commit 8785dd23c72e7a3fb61da1bce34ae33ff8c7561d
Author: Ayush Saxena <ay...@apache.org>
AuthorDate: Mon Aug 9 16:15:51 2021 +0530
HIVE-25350. Replication fails for external tables on setting owner/groups. (#2498)(Ayush Saxena, reviewed by Arko Sharma)
---
.../TestReplicationScenariosExternalTables.java | 45 ++++++++++++++---
.../hadoop/hive/ql/exec/repl/DirCopyTask.java | 59 ++++++++++++++--------
2 files changed, 76 insertions(+), 28 deletions(-)
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
index 3e794ef..44fdd00 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/ql/parse/TestReplicationScenariosExternalTables.java
@@ -87,6 +87,7 @@ import static org.apache.hadoop.hive.metastore.ReplChangeManager.SOURCE_OF_REPLI
import static org.apache.hadoop.hive.ql.exec.repl.util.ReplUtils.INC_BOOTSTRAP_ROOT_DIR_NAME;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
public class TestReplicationScenariosExternalTables extends BaseReplicationAcrossInstances {
@@ -357,6 +358,7 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
"/" + testName.getMethodName() + "/" + primaryDbName + "/" + "a/");
DistributedFileSystem fs = primary.miniDFSCluster.getFileSystem();
fs.mkdirs(externalTableLocation, new FsPermission("777"));
+ fs.setOwner(externalTableLocation,"user1","group1");
Path externalFileLoc = new Path(externalTableLocation, "file1.txt");
try (FSDataOutputStream outputStream = fs.create(externalFileLoc)) {
@@ -382,11 +384,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
fs.modifyAclEntries(externalTableLocation, aclEntries);
fs.modifyAclEntries(externalFileLoc, aclEntries);
- // Run bootstrap with distcp options to preserve ACL.
- List<String> withClause = Arrays
- .asList("'distcp.options.update'=''", "'distcp.options.puga'=''",
- "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
- + "'='true'");
+ // Run bootstrap without distcp options to preserve options.
+ List<String> withClause = Arrays.asList("'distcp.options.update'=''",
+ "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname + "'='true'");
primary.run("use " + primaryDbName).run(
"create external table a (i int, j int) "
@@ -400,13 +400,41 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
.verifyResults(new String[] {"1", "13"}).run("select j from a")
.verifyResults(new String[] {"2", "21"});
- // Verify the ACL's of the destination table directory and data file are
- // same as that of source.
+ // Verify the attributes of the destination table directory and data file are
+ // not same as that of source.
Hive hiveForReplica = Hive.get(replica.hiveConf);
org.apache.hadoop.hive.ql.metadata.Table replicaTable =
hiveForReplica.getTable(replicatedDbName + ".a");
Path dataLocation = replicaTable.getDataLocation();
+ assertNotEquals("ACL entries are same for the data file.",
+ fs.getAclStatus(externalFileLoc).getEntries().size(),
+ fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries()
+ .size());
+ assertNotEquals("ACL entries are same for the table directory.",
+ fs.getAclStatus(externalTableLocation).getEntries().size(),
+ fs.getAclStatus(dataLocation).getEntries().size());
+
+ assertNotEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner());
+ assertNotEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup());
+
+ // Dump & load with preserve attributes set.
+ withClause = Arrays
+ .asList("'distcp.options.update'=''", "'distcp.options.pugpa'=''",
+ "'" + HiveConf.ConfVars.REPL_RUN_DATA_COPY_TASKS_ON_TARGET.varname
+ + "'='true'");
+
+ primary.run("use " + primaryDbName).dump(primaryDbName, withClause);
+
+ // Verify load is success and has the appropriate data.
+ replica.load(replicatedDbName, primaryDbName, withClause)
+ .run("use " + replicatedDbName).run("select i From a")
+ .verifyResults(new String[] {"1", "13"}).run("select j from a")
+ .verifyResults(new String[] {"2", "21"});
+
+ // Verify the ACL's of the destination table directory and data file are
+ // same as that of source.
+
assertEquals("ACL entries are not same for the data file.",
fs.getAclStatus(externalFileLoc).getEntries().size(),
fs.getAclStatus(new Path(dataLocation, "file1.txt")).getEntries()
@@ -414,6 +442,9 @@ public class TestReplicationScenariosExternalTables extends BaseReplicationAcros
assertEquals("ACL entries are not same for the table directory.",
fs.getAclStatus(externalTableLocation).getEntries().size(),
fs.getAclStatus(dataLocation).getEntries().size());
+
+ assertEquals(fs.getFileStatus(externalTableLocation).getOwner(), fs.getFileStatus(dataLocation).getOwner());
+ assertEquals(fs.getFileStatus(externalTableLocation).getGroup(), fs.getFileStatus(dataLocation).getGroup());
}
/**
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
index 2e0154a..0890082 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/repl/DirCopyTask.java
@@ -85,39 +85,56 @@ public class DirCopyTask extends Task<DirCopyWork> implements Serializable {
}
LOG.info("Setting permission for path dest {} from source {} owner {} : {} : {}",
destPath, sourcePath, status.getOwner(), status.getGroup(), status.getPermission());
- destPath.getFileSystem(clonedConf).setOwner(destPath, status.getOwner(), status.getGroup());
- destPath.getFileSystem(clonedConf).setPermission(destPath, status.getPermission());
- setAclsToTarget(status, sourcePath, destPath, clonedConf);
+ preserveDistCpAttributes(destPath, sourcePath, clonedConf, status);
return createdDir;
}
- private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf) throws IOException {
+ private void preserveDistCpAttributes(Path destPath, Path sourcePath, HiveConf clonedConf, FileStatus status)
+ throws IOException {
+ String preserveString = getPreserveString(clonedConf);
+ LOG.info("Preserving DistCp Attributes: {}", preserveString);
+ FileSystem destFs = destPath.getFileSystem(clonedConf);
+ // If both preserve user and group are specified.
+ if (preserveString.contains("u") && preserveString.contains("g")) {
+ destFs.setOwner(destPath, status.getOwner(), status.getGroup());
+ } else if (preserveString.contains("u")) {
+ // If only preserve user is specified.
+ destFs.setOwner(destPath, status.getOwner(), null);
+ } else if (preserveString.contains("g")) {
+ // If only preserve group is specified.
+ destFs.setOwner(destPath, null, status.getGroup());
+ }
+ // Preserve permissions
+ if (preserveString.contains("p")) {
+ destFs.setPermission(destPath, status.getPermission());
+ }
+ // Preserve ACL
+ if (preserveString.contains("a")) {
+ setAclsToTarget(status, sourcePath, destPath, clonedConf);
+ }
+ }
+
+ private void setAclsToTarget(FileStatus sourceStatus, Path sourcePath, Path destPath, HiveConf clonedConf)
+ throws IOException {
// Check if distCp options contains preserve ACL.
- if (isPreserveAcl(clonedConf)) {
- AclStatus sourceAcls =
- sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath);
- if (sourceAcls != null && sourceAcls.getEntries().size() > 0) {
- destPath.getFileSystem(clonedConf).removeAcl(destPath);
- List<AclEntry> effectiveAclEntries = AclUtil
- .getAclFromPermAndEntries(sourceStatus.getPermission(),
- sourceAcls.getEntries());
- destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries);
- }
+ AclStatus sourceAcls = sourcePath.getFileSystem(clonedConf).getAclStatus(sourcePath);
+ if (sourceAcls != null && sourceAcls.getEntries().size() > 0) {
+ destPath.getFileSystem(clonedConf).removeAcl(destPath);
+ List<AclEntry> effectiveAclEntries =
+ AclUtil.getAclFromPermAndEntries(sourceStatus.getPermission(), sourceAcls.getEntries());
+ destPath.getFileSystem(clonedConf).setAcl(destPath, effectiveAclEntries);
}
}
- private boolean isPreserveAcl(HiveConf clonedConf) {
+ private String getPreserveString(HiveConf clonedConf) {
List<String> distCpOptions = constructDistCpOptions(clonedConf);
for (String option : distCpOptions) {
if (option.startsWith("-p")) {
- if (option.contains("a")) {
- return true;
- } else {
- return false;
- }
+
+ return option.replaceFirst("-p", "");
}
}
- return false;
+ return "";
}
private boolean setTargetPathOwner(Path targetPath, Path sourcePath, UserGroupInformation proxyUser,