You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/02/23 20:16:43 UTC
hive git commit: HIVE-18710 : extend inheritPerms to ACID in Hive 2.X
(Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Repository: hive
Updated Branches:
refs/heads/branch-2 d72c35e47 -> 0dddd7cff
HIVE-18710 : extend inheritPerms to ACID in Hive 2.X (Sergey Shelukhin, reviewed by Ashutosh Chauhan)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/0dddd7cf
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/0dddd7cf
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/0dddd7cf
Branch: refs/heads/branch-2
Commit: 0dddd7cff1f102bd6e715c5c10730b9402fa5547
Parents: d72c35e
Author: sergey <se...@apache.org>
Authored: Fri Feb 23 12:16:27 2018 -0800
Committer: sergey <se...@apache.org>
Committed: Fri Feb 23 12:16:27 2018 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/common/FileUtils.java | 18 +++++++-
.../org/apache/hadoop/hive/io/HdfsUtils.java | 8 +++-
.../hive/ql/exec/AbstractFileMergeOperator.java | 1 +
.../hadoop/hive/ql/exec/FileSinkOperator.java | 2 +
.../apache/hadoop/hive/ql/exec/MoveTask.java | 17 +++++---
.../apache/hadoop/hive/ql/exec/Utilities.java | 2 +-
.../apache/hadoop/hive/ql/metadata/Hive.java | 45 +++++++++++++-------
.../hive/ql/txn/compactor/CompactorMR.java | 8 +++-
8 files changed, 73 insertions(+), 28 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index 2b7a57b..6f51ce0 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -510,6 +510,7 @@ public final class FileUtils {
*
* @param fs FileSystem to use
* @param f path to create.
+ * @param inheritPerms whether directory inherits the permission of the last-existing parent path
* @param conf Hive configuration
*
* @return true if directory created successfully. False otherwise, including if it exists.
@@ -678,8 +679,7 @@ public final class FileUtils {
} else {
//rename the directory
if (fs.rename(sourcePath, destPath)) {
- HdfsUtils.setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, destPath.getParent()), fs, destPath,
- true);
+ HdfsUtils.setParentFileStatus(conf, fs, destPath, true);
return true;
}
@@ -687,6 +687,20 @@ public final class FileUtils {
}
}
+ public static boolean renameWithPermsNoCheck(FileSystem fs, Path sourcePath,
+ Path destPath, Configuration conf) throws IOException {
+ return renameWithPermsNoCheck(fs, sourcePath, destPath, shouldInheritPerms(conf, fs), conf);
+ }
+
+ public static boolean renameWithPermsNoCheck(FileSystem fs, Path sourcePath,
+ Path destPath, boolean inheritPerms, Configuration conf) throws IOException {
+ LOG.debug("Renaming " + sourcePath + " to " + destPath);
+ boolean result = fs.rename(sourcePath, destPath);
+ if (!result || !inheritPerms) return result;
+ HdfsUtils.setParentFileStatus(conf, fs, destPath, true);
+ return true;
+ }
+
/**
* @param fs1
* @param fs2
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/common/src/java/org/apache/hadoop/hive/io/HdfsUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/io/HdfsUtils.java b/common/src/java/org/apache/hadoop/hive/io/HdfsUtils.java
index 16fc96e..a3f5d6d 100644
--- a/common/src/java/org/apache/hadoop/hive/io/HdfsUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/io/HdfsUtils.java
@@ -26,7 +26,6 @@ import java.util.List;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang.ArrayUtils;
-
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -39,7 +38,6 @@ import org.apache.hadoop.fs.permission.AclStatus;
import org.apache.hadoop.fs.permission.FsAction;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.DistributedFileSystem;
-
import org.apache.hadoop.hive.common.StorageUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -296,4 +294,10 @@ public class HdfsUtils {
return this.aclStatus;
}
}
+
+ public static void setParentFileStatus(
+ Configuration conf, FileSystem fs, Path destPath, boolean recursive) throws IOException {
+ setFullFileStatus(conf, new HdfsUtils.HadoopFileStatus(conf, fs, destPath.getParent()),
+ fs, destPath, recursive);
+ }
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
index 6bba057..e016d0e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractFileMergeOperator.java
@@ -220,6 +220,7 @@ public abstract class AbstractFileMergeOperator<T extends FileMergeDesc>
// if outPath does not exist, then it means all paths within combine split are skipped as
// they are incompatible for merge (for example: files without stripe stats).
// Those files will be added to incompatFileSet
+ // TODO: we might want to inherit permissions here: FileUtils.renameWithPermsNoCheck(fs, outPath, finalPath, jc)
if (fs.exists(outPath)) {
FileStatus fss = fs.getFileStatus(outPath);
if (!fs.rename(outPath, finalPath)) {
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
index a9d03d0..abe6f4e 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FileSinkOperator.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.hive.ql.exec;
import com.google.common.collect.Lists;
+
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
@@ -640,6 +641,7 @@ public class FileSinkOperator extends TerminalOperator<FileSinkDesc> implements
}
}
+
/**
* Report status to JT so that JT won't kill this task if closing takes too long
* due to too many files to close and the NN is overloaded.
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
index 5cf2c2b..6671519 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.hive.common.HiveStatsUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.io.HdfsUtils;
import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.Order;
@@ -79,6 +80,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
private static final long serialVersionUID = 1L;
private static transient final Logger LOG = LoggerFactory.getLogger(MoveTask.class);
+ private boolean inheritPerms;
public MoveTask() {
super();
@@ -94,7 +96,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
FileSystem fs = sourcePath.getFileSystem(conf);
if (isDfsDir) {
- moveFileInDfs (sourcePath, targetPath, fs);
+ moveFileInDfs(sourcePath, targetPath, fs, conf);
} else {
// This is a local file
FileSystem dstFs = FileSystem.getLocal(conf);
@@ -106,7 +108,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
}
}
- private void moveFileInDfs (Path sourcePath, Path targetPath, FileSystem fs)
+ private void moveFileInDfs(Path sourcePath, Path targetPath, FileSystem fs, HiveConf conf)
throws HiveException, IOException {
// if source exists, rename. Otherwise, create a empty directory
if (fs.exists(sourcePath)) {
@@ -129,7 +131,7 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
throw new HiveException("Unable to rename: " + sourcePath
+ " to: " + targetPath);
}
- } else if (!fs.mkdirs(targetPath)) {
+ } else if (!FileUtils.mkdir(fs, targetPath, inheritPerms, conf)) {
throw new HiveException("Unable to make directory: " + targetPath);
}
}
@@ -182,7 +184,8 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
}
fs.mkdirs(mkDirPath);
if (FileUtils.shouldInheritPerms(conf, fs)) {
- FileUtils.inheritPerms(conf, new HdfsUtils.HadoopFileStatus(conf, fs, actualPath), fs, mkDirPath, true);
+ FileUtils.inheritPerms(conf, new HdfsUtils.HadoopFileStatus(conf, fs, actualPath),
+ fs, mkDirPath, true);
}
}
return deletePath;
@@ -244,6 +247,8 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
@Override
public int execute(DriverContext driverContext) {
+ inheritPerms = HiveConf.getBoolVar(conf, ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
+
try {
if (driverContext.getCtx().getExplainAnalyze() == AnalyzeState.RUNNING) {
return 0;
@@ -270,9 +275,9 @@ public class MoveTask extends Task<MoveWork> implements Serializable {
Path destPath = lmfd.getTargetDirs().get(i);
FileSystem fs = destPath.getFileSystem(conf);
if (!fs.exists(destPath.getParent())) {
- fs.mkdirs(destPath.getParent());
+ FileUtils.mkdir(fs, destPath.getParent(), inheritPerms, conf);
}
- moveFile(srcPath, destPath, isDfsDir);
+ moveFile(srcPath, destPath, isDfsDir);
i++;
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index 614c29b..ef78b35 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -1147,6 +1147,7 @@ public final class Utilities {
* the src directory
* @param dst
* the target directory
+ * @param jc
* @throws IOException
*/
public static void renameOrMoveFiles(FileSystem fs, Path src, Path dst) throws IOException,
@@ -1159,7 +1160,6 @@ public final class Utilities {
// move file by file
FileStatus[] files = fs.listStatus(src);
for (FileStatus file : files) {
-
Path srcFilePath = file.getPath();
String fileName = srcFilePath.getName();
Path dstFilePath = new Path(dst, fileName);
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index eefa8f7..c30035b 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2913,6 +2913,8 @@ private void constructOneLBLocationMap(FileStatus fSta,
final boolean needToCopy = needToCopy(srcP, destf, srcFs, destFs);
final boolean isRenameAllowed = !needToCopy && !isSrcLocal;
+
+
// If we do a rename for a non-local file, we will be transfering the original
// file permissions from source to the destination. Else, in case of mvFile() where we
// copy from source to destination, we will inherit the destination's parent group ownership.
@@ -2920,7 +2922,11 @@ private void constructOneLBLocationMap(FileStatus fSta,
fullDestStatus.getFileStatus().getGroup();
if (null == pool) {
try {
+
Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
+ if (inheritPerms) {
+ HdfsUtils.setFullFileStatus(conf, fullDestStatus, destFs, destPath, false);
+ }
if (null != newFiles) {
newFiles.add(destPath);
@@ -2934,9 +2940,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
@Override
public ObjectPair<Path, Path> call() throws Exception {
SessionState.setCurrentSessionState(parentSession);
-
Path destPath = mvFile(conf, srcFs, srcP, destFs, destf, isSrcLocal, isRenameAllowed);
-
if (inheritPerms) {
// TODO: These are all files.
FileUtils.inheritPerms(conf, fullDestStatus, srcGroup, destFs, destPath, false, false);
@@ -3163,7 +3167,6 @@ private void constructOneLBLocationMap(FileStatus fSta,
new ThreadFactoryBuilder().setDaemon(true).setNameFormat("Move-Thread-%d").build()) : null;
/* Move files one by one because source is a subdirectory of destination */
for (final FileStatus srcStatus : srcs) {
-
final Path destFile = new Path(destf, srcStatus.getPath().getName());
if (null == pool) {
if(!destFs.rename(srcStatus.getPath(), destFile)) {
@@ -3175,11 +3178,12 @@ private void constructOneLBLocationMap(FileStatus fSta,
@Override
public Void call() throws Exception {
SessionState.setCurrentSessionState(parentSession);
+ // TODO: why does this preserve /source/ (tmp file) group combined with /dest/ status?
final String group = srcStatus.getGroup();
if(destFs.rename(srcStatus.getPath(), destFile)) {
if (inheritPerms) {
- // TODO: These are all files.
- FileUtils.inheritPerms(conf, desiredStatus, group, destFs, destFile, false, false);
+ // Note: for union, etc these may be directories.
+ FileUtils.inheritPerms(conf, desiredStatus, group, destFs, destFile, true);
}
} else {
throw new IOException("rename for src path: " + srcStatus.getPath() + " to dest path:"
@@ -3206,6 +3210,8 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
}
}
+ // TODO: the inheritPerms logic in pool and non-pool case is different (per-file vs once).
+ // Should this be reconciled?
return true;
} else {
if (destFs.rename(srcf, destf)) {
@@ -3262,10 +3268,13 @@ private void constructOneLBLocationMap(FileStatus fSta,
*/
static protected void copyFiles(HiveConf conf, Path srcf, Path destf,
FileSystem fs, boolean isSrcLocal, boolean isAcid, List<Path> newFiles) throws HiveException {
+ boolean inheritPerms = HiveConf.getBoolVar(conf,
+ HiveConf.ConfVars.HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
+
try {
// create the destination if it does not exist
if (!fs.exists(destf)) {
- FileUtils.mkdir(fs, destf, conf);
+ FileUtils.mkdir(fs, destf, inheritPerms, conf);
}
} catch (IOException e) {
throw new HiveException(
@@ -3291,14 +3300,14 @@ private void constructOneLBLocationMap(FileStatus fSta,
// If we're moving files around for an ACID write then the rules and paths are all different.
// You can blame this on Owen.
if (isAcid) {
- moveAcidFiles(srcFs, srcs, destf, newFiles);
+ moveAcidFiles(srcFs, srcs, destf, newFiles, inheritPerms, conf);
} else {
copyFiles(conf, fs, srcs, srcFs, destf, isSrcLocal, newFiles);
}
}
private static void moveAcidFiles(FileSystem fs, FileStatus[] stats, Path dst,
- List<Path> newFiles) throws HiveException {
+ List<Path> newFiles, boolean inheritPerms, Configuration conf) throws HiveException {
// The layout for ACID files is table|partname/base|delta|delete_delta/bucket
// We will always only be writing delta files. In the buckets created by FileSinkOperator
// it will look like bucket/delta|delete_delta/bucket. So we need to move that into
@@ -3327,16 +3336,17 @@ private void constructOneLBLocationMap(FileStatus fSta,
for (FileStatus origBucketStat : origBucketStats) {
Path origBucketPath = origBucketStat.getPath();
moveAcidDeltaFiles(AcidUtils.DELTA_PREFIX, AcidUtils.deltaFileFilter,
- fs, dst, origBucketPath, createdDeltaDirs, newFiles);
+ fs, dst, origBucketPath, createdDeltaDirs, newFiles, inheritPerms, conf);
moveAcidDeltaFiles(AcidUtils.DELETE_DELTA_PREFIX, AcidUtils.deleteEventDeltaDirFilter,
- fs, dst,origBucketPath, createdDeltaDirs, newFiles);
+ fs, dst,origBucketPath, createdDeltaDirs, newFiles, inheritPerms, conf);
}
}
}
+
private static void moveAcidDeltaFiles(String deltaFileType, PathFilter pathFilter, FileSystem fs,
- Path dst, Path origBucketPath, Set<Path> createdDeltaDirs,
- List<Path> newFiles) throws HiveException {
+ Path dst, Path origBucketPath, Set<Path> createdDeltaDirs,
+ List<Path> newFiles, boolean inheritPerms, Configuration conf) throws HiveException {
LOG.debug("Acid move looking for " + deltaFileType + " files in bucket " + origBucketPath);
FileStatus[] deltaStats = null;
@@ -3358,7 +3368,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
try {
if (!createdDeltaDirs.contains(deltaDest)) {
try {
- fs.mkdirs(deltaDest);
+ FileUtils.mkdir(fs, deltaDest, inheritPerms, conf);
createdDeltaDirs.add(deltaDest);
} catch (IOException swallowIt) {
// Don't worry about this, as it likely just means it's already been created.
@@ -3373,7 +3383,7 @@ private void constructOneLBLocationMap(FileStatus fSta,
Path bucketDest = new Path(deltaDest, bucketSrc.getName());
LOG.info("Moving bucket " + bucketSrc.toUri().toString() + " to " +
bucketDest.toUri().toString());
- fs.rename(bucketSrc, bucketDest);
+ FileUtils.renameWithPermsNoCheck(fs, bucketSrc, bucketDest, inheritPerms, conf);
if (newFiles != null) newFiles.add(bucketDest);
}
} catch (IOException e) {
@@ -3459,7 +3469,9 @@ private void constructOneLBLocationMap(FileStatus fSta,
// first call FileUtils.mkdir to make sure that destf directory exists, if not, it creates
// destf with inherited permissions
- boolean destfExist = FileUtils.mkdir(destFs, destf, conf);
+ boolean inheritPerms = HiveConf.getBoolVar(conf, HiveConf.ConfVars
+ .HIVE_WAREHOUSE_SUBDIR_INHERIT_PERMS);
+ boolean destfExist = FileUtils.mkdir(destFs, destf, inheritPerms, conf);
if(!destfExist) {
throw new IOException("Directory " + destf.toString()
+ " does not exist and could not be created.");
@@ -3477,7 +3489,8 @@ private void constructOneLBLocationMap(FileStatus fSta,
}
} else { // its either a file or glob
for (FileStatus src : srcs) {
- if (!moveFile(conf, src.getPath(), new Path(destf, src.getPath().getName()), true, isSrcLocal)) {
+ Path destFile = new Path(destf, src.getPath().getName());
+ if (!moveFile(conf, src.getPath(), destFile, true, isSrcLocal)) {
throw new IOException("Error moving: " + srcf + " into: " + destf);
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/0dddd7cf/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
index 5deec4b..a445c69 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/txn/compactor/CompactorMR.java
@@ -32,11 +32,14 @@ import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.common.JavaUtils;
import org.apache.hadoop.hive.common.StringableMap;
import org.apache.hadoop.hive.common.ValidCompactorTxnList;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.io.HdfsUtils;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
@@ -861,9 +864,12 @@ public class CompactorMR {
FileStatus[] contents = fs.listStatus(tmpLocation);//expect 1 base or delta dir in this list
//we have MIN_TXN, MAX_TXN and IS_MAJOR in JobConf so we could figure out exactly what the dir
//name is that we want to rename; leave it for another day
+ boolean inheritPerms = FileUtils.shouldInheritPerms(conf, fs);
for (int i = 0; i < contents.length; i++) {
Path newPath = new Path(finalLocation, contents[i].getPath().getName());
- fs.rename(contents[i].getPath(), newPath);
+ if (fs.rename(contents[i].getPath(), newPath) && inheritPerms) {
+ HdfsUtils.setParentFileStatus(conf, fs, newPath, true);
+ }
}
fs.delete(tmpLocation, true);
}