You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ma...@apache.org on 2020/02/18 03:16:24 UTC
[hive] branch master updated: HIVE-22844 : Validate cm configs,
add retries in fs apis for cm. (Aasha Medhi,
reviewed by Mahesh Kumar Behera)
This is an automated email from the ASF dual-hosted git repository.
mahesh pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 21f801d HIVE-22844 : Validate cm configs, add retries in fs apis for cm. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
21f801d is described below
commit 21f801dc82b93eb9c6ee449be15a003cd228e02d
Author: Aasha Medhi <aa...@gmail.com>
AuthorDate: Tue Feb 18 08:45:09 2020 +0530
HIVE-22844 : Validate cm configs, add retries in fs apis for cm. (Aasha Medhi, reviewed by Mahesh Kumar Behera)
Signed-off-by: Mahesh Kumar Behera <ma...@apache.org>
---
.../java/org/apache/hadoop/hive/conf/HiveConf.java | 2 +-
.../MetastoreHousekeepingLeaderTestBase.java | 1 +
.../TestMetaStoreMultipleEncryptionZones.java | 83 +++++++++++++++++++++-
.../hadoop/hive/metastore/ReplChangeManager.java | 65 +++++++++++------
.../hadoop/hive/metastore/conf/MetastoreConf.java | 2 +-
.../hadoop/hive/metastore/HiveMetaStore.java | 77 +++++++++-----------
6 files changed, 165 insertions(+), 65 deletions(-)
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 2f695d4..d3cb60b 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -455,7 +455,7 @@ public class HiveConf extends Configuration {
REPLCMENCRYPTEDDIR("hive.repl.cm.encryptionzone.rootdir", ".cmroot",
"Root dir for ChangeManager if encryption zones are enabled, used for deleted files."),
REPLCMFALLBACKNONENCRYPTEDDIR("hive.repl.cm.nonencryptionzone.rootdir",
- "/user/${system:user.name}/cmroot/",
+ "",
"Root dir for ChangeManager for non encrypted paths if hive.repl.cmrootdir is encrypted."),
REPLCMINTERVAL("hive.repl.cm.interval","3600s",
new TimeValidator(TimeUnit.SECONDS),
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java
index d89d67c..7ba0d3e 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/MetastoreHousekeepingLeaderTestBase.java
@@ -109,6 +109,7 @@ class MetastoreHousekeepingLeaderTestBase {
MetastoreConf.setBoolVar(conf, ConfVars.REPLCMENABLED, true);
String cmroot = "hdfs://" + miniDFS.getNameNode().getHostAndPort() + "/cmroot";
MetastoreConf.setVar(conf, ConfVars.REPLCMDIR, cmroot);
+ MetastoreConf.setVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR, cmroot);
threadNames.put(ReplChangeManager.CM_THREAD_NAME_PREFIX, false);
}
diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java
index 51bb787..41a1ce9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestMetaStoreMultipleEncryptionZones.java
@@ -1364,8 +1364,10 @@ public class TestMetaStoreMultipleEncryptionZones {
"hdfs://" + miniDFSCluster.getNameNode().getHostAndPort()
+ HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal);
- String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmroot";
+ String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootDirEncrypted";
encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted);
+ FileSystem cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(hiveConf);
+ cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted));
encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmrootFallBack);
//Create cm in encrypted zone
@@ -1410,10 +1412,89 @@ public class TestMetaStoreMultipleEncryptionZones {
exceptionThrown = true;
}
assertFalse(exceptionThrown);
+ cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true);
ReplChangeManager.resetReplChangeManagerInstance();
initReplChangeManager();
}
+ @Test
+ public void testCmrootFallbackEncrypted() throws Exception {
+ HiveConf encryptedHiveConf = new HiveConf(TestReplChangeManager.class);
+ encryptedHiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true);
+ encryptedHiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60);
+ encryptedHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
+ "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort()
+ + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal);
+ String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootIsEncrypted";
+ String cmRootFallbackEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort()
+ + "/cmrootFallbackEncrypted";
+ FileSystem cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(encryptedHiveConf);
+ try {
+ cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted));
+ cmrootdirEncryptedFs.mkdirs(new Path(cmRootFallbackEncrypted));
+ encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted);
+ encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted);
+
+ //Create cm in encrypted zone
+ HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf);
+ shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db");
+ shimCmEncrypted.createEncryptionZone(new Path(cmRootFallbackEncrypted), "test_key_db");
+ ReplChangeManager.resetReplChangeManagerInstance();
+ boolean exceptionThrown = false;
+ try {
+ new Warehouse(encryptedHiveConf);
+ } catch (MetaException e) {
+ exceptionThrown = true;
+ assertTrue(e.getMessage().contains("should not be encrypted"));
+ }
+ assertTrue(exceptionThrown);
+ } finally {
+ cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true);
+ cmrootdirEncryptedFs.delete(new Path(cmRootFallbackEncrypted), true);
+ ReplChangeManager.resetReplChangeManagerInstance();
+ initReplChangeManager();
+ }
+ }
+
+ @Test
+ public void testCmrootFallbackRelative() throws Exception {
+ HiveConf encryptedHiveConf = new HiveConf(TestReplChangeManager.class);
+ encryptedHiveConf.setBoolean(HiveConf.ConfVars.REPLCMENABLED.varname, true);
+ encryptedHiveConf.setInt(CommonConfigurationKeysPublic.FS_TRASH_INTERVAL_KEY, 60);
+ encryptedHiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname,
+ "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort()
+ + HiveConf.ConfVars.METASTOREWAREHOUSE.defaultStrVal);
+ String cmrootdirEncrypted = "hdfs://" + miniDFSCluster.getNameNode().getHostAndPort() + "/cmrootIsEncrypted";
+ String cmRootFallbackEncrypted = "cmrootFallbackEncrypted";
+ FileSystem cmrootdirEncryptedFs = new Path(cmrootdirEncrypted).getFileSystem(encryptedHiveConf);
+ try {
+ cmrootdirEncryptedFs.mkdirs(new Path(cmrootdirEncrypted));
+ cmrootdirEncryptedFs.mkdirs(new Path(cmRootFallbackEncrypted));
+ encryptedHiveConf.set(HiveConf.ConfVars.REPLCMDIR.varname, cmrootdirEncrypted);
+ encryptedHiveConf.set(HiveConf.ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.varname, cmRootFallbackEncrypted);
+
+ //Create cm in encrypted zone
+ HadoopShims.HdfsEncryptionShim shimCmEncrypted = ShimLoader.getHadoopShims().createHdfsEncryptionShim(fs, conf);
+ shimCmEncrypted.createEncryptionZone(new Path(cmrootdirEncrypted), "test_key_db");
+
+ ReplChangeManager.resetReplChangeManagerInstance();
+ boolean exceptionThrown = false;
+ try {
+ new Warehouse(encryptedHiveConf);
+ } catch (MetaException e) {
+ exceptionThrown = true;
+ assertTrue(e.getMessage().contains("should be absolute"));
+ }
+ assertTrue(exceptionThrown);
+ } finally {
+ cmrootdirEncryptedFs.delete(new Path(cmrootdirEncrypted), true);
+ cmrootdirEncryptedFs.delete(new Path(cmRootFallbackEncrypted), true);
+ ReplChangeManager.resetReplChangeManagerInstance();
+ initReplChangeManager();
+ }
+ }
+
+
private void createFile(Path path, String content) throws IOException {
FSDataOutputStream output = path.getFileSystem(hiveConf).create(path);
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
index 1041d92..8e1bb4e 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/ReplChangeManager.java
@@ -59,7 +59,7 @@ public class ReplChangeManager {
private static boolean inited = false;
private static boolean enabled = false;
- private static Map<String, String> encryptionZones = new HashMap<>();
+ private static Map<String, String> encryptionZoneToCmrootMapping = new HashMap<>();
private static HadoopShims hadoopShims = ShimLoader.getHadoopShims();
private static Configuration conf;
private String msUser;
@@ -156,23 +156,35 @@ public class ReplChangeManager {
cmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMDIR);
encryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMENCRYPTEDDIR);
fallbackNonEncryptedCmRootDir = MetastoreConf.getVar(conf, ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR);
+ //validate cmRootEncrypted is absolute
+ Path cmRootEncrypted = new Path(encryptedCmRootDir);
+ if (cmRootEncrypted.isAbsolute()) {
+ throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be a relative path");
+ }
//Create default cm root
Path cmroot = new Path(cmRootDir);
createCmRoot(cmroot);
FileSystem cmRootFs = cmroot.getFileSystem(conf);
HdfsEncryptionShim pathEncryptionShim = hadoopShims
.createHdfsEncryptionShim(cmRootFs, conf);
- Path cmRootEncrypted = new Path(encryptedCmRootDir);
- if (cmRootEncrypted.isAbsolute()) {
- throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be a relative path");
- }
if (pathEncryptionShim.isPathEncrypted(cmroot)) {
//If cm root is encrypted we keep using it for the encryption zone
String encryptionZonePath = cmRootFs.getUri()
+ pathEncryptionShim.getEncryptionZoneForPath(cmroot).getPath();
- encryptionZones.put(encryptionZonePath, cmRootDir);
+ encryptionZoneToCmrootMapping.put(encryptionZonePath, cmRootDir);
} else {
- encryptionZones.put(NO_ENCRYPTION, cmRootDir);
+ encryptionZoneToCmrootMapping.put(NO_ENCRYPTION, cmRootDir);
+ }
+ if (!StringUtils.isEmpty(fallbackNonEncryptedCmRootDir)) {
+ Path cmRootFallback = new Path(fallbackNonEncryptedCmRootDir);
+ if (!cmRootFallback.isAbsolute()) {
+ throw new MetaException(ConfVars.REPLCMENCRYPTEDDIR.getHiveName() + " should be absolute path");
+ }
+ createCmRoot(cmRootFallback);
+ if (pathEncryptionShim.isPathEncrypted(cmRootFallback)) {
+ throw new MetaException(ConfVars.REPLCMFALLBACKNONENCRYPTEDDIR.getHiveName()
+ + " should not be encrypted");
+ }
}
UserGroupInformation usergroupInfo = UserGroupInformation.getCurrentUser();
msUser = usergroupInfo.getShortUserName();
@@ -500,7 +512,7 @@ public class ReplChangeManager {
.namingPattern(CM_THREAD_NAME_PREFIX + "%d")
.daemon(true)
.build());
- executor.scheduleAtFixedRate(new CMClearer(encryptionZones,
+ executor.scheduleAtFixedRate(new CMClearer(encryptionZoneToCmrootMapping,
MetastoreConf.getTimeVar(conf, ConfVars.REPLCMRETIAN, TimeUnit.SECONDS), conf),
0, MetastoreConf.getTimeVar(conf, ConfVars.REPLCMINTERVAL, TimeUnit.SECONDS), TimeUnit.SECONDS);
}
@@ -553,14 +565,14 @@ public class ReplChangeManager {
//at the root of the encryption zone
cmrootDir = encryptionZonePath + Path.SEPARATOR + encryptedCmRootDir;
}
- if (encryptionZones.containsKey(encryptionZonePath)) {
- cmroot = new Path(encryptionZones.get(encryptionZonePath));
+ if (encryptionZoneToCmrootMapping.containsKey(encryptionZonePath)) {
+ cmroot = new Path(encryptionZoneToCmrootMapping.get(encryptionZonePath));
} else {
cmroot = new Path(cmrootDir);
synchronized (instance) {
- if (!encryptionZones.containsKey(encryptionZonePath)) {
+ if (!encryptionZoneToCmrootMapping.containsKey(encryptionZonePath)) {
createCmRoot(cmroot);
- encryptionZones.put(encryptionZonePath, cmrootDir);
+ encryptionZoneToCmrootMapping.put(encryptionZonePath, cmrootDir);
}
}
}
@@ -569,11 +581,22 @@ public class ReplChangeManager {
}
private static void createCmRoot(Path cmroot) throws IOException {
- FileSystem cmFs = cmroot.getFileSystem(conf);
- // Create cmroot with permission 700 if not exist
- if (!cmFs.exists(cmroot)) {
- cmFs.mkdirs(cmroot);
- cmFs.setPermission(cmroot, new FsPermission("700"));
+ Retry<Void> retriable = new Retry<Void>(IOException.class) {
+ @Override
+ public Void execute() throws IOException {
+ FileSystem cmFs = cmroot.getFileSystem(conf);
+ // Create cmroot with permission 700 if not exist
+ if (!cmFs.exists(cmroot)) {
+ cmFs.mkdirs(cmroot);
+ cmFs.setPermission(cmroot, new FsPermission("700"));
+ }
+ return null;
+ }
+ };
+ try {
+ retriable.run();
+ } catch (Exception e) {
+ throw new IOException(org.apache.hadoop.util.StringUtils.stringifyException(e));
}
}
@@ -582,7 +605,7 @@ public class ReplChangeManager {
inited = false;
enabled = false;
instance = null;
- encryptionZones.clear();
+ encryptionZoneToCmrootMapping.clear();
}
public static final PathFilter CMROOT_PATH_FILTER = new PathFilter() {
@@ -590,8 +613,10 @@ public class ReplChangeManager {
public boolean accept(Path p) {
if (enabled) {
String name = p.getName();
- return !name.contains(cmRootDir) && !name.contains(encryptedCmRootDir)
- && !name.contains(fallbackNonEncryptedCmRootDir);
+ return StringUtils.isEmpty(fallbackNonEncryptedCmRootDir)
+ ? (!name.contains(cmRootDir) && !name.contains(encryptedCmRootDir))
+ : (!name.contains(cmRootDir) && !name.contains(encryptedCmRootDir)
+ && !name.contains(fallbackNonEncryptedCmRootDir));
}
return true;
}
diff --git a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
index 2aeb374..58b67e8 100644
--- a/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
+++ b/standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
@@ -920,7 +920,7 @@ public class MetastoreConf {
REPLCMENCRYPTEDDIR("metastore.repl.cm.encryptionzone.rootdir", "hive.repl.cm.encryptionzone.rootdir", ".cmroot",
"Root dir for ChangeManager if encryption zones are enabled, used for deleted files."),
REPLCMFALLBACKNONENCRYPTEDDIR("metastore.repl.cm.nonencryptionzone.rootdir",
- "hive.repl.cm.nonencryptionzone.rootdir", "/user/${system:user.name}/cmroot/",
+ "hive.repl.cm.nonencryptionzone.rootdir", "",
"Root dir for ChangeManager for non encrypted paths if hive.repl.cmrootdir is encrypted."),
REPLCMRETIAN("metastore.repl.cm.retain", "hive.repl.cm.retain", 24, TimeUnit.HOURS,
"Time to retain removed files in cmrootdir."),
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index b6de146..c88c889 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -2842,27 +2842,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
*/
private void deleteTableData(Path tablePath, boolean ifPurge, boolean shouldEnableCm) {
if (tablePath != null) {
- try {
- if (shouldEnableCm) {
- //Don't delete cmdir if its inside the table path
- FileStatus[] statuses = tablePath.getFileSystem(conf).listStatus(tablePath,
- ReplChangeManager.CMROOT_PATH_FILTER);
- for (final FileStatus status : statuses) {
- wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm);
- }
- //Check if table directory is empty, delete it
- FileStatus[] statusWithoutFilter = tablePath.getFileSystem(conf).listStatus(tablePath);
- if (statusWithoutFilter.length == 0) {
- wh.deleteDir(tablePath, true, ifPurge, shouldEnableCm);
- }
- } else {
- //If no cm delete the complete table directory
- wh.deleteDir(tablePath, true, ifPurge, shouldEnableCm);
- }
- } catch (Exception e) {
- LOG.error("Failed to delete table directory: " + tablePath +
- " " + e.getMessage());
- }
+ deleteDataExcludeCmroot(tablePath, ifPurge, shouldEnableCm);
}
}
@@ -2897,27 +2877,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private void deletePartitionData(List<Path> partPaths, boolean ifPurge, boolean shouldEnableCm) {
if (partPaths != null && !partPaths.isEmpty()) {
for (Path partPath : partPaths) {
- try {
- if (shouldEnableCm) {
- //Don't delete cmdir if its inside the partition path
- FileStatus[] statuses = partPath.getFileSystem(conf).listStatus(partPath,
- ReplChangeManager.CMROOT_PATH_FILTER);
- for (final FileStatus status : statuses) {
- wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm);
- }
- //Check if table directory is empty, delete it
- FileStatus[] statusWithoutFilter = partPath.getFileSystem(conf).listStatus(partPath);
- if (statusWithoutFilter.length == 0) {
- wh.deleteDir(partPath, true, ifPurge, shouldEnableCm);
- }
- } else {
- //If no cm delete the complete table directory
- wh.deleteDir(partPath, true, ifPurge, shouldEnableCm);
- }
- } catch (Exception e) {
- LOG.error("Failed to delete partition directory: " + partPath +
- " " + e.getMessage());
- }
+ deleteDataExcludeCmroot(partPath, ifPurge, shouldEnableCm);
}
}
}
@@ -2945,6 +2905,39 @@ public class HiveMetaStore extends ThriftHiveMetastore {
}
/**
+ * Delete data from path excluding cmdir
+ * and for each that fails logs an error.
+ *
+ * @param path
+ * @param ifPurge completely purge the partition (skipping trash) while
+ * removing data from warehouse
+ * @param shouldEnableCm If cm should be enabled
+ */
+ private void deleteDataExcludeCmroot(Path path, boolean ifPurge, boolean shouldEnableCm) {
+ try {
+ if (shouldEnableCm) {
+ //Don't delete cmdir if its inside the partition path
+ FileStatus[] statuses = path.getFileSystem(conf).listStatus(path,
+ ReplChangeManager.CMROOT_PATH_FILTER);
+ for (final FileStatus status : statuses) {
+ wh.deleteDir(status.getPath(), true, ifPurge, shouldEnableCm);
+ }
+ //Check if table directory is empty, delete it
+ FileStatus[] statusWithoutFilter = path.getFileSystem(conf).listStatus(path);
+ if (statusWithoutFilter.length == 0) {
+ wh.deleteDir(path, true, ifPurge, shouldEnableCm);
+ }
+ } else {
+ //If no cm delete the complete table directory
+ wh.deleteDir(path, true, ifPurge, shouldEnableCm);
+ }
+ } catch (Exception e) {
+ LOG.error("Failed to delete directory: " + path +
+ " " + e.getMessage());
+ }
+ }
+
+ /**
* Deletes the partitions specified by catName, dbName, tableName. If checkLocation is true, for
* locations of partitions which may not be subdirectories of tablePath checks to make sure the
* locations are writable.