You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ct...@apache.org on 2015/12/04 16:47:42 UTC
hive git commit: HIVE-12505:Insert overwrite in same encrypted zone
silently fails to remove some existing files (Chaoyu Tang,
reviewed by Aihua Xu)
Repository: hive
Updated Branches:
refs/heads/master 22fc39796 -> 114eabcbe
HIVE-12505:Insert overwrite in same encrypted zone silently fails to remove some existing files (Chaoyu Tang, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/114eabcb
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/114eabcb
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/114eabcb
Branch: refs/heads/master
Commit: 114eabcbea24491607f4f933e5033a5986b3b2f4
Parents: 22fc397
Author: ctang <ct...@gmail.com>
Authored: Fri Dec 4 10:46:23 2015 -0500
Committer: ctang <ct...@gmail.com>
Committed: Fri Dec 4 10:46:23 2015 -0500
----------------------------------------------------------------------
.../apache/hadoop/hive/common/FileUtils.java | 66 ++++++++--
.../test/resources/testconfiguration.properties | 3 +-
.../apache/hadoop/hive/ql/metadata/Hive.java | 26 +++-
.../clientpositive/encryption_with_trash.q | 33 +++++
.../encrypted/encryption_with_trash.q.out | 122 +++++++++++++++++++
5 files changed, 234 insertions(+), 16 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
index f943781..5dd9f40 100644
--- a/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/FileUtils.java
@@ -574,7 +574,7 @@ public final class FileUtils {
}
/**
- * Deletes all files under a directory, sending them to the trash. Leaves the directory as is.
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
* @param fs FileSystem to use
* @param f path of directory
* @param conf hive configuration
@@ -582,17 +582,34 @@ public final class FileUtils {
* @throws FileNotFoundException
* @throws IOException
*/
- public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf) throws FileNotFoundException, IOException {
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf)
+ throws FileNotFoundException, IOException {
+ return trashFilesUnderDir(fs, f, conf, true);
+ }
+
+ /**
+ * Trashes or deletes all files under a directory. Leaves the directory as is.
+ * @param fs FileSystem to use
+ * @param f path of directory
+ * @param conf hive configuration
+ * @param forceDelete whether to force delete files if trashing does not succeed
+ * @return true if deletion successful
+ * @throws FileNotFoundException
+ * @throws IOException
+ */
+ public static boolean trashFilesUnderDir(FileSystem fs, Path f, Configuration conf,
+ boolean forceDelete) throws FileNotFoundException, IOException {
FileStatus[] statuses = fs.listStatus(f, HIDDEN_FILES_PATH_FILTER);
boolean result = true;
for (FileStatus status : statuses) {
- result = result & moveToTrash(fs, status.getPath(), conf);
+ result = result & moveToTrash(fs, status.getPath(), conf, forceDelete);
}
return result;
}
/**
- * Move a particular file or directory to the trash.
+ * Move a particular file or directory to the trash. If for a certain reason the trashing fails
+ * it will force deletes the file or directory
* @param fs FileSystem to use
* @param f path of file or directory to move to trash.
* @param conf
@@ -600,18 +617,47 @@ public final class FileUtils {
* @throws IOException
*/
public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf) throws IOException {
+ return moveToTrash(fs, f, conf, true);
+ }
+
+ /**
+ * Move a particular file or directory to the trash.
+ * @param fs FileSystem to use
+ * @param f path of file or directory to move to trash.
+ * @param conf
+ * @param forceDelete whether force delete the file or directory if trashing fails
+ * @return true if move successful
+ * @throws IOException
+ */
+ public static boolean moveToTrash(FileSystem fs, Path f, Configuration conf, boolean forceDelete)
+ throws IOException {
LOG.info("deleting " + f);
HadoopShims hadoopShim = ShimLoader.getHadoopShims();
- if (hadoopShim.moveToAppropriateTrash(fs, f, conf)) {
- LOG.info("Moved to trash: " + f);
- return true;
+ boolean result = false;
+ try {
+ result = hadoopShim.moveToAppropriateTrash(fs, f, conf);
+ if (result) {
+ LOG.info("Moved to trash: " + f);
+ return true;
+ }
+ } catch (IOException ioe) {
+ if (forceDelete) {
+ // for whatever failure reason including that trash has lower encryption zone
+ // retry with force delete
+ LOG.warn(ioe.getMessage() + "; Force to delete it.");
+ } else {
+ throw ioe;
+ }
}
- boolean result = fs.delete(f, true);
- if (!result) {
- LOG.error("Failed to delete " + f);
+ if (forceDelete) {
+ result = fs.delete(f, true);
+ if (!result) {
+ LOG.error("Failed to delete " + f);
+ }
}
+
return result;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/itests/src/test/resources/testconfiguration.properties
----------------------------------------------------------------------
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index 1031655..39ee9d3 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -471,7 +471,8 @@ encrypted.query.files=encryption_join_unencrypted_tbl.q,\
encryption_drop_table.q \
encryption_insert_values.q \
encryption_drop_view.q \
- encryption_drop_partition.q
+ encryption_drop_partition.q \
+ encryption_with_trash.q
beeline.positive.exclude=add_part_exist.q,\
alter1.q,\
http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index 488d923..a4ada24 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2953,19 +2953,35 @@ private void constructOneLBLocationMap(FileStatus fSta,
List<List<Path[]>> result = checkPaths(conf, destFs, srcs, srcFs, destf, true);
if (oldPath != null) {
+ boolean oldPathDeleted = false;
+ boolean isOldPathUnderDestf = false;
try {
FileSystem fs2 = oldPath.getFileSystem(conf);
if (fs2.exists(oldPath)) {
// Do not delete oldPath if:
// - destf is subdir of oldPath
//if ( !(fs2.equals(destf.getFileSystem(conf)) && FileUtils.isSubDir(oldPath, destf, fs2)))
- if (FileUtils.isSubDir(oldPath, destf, fs2)) {
- FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
+ isOldPathUnderDestf = FileUtils.isSubDir(oldPath, destf, fs2);
+ if (isOldPathUnderDestf) {
+ // if oldPath is destf or its subdir, its should definitely be deleted, otherwise its
+ // existing content might result in incorrect (extra) data.
+ // But not sure why we changed not to delete the oldPath in HIVE-8750 if it is
+ // not the destf or its subdir?
+ oldPathDeleted = FileUtils.trashFilesUnderDir(fs2, oldPath, conf);
}
}
- } catch (Exception e) {
- //swallow the exception
- LOG.warn("Directory " + oldPath.toString() + " cannot be removed: " + e, e);
+ } catch (IOException e) {
+ if (isOldPathUnderDestf) {
+ // if oldPath is a subdir of destf but it could not be cleaned
+ throw new HiveException("Directory " + oldPath.toString()
+ + " could not be cleaned up.", e);
+ } else {
+ //swallow the exception since it won't affect the final result
+ LOG.warn("Directory " + oldPath.toString() + " cannot be cleaned: " + e, e);
+ }
+ }
+ if (isOldPathUnderDestf && !oldPathDeleted) {
+ throw new HiveException("Destination directory " + destf + " has not be cleaned up.");
}
}
http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/test/queries/clientpositive/encryption_with_trash.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/encryption_with_trash.q b/ql/src/test/queries/clientpositive/encryption_with_trash.q
new file mode 100644
index 0000000..8f8789a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/encryption_with_trash.q
@@ -0,0 +1,33 @@
+set fs.trash.interval=5
+
+-- SORT_QUERY_RESULTS
+
+-- init
+drop table IF EXISTS encryptedTableSrc PURGE;
+drop table IF EXISTS unencryptedTable PURGE;
+
+create table encryptedTableSrc(key string, value string)
+LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc';
+
+create table encryptedTable(key string, value string) partitioned by (ds string)
+ LOCATION '${hiveconf:hive.metastore.warehouse.dir}/encryptedTable';
+CRYPTO CREATE_KEY --keyName key_1 --bitLength 128;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTableSrc;
+CRYPTO CREATE_ZONE --keyName key_1 --path ${hiveconf:hive.metastore.warehouse.dir}/encryptedTable;
+
+-- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502');
+
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc;
+select count(*) from encryptedTable where ds='today';
+
+-- clean up
+drop table encryptedTable PURGE;
+drop table unencryptedTable PURGE;
+CRYPTO DELETE_KEY --keyName key_1;
+set fs.trash.interval=0
http://git-wip-us.apache.org/repos/asf/hive/blob/114eabcb/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
new file mode 100644
index 0000000..3d1f75f
--- /dev/null
+++ b/ql/src/test/results/clientpositive/encrypted/encryption_with_trash.q.out
@@ -0,0 +1,122 @@
+Warning: Value had a \n character in it.
+PREHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table IF EXISTS unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTableSrc
+POSTHOOK: query: create table encryptedTableSrc(key string, value string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTableSrc
+PREHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@encryptedTable
+POSTHOOK: query: create table encryptedTable(key string, value string) partitioned by (ds string)
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@encryptedTable
+Encryption key created: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTableSrc' using key: 'key_1'
+Encryption zone created: '/build/ql/test/data/warehouse/encryptedTable' using key: 'key_1'
+PREHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+PREHOOK: type: QUERY
+PREHOOK: Input: default@values__tmp__table__1
+PREHOOK: Output: default@encryptedtablesrc
+POSTHOOK: query: -- insert src table from values
+insert into table encryptedTableSrc values ('501', 'val_501'), ('502', 'val_502')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@values__tmp__table__1
+POSTHOOK: Output: default@encryptedtablesrc
+POSTHOOK: Lineage: encryptedtablesrc.key SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col1, type:string, comment:), ]
+POSTHOOK: Lineage: encryptedtablesrc.value SIMPLE [(values__tmp__table__1)values__tmp__table__1.FieldSchema(name:tmp_values_col2, type:string, comment:), ]
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert into table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+4
+PREHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtablesrc
+PREHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: query: insert overwrite table encryptedTable partition (ds='today') select key, value from encryptedTableSrc
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtablesrc
+POSTHOOK: Output: default@encryptedtable@ds=today
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).key SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:key, type:string, comment:null), ]
+POSTHOOK: Lineage: encryptedtable PARTITION(ds=today).value SIMPLE [(encryptedtablesrc)encryptedtablesrc.FieldSchema(name:value, type:string, comment:null), ]
+PREHOOK: query: select count(*) from encryptedTable where ds='today'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+POSTHOOK: query: select count(*) from encryptedTable where ds='today'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Input: default@encryptedtable@ds=today
+#### A PARTIAL masked pattern was here #### data/warehouse/encryptedTable/.hive-staging
+2
+PREHOOK: query: -- clean up
+drop table encryptedTable PURGE
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@encryptedtable
+PREHOOK: Output: default@encryptedtable
+POSTHOOK: query: -- clean up
+drop table encryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@encryptedtable
+POSTHOOK: Output: default@encryptedtable
+PREHOOK: query: drop table unencryptedTable PURGE
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table unencryptedTable PURGE
+POSTHOOK: type: DROPTABLE
+Encryption key deleted: 'key_1'