You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2018/02/06 00:28:54 UTC
hive git commit: HIVE-18536 : IOW + DP is broken for insert-only ACID
(Sergey Shelukhin, reviewed by Eugene Koifman)
Repository: hive
Updated Branches:
refs/heads/master b9e46bc4a -> 1bcc88f15
HIVE-18536 : IOW + DP is broken for insert-only ACID (Sergey Shelukhin, reviewed by Eugene Koifman)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/1bcc88f1
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/1bcc88f1
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/1bcc88f1
Branch: refs/heads/master
Commit: 1bcc88f15b20e2a808528d664d35c0d21d6c9388
Parents: b9e46bc
Author: sergey <se...@apache.org>
Authored: Mon Feb 5 16:16:54 2018 -0800
Committer: sergey <se...@apache.org>
Committed: Mon Feb 5 16:16:54 2018 -0800
----------------------------------------------------------------------
.../apache/hadoop/hive/common/JavaUtils.java | 35 ++++++++------------
.../apache/hadoop/hive/ql/exec/Utilities.java | 33 ++++++++++--------
.../apache/hadoop/hive/ql/metadata/Hive.java | 2 ++
3 files changed, 35 insertions(+), 35 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
index 9f64b3d..57afbf8 100644
--- a/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
+++ b/common/src/java/org/apache/hadoop/hive/common/JavaUtils.java
@@ -186,32 +186,24 @@ public final class JavaUtils {
}
public static class IdPathFilter implements PathFilter {
- private String mmDirName;
- private final boolean isMatch, isIgnoreTemp, isPrefix;
+ private String baseDirName, deltaDirName;
+ private final boolean isMatch, isIgnoreTemp, isDeltaPrefix;
public IdPathFilter(long writeId, int stmtId, boolean isMatch) {
- this(writeId, stmtId, isMatch, false, false);
+ this(writeId, stmtId, isMatch, false);
}
+
public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp) {
- this(writeId, stmtId, isMatch, isIgnoreTemp, false);
- }
- public IdPathFilter(long writeId, int stmtId, boolean isMatch, boolean isIgnoreTemp, boolean isBaseDir) {
- String mmDirName = null;
- if (!isBaseDir) {
- mmDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" +
- String.format(DELTA_DIGITS, writeId) + "_";
- if (stmtId >= 0) {
- mmDirName += String.format(STATEMENT_DIGITS, stmtId);
- isPrefix = false;
- } else {
- isPrefix = true;
- }
- } else {
- mmDirName = BASE_PREFIX + "_" + String.format(DELTA_DIGITS, writeId);
- isPrefix = false;
+ String deltaDirName = null;
+ deltaDirName = DELTA_PREFIX + "_" + String.format(DELTA_DIGITS, writeId) + "_" +
+ String.format(DELTA_DIGITS, writeId) + "_";
+ isDeltaPrefix = (stmtId < 0);
+ if (!isDeltaPrefix) {
+ deltaDirName += String.format(STATEMENT_DIGITS, stmtId);
}
- this.mmDirName = mmDirName;
+ this.baseDirName = BASE_PREFIX + "_" + String.format(DELTA_DIGITS, writeId);
+ this.deltaDirName = deltaDirName;
this.isMatch = isMatch;
this.isIgnoreTemp = isIgnoreTemp;
}
@@ -219,7 +211,8 @@ public final class JavaUtils {
@Override
public boolean accept(Path path) {
String name = path.getName();
- if ((isPrefix && name.startsWith(mmDirName)) || (!isPrefix && name.equals(mmDirName))) {
+ if (name.equals(baseDirName) || (isDeltaPrefix && name.startsWith(deltaDirName))
+ || (!isDeltaPrefix && name.equals(deltaDirName))) {
return isMatch;
}
if (isIgnoreTemp && name.length() > 0) {
http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
index cf108e3..941dd58 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
@@ -4071,18 +4071,20 @@ public final class Utilities {
}
public static Path[] getMmDirectoryCandidates(FileSystem fs, Path path, int dpLevels,
- int lbLevels, PathFilter filter, long txnId, int stmtId, Configuration conf, boolean isBaseDir)
- throws IOException {
+ int lbLevels, PathFilter filter, long txnId, int stmtId, Configuration conf,
+ Boolean isBaseDir) throws IOException {
int skipLevels = dpLevels + lbLevels;
if (filter == null) {
- filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false, isBaseDir);
+ filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false);
}
if (skipLevels == 0) {
return statusToPath(fs.listStatus(path, filter));
}
// TODO: for some reason, globStatus doesn't work for masks like "...blah/*/delta_0000007_0000007*"
// the last star throws it off. So, for now, if stmtId is missing use recursion.
- if (stmtId < 0
+ // For the same reason, we cannot use it if we don't know isBaseDir. Currently, we don't
+ // /want/ to know isBaseDir because that is error prone; so, it ends up never being used.
+ if (stmtId < 0 || isBaseDir == null
|| (HiveConf.getBoolVar(conf, ConfVars.HIVE_MM_AVOID_GLOBSTATUS_ON_S3) && isS3(fs))) {
return getMmDirectoryCandidatesRecursive(fs, path, skipLevels, filter);
}
@@ -4162,8 +4164,8 @@ public final class Utilities {
return results.toArray(new Path[results.size()]);
}
- private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs,
- Path path, int skipLevels, PathFilter filter, long txnId, int stmtId, boolean isBaseDir) throws IOException {
+ private static Path[] getMmDirectoryCandidatesGlobStatus(FileSystem fs, Path path, int skipLevels,
+ PathFilter filter, long txnId, int stmtId, boolean isBaseDir) throws IOException {
StringBuilder sb = new StringBuilder(path.toUri().getPath());
for (int i = 0; i < skipLevels; i++) {
sb.append(Path.SEPARATOR).append('*');
@@ -4180,10 +4182,10 @@ public final class Utilities {
}
private static void tryDeleteAllMmFiles(FileSystem fs, Path specPath, Path manifestDir,
- int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter,
- long txnId, int stmtId, Configuration conf, boolean isBaseDir) throws IOException {
+ int dpLevels, int lbLevels, JavaUtils.IdPathFilter filter, long txnId, int stmtId,
+ Configuration conf) throws IOException {
Path[] files = getMmDirectoryCandidates(
- fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf, isBaseDir);
+ fs, specPath, dpLevels, lbLevels, filter, txnId, stmtId, conf, null);
if (files != null) {
for (Path path : files) {
Utilities.FILE_OP_LOGGER.info("Deleting {} on failure", path);
@@ -4220,8 +4222,10 @@ public final class Utilities {
}
}
- private static Path getManifestDir(Path specPath, long txnId, int stmtId, String unionSuffix, boolean isInsertOverwrite) {
- Path manifestPath = new Path(specPath, "_tmp." +
+ // TODO: we should get rid of isInsertOverwrite here too.
+ private static Path getManifestDir(
+ Path specPath, long txnId, int stmtId, String unionSuffix, boolean isInsertOverwrite) {
+ Path manifestPath = new Path(specPath, "_tmp." +
AcidUtils.baseOrDeltaSubdir(isInsertOverwrite, txnId, txnId, stmtId));
return (unionSuffix == null) ? manifestPath : new Path(manifestPath, unionSuffix);
@@ -4240,13 +4244,14 @@ public final class Utilities {
public static void handleMmTableFinalPath(Path specPath, String unionSuffix, Configuration hconf,
boolean success, int dpLevels, int lbLevels, MissingBucketsContext mbc, long txnId, int stmtId,
- Reporter reporter, boolean isMmTable, boolean isMmCtas, boolean isInsertOverwrite) throws IOException, HiveException {
+ Reporter reporter, boolean isMmTable, boolean isMmCtas, boolean isInsertOverwrite)
+ throws IOException, HiveException {
FileSystem fs = specPath.getFileSystem(hconf);
Path manifestDir = getManifestDir(specPath, txnId, stmtId, unionSuffix, isInsertOverwrite);
if (!success) {
JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true);
tryDeleteAllMmFiles(fs, specPath, manifestDir, dpLevels, lbLevels,
- filter, txnId, stmtId, hconf, isInsertOverwrite);
+ filter, txnId, stmtId, hconf);
return;
}
@@ -4269,7 +4274,7 @@ public final class Utilities {
}
Utilities.FILE_OP_LOGGER.debug("Looking for files in: {}", specPath);
- JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false, isInsertOverwrite);
+ JavaUtils.IdPathFilter filter = new JavaUtils.IdPathFilter(txnId, stmtId, true, false);
if (isMmCtas && !fs.exists(specPath)) {
Utilities.FILE_OP_LOGGER.info("Creating table directory for CTAS with no output at {}", specPath);
FileUtils.mkdir(fs, specPath, hconf);
http://git-wip-us.apache.org/repos/asf/hive/blob/1bcc88f1/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
index b1e05df..07999e2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
@@ -2190,6 +2190,8 @@ private void constructOneLBLocationMap(FileStatus fSta,
// Note: we ignore the statement ID here, because it's currently irrelevant for MoveTask
// where this is used; we always want to load everything; also the only case where
// we have multiple statements anyway is union.
+ Utilities.FILE_OP_LOGGER.trace(
+ "Looking for dynamic partitions in {} ({} levels)", loadPath, numDP);
Path[] leafStatus = Utilities.getMmDirectoryCandidates(
fs, loadPath, numDP, numLB, null, txnId, -1, conf, isInsertOverwrite);
for (Path p : leafStatus) {