You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by go...@apache.org on 2018/10/03 20:12:03 UTC
hive git commit: HIVE-20338: LLAP: Force synthetic file-id for filesystems which have HDFS protocol impls with POSIX mutation semantics (Gopal V, reviewed by Sergey Shelukhin)
Repository: hive
Updated Branches:
refs/heads/master d9d431cfe -> 240bfb464
HIVE-20338: LLAP: Force synthetic file-id for filesystems which have HDFS protocol impls with POSIX mutation semantics (Gopal V, reviewed by Sergey Shelukhin)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/240bfb46
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/240bfb46
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/240bfb46
Branch: refs/heads/master
Commit: 240bfb464631ddb521406eac2348968bbee16c51
Parents: d9d431c
Author: Gopal V <go...@apache.org>
Authored: Wed Oct 3 12:22:10 2018 -0700
Committer: Gopal V <go...@apache.org>
Committed: Wed Oct 3 13:11:53 2018 -0700
----------------------------------------------------------------------
common/src/java/org/apache/hadoop/hive/conf/HiveConf.java | 3 ++-
.../hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java | 8 +++++---
.../hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java | 7 ++++---
ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java | 4 ++--
.../org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java | 7 +++++--
.../ql/io/parquet/vector/VectorizedParquetRecordReader.java | 3 ++-
6 files changed, 20 insertions(+), 12 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index a49f8af..531fabd 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -3942,7 +3942,8 @@ public class HiveConf extends Configuration {
"is unneeded. This is only necessary for ORC files written before HIVE-9660."),
LLAP_IO_USE_FILEID_PATH("hive.llap.io.use.fileid.path", true,
"Whether LLAP should use fileId (inode)-based path to ensure better consistency for the\n" +
- "cases of file overwrites. This is supported on HDFS."),
+ "cases of file overwrites. This is supported on HDFS. Disabling this also turns off any\n" +
+ "cache consistency checks based on fileid comparisons."),
// Restricted to text for now as this is a new feature; only text files can be sliced.
LLAP_IO_ENCODE_ENABLED("hive.llap.io.encode.enabled", true,
"Whether LLAP should try to re-encode and cache data for non-ORC formats. This is used\n" +
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
index e8a3b40..4f5b0a9 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/OrcEncodedDataReader.java
@@ -214,7 +214,9 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
fs = split.getPath().getFileSystem(jobConf);
fileKey = determineFileId(fs, split,
HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
- HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+ HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+ !HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH)
+ );
fileMetadata = getFileFooterFromCacheOrDisk();
final TypeDescription fileSchema = fileMetadata.getSchema();
@@ -464,7 +466,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
}
private static Object determineFileId(FileSystem fs, FileSplit split,
- boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
+ boolean allowSynthetic, boolean checkDefaultFs, boolean forceSynthetic) throws IOException {
if (split instanceof OrcSplit) {
Object fileKey = ((OrcSplit)split).getFileKey();
if (fileKey != null) {
@@ -472,7 +474,7 @@ public class OrcEncodedDataReader extends CallableWithNdc<Void>
}
}
LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does not have file ID");
- return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
+ return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs, forceSynthetic);
}
/**
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
----------------------------------------------------------------------
diff --git a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
index 2576175..658bc7d 100644
--- a/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
+++ b/llap-server/src/java/org/apache/hadoop/hive/llap/io/encoded/SerDeEncodedDataReader.java
@@ -216,7 +216,8 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
fs = split.getPath().getFileSystem(daemonConf);
fileKey = determineFileId(fs, split,
HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
- HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+ HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+ !HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_IO_USE_FILEID_PATH));
cacheTag = HiveConf.getBoolVar(daemonConf, ConfVars.LLAP_TRACK_CACHE_USAGE)
? LlapUtil.getDbAndTableNameForMetrics(split.getPath(), true) : null;
this.sourceInputFormat = sourceInputFormat;
@@ -1698,12 +1699,12 @@ public class SerDeEncodedDataReader extends CallableWithNdc<Void>
}
private static Object determineFileId(FileSystem fs, FileSplit split,
- boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
+ boolean allowSynthetic, boolean checkDefaultFs, boolean forceSynthetic) throws IOException {
/* TODO: support this optionally? this is not OrcSplit, but we could add a custom split.
Object fileKey = ((OrcSplit)split).getFileKey();
if (fileKey != null) return fileKey; */
LlapIoImpl.LOG.warn("Split for " + split.getPath() + " (" + split.getClass() + ") does not have file ID");
- return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs);
+ return HdfsUtils.getFileId(fs, split.getPath(), allowSynthetic, checkDefaultFs, forceSynthetic);
}
@Override
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
index 1158b52..3482cfc 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/HdfsUtils.java
@@ -45,8 +45,8 @@ public class HdfsUtils {
private static final Logger LOG = LoggerFactory.getLogger(HdfsUtils.class);
public static Object getFileId(FileSystem fileSystem, Path path,
- boolean allowSynthetic, boolean checkDefaultFs) throws IOException {
- if (fileSystem instanceof DistributedFileSystem) {
+ boolean allowSynthetic, boolean checkDefaultFs, boolean forceSyntheticIds) throws IOException {
+ if (forceSyntheticIds == false && fileSystem instanceof DistributedFileSystem) {
DistributedFileSystem dfs = (DistributedFileSystem) fileSystem;
if ((!checkDefaultFs) || isDefaultFs(dfs)) {
Object result = SHIMS.getFileId(dfs, path.toUri().getPath());
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
index 728bf50..25b2d48 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcInputFormat.java
@@ -2286,8 +2286,11 @@ public class OrcInputFormat implements InputFormat<NullWritable, OrcStruct>,
boolean checkDefaultFs = HiveConf.getBoolVar(
context.conf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID);
- boolean isDefaultFs = (!checkDefaultFs) || ((fs instanceof DistributedFileSystem)
- && HdfsUtils.isDefaultFs((DistributedFileSystem) fs));
+ boolean forceSynthetic =
+ !HiveConf.getBoolVar(context.conf, ConfVars.LLAP_IO_USE_FILEID_PATH);
+ // if forceSynthetic == true, then assume it is not a defaultFS
+ boolean isDefaultFs = (forceSynthetic == false) && ((!checkDefaultFs) || ((fs instanceof DistributedFileSystem)
+ && HdfsUtils.isDefaultFs((DistributedFileSystem) fs)));
if (baseFiles.isEmpty()) {
assert false : "acid 2.0 no base?!: " + dir;
http://git-wip-us.apache.org/repos/asf/hive/blob/240bfb46/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
index 8c49056..fd776cf 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java
@@ -193,7 +193,8 @@ public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
if (metadataCache != null) {
cacheKey = HdfsUtils.getFileId(file.getFileSystem(configuration), file,
HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_ALLOW_SYNTHETIC_FILEID),
- HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID));
+ HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_CACHE_DEFAULT_FS_FILE_ID),
+ !HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_IO_USE_FILEID_PATH));
}
if (cacheKey != null) {
if (HiveConf.getBoolVar(cacheConf, ConfVars.LLAP_TRACK_CACHE_USAGE)) {