You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2018/05/23 17:15:18 UTC
[2/3] impala git commit: IMPALA-6813: Hedged reads metrics broken
when scanning non-HDFS based table
IMPALA-6813: Hedged reads metrics broken when scanning non-HDFS based table
We realized that the libHDFS API call hdfsGetHedgedReadMetrics() crashes
when the 'fs' argument passed to it is not an HDFS filesystem.
There is an open bug for it on the HDFS side: HDFS-13417
However, it looks like we won't be getting a fix for it in the short term,
so our only option at this point is to skip the call when the file is not on HDFS.
Testing: Made sure that enabling preads and scanning from S3 doesn't
cause a crash.
Also, added a custom cluster test to exercise the pread code path. We
are unable to verify hedged reads in a minicluster, but we can at least
exercise the code path to make sure that nothing breaks.
Change-Id: I48fe80dfd9a1ed68a8f2b7038e5f42b5a3df3baa
Reviewed-on: http://gerrit.cloudera.org:8080/9966
Reviewed-by: Sailesh Mukil <sa...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
Project: http://git-wip-us.apache.org/repos/asf/impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/impala/commit/c4b214b3
Tree: http://git-wip-us.apache.org/repos/asf/impala/tree/c4b214b3
Diff: http://git-wip-us.apache.org/repos/asf/impala/diff/c4b214b3
Branch: refs/heads/master
Commit: c4b214b3a2ef8ca4692f74e6a289ddf76d1b3f58
Parents: 8d474a0
Author: Sailesh Mukil <sa...@cloudera.com>
Authored: Mon Apr 9 15:26:06 2018 -0700
Committer: Impala Public Jenkins <im...@cloudera.com>
Committed: Wed May 23 01:38:58 2018 +0000
----------------------------------------------------------------------
be/src/runtime/io/scan-range.cc | 4 +++-
tests/custom_cluster/test_hedged_reads.py | 30 ++++++++++++++++++++++++++
2 files changed, 33 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/impala/blob/c4b214b3/be/src/runtime/io/scan-range.cc
----------------------------------------------------------------------
diff --git a/be/src/runtime/io/scan-range.cc b/be/src/runtime/io/scan-range.cc
index c868c3d..409e743 100644
--- a/be/src/runtime/io/scan-range.cc
+++ b/be/src/runtime/io/scan-range.cc
@@ -498,11 +498,13 @@ void ScanRange::Close() {
closed_file = true;
}
- if (FLAGS_use_hdfs_pread) {
+ if (FLAGS_use_hdfs_pread && IsHdfsPath(file())) {
// Update Hedged Read Metrics.
// We call it only if the --use_hdfs_pread flag is set, to avoid having the
// libhdfs client malloc and free a hdfsHedgedReadMetrics object unnecessarily
// otherwise. 'hedged_metrics' is only set upon success.
+ // We also avoid calling hdfsGetHedgedReadMetrics() when the file is not on HDFS
+ // (see HDFS-13417).
struct hdfsHedgedReadMetrics* hedged_metrics;
int success = hdfsGetHedgedReadMetrics(fs_, &hedged_metrics);
if (success == 0) {
http://git-wip-us.apache.org/repos/asf/impala/blob/c4b214b3/tests/custom_cluster/test_hedged_reads.py
----------------------------------------------------------------------
diff --git a/tests/custom_cluster/test_hedged_reads.py b/tests/custom_cluster/test_hedged_reads.py
new file mode 100644
index 0000000..b24fd92
--- /dev/null
+++ b/tests/custom_cluster/test_hedged_reads.py
@@ -0,0 +1,30 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pytest
+from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
+from tests.common.skip import SkipIf
+
+@SkipIf.not_hdfs
+class TestHedgedReads(CustomClusterTestSuite):
+ """ Exercises the hedged reads code path.
+ NOTE: We unfortunately cannot force hedged reads on a minicluster, but we enable
+ this test to at least make sure that the code path doesn't break."""
+ @CustomClusterTestSuite.with_args("--use_hdfs_pread=true")
+ def test_hedged_reads(self, vector):
+ QUERY = "select * from tpch_parquet.lineitem limit 100"
+ self.client.execute(QUERY)