You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/16 01:21:01 UTC
[impala] 11/11: IMPALA-7111: avoid use of boost::split in
CheckPluginEnabled
This is an automated email from the ASF dual-hosted git repository.
tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git
commit e660f1187fb48316a6cadcb3777786ebc72a1e43
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Jun 13 13:48:48 2018 -0700
IMPALA-7111: avoid use of boost::split in CheckPluginEnabled
This is an attempt to either avoid the bug or make it easier to diagnose
if it reoccurs. My suspicion is that somehow boost::split() is
accessing the input string in a non-thread-safe manner, but the
implementation is opaque enough that it's not obvious how.
Change-Id: I17b7f083731a33b832035f24900e351e2cb2feb8
Reviewed-on: http://gerrit.cloudera.org:8080/10709
Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
be/src/exec/hdfs-plugin-text-scanner.cc | 9 +++------
be/src/util/string-util-test.cc | 31 +++++++++++++++++++++++++++++++
be/src/util/string-util.cc | 11 +++++++++++
be/src/util/string-util.h | 3 +++
4 files changed, 48 insertions(+), 6 deletions(-)
diff --git a/be/src/exec/hdfs-plugin-text-scanner.cc b/be/src/exec/hdfs-plugin-text-scanner.cc
index d28fab5..b4f7292 100644
--- a/be/src/exec/hdfs-plugin-text-scanner.cc
+++ b/be/src/exec/hdfs-plugin-text-scanner.cc
@@ -27,8 +27,9 @@
#include "runtime/runtime-state.h"
#include "runtime/hdfs-fs-cache.h"
#include "util/debug-util.h"
-#include "util/hdfs-util.h"
#include "util/dynamic-util.h"
+#include "util/hdfs-util.h"
+#include "util/string-util.h"
#include "common/names.h"
@@ -105,11 +106,7 @@ Status HdfsPluginTextScanner::IssueInitialRanges(HdfsScanNodeBase* scan_node,
}
Status HdfsPluginTextScanner::CheckPluginEnabled(const string& plugin_name) {
- vector<string> enabled_plugins;
- boost::split(enabled_plugins, FLAGS_enabled_hdfs_text_scanner_plugins,
- boost::is_any_of(","));
- if (find(enabled_plugins.begin(), enabled_plugins.end(), plugin_name)
- == enabled_plugins.end()) {
+ if (!CommaSeparatedContains(FLAGS_enabled_hdfs_text_scanner_plugins, plugin_name)) {
return Status(Substitute("Scanner plugin '$0' is not one of the enabled plugins: '$1'",
plugin_name, FLAGS_enabled_hdfs_text_scanner_plugins));
}
diff --git a/be/src/util/string-util-test.cc b/be/src/util/string-util-test.cc
index 979eb9f..2c268dd 100644
--- a/be/src/util/string-util-test.cc
+++ b/be/src/util/string-util-test.cc
@@ -79,6 +79,37 @@ TEST(TruncateUpTest, Basic) {
EvalTruncation(a, b, 2, UP);
}
+TEST(CommaSeparatedContainsTest, Basic) {
+ // Basic tests with string present.
+ EXPECT_TRUE(CommaSeparatedContains("LZO", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains("foo,LZO", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains("LZO,bar", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains("foo,LZO,bar", "LZO"));
+
+ // Handles zero-length entries.
+ EXPECT_FALSE(CommaSeparatedContains("", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains(",", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains(",,", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains("foo,LZO,", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains(",foo,LZO,", "LZO"));
+ EXPECT_TRUE(CommaSeparatedContains(",foo,,LZO,", "LZO"));
+
+ // Basic tests with string absent.
+ EXPECT_FALSE(CommaSeparatedContains("foo,bar", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains("foo", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains("foo,", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains("foo,bar,baz", "LZO"));
+ EXPECT_FALSE(CommaSeparatedContains(",foo,LzO,", "LZO"));
+
+ // Pattern is longer than token.
+ EXPECT_FALSE(CommaSeparatedContains(",foo,LzO,", "ZZZZZ"));
+ // Pattern is longer than string.
+ EXPECT_FALSE(CommaSeparatedContains("foo", "ZZZZZ"));
+
+ // Whitespace is included in tokens alone.
+ EXPECT_FALSE(CommaSeparatedContains("foo , foo, foo,\nfoo,\tfoo", "foo"));
+}
+
}
IMPALA_TEST_MAIN();
diff --git a/be/src/util/string-util.cc b/be/src/util/string-util.cc
index b6c8fb7..f8e284f 100644
--- a/be/src/util/string-util.cc
+++ b/be/src/util/string-util.cc
@@ -54,4 +54,15 @@ Status TruncateUp(const string& str, int32_t max_length, string* result) {
return Status::OK();
}
+bool CommaSeparatedContains(const std::string& cs_list, const std::string& item) {
+ size_t pos = 0;
+ while (pos < cs_list.size()) {
+ size_t comma_pos = cs_list.find(",", pos);
+ if (comma_pos == string::npos) return cs_list.compare(pos, string::npos, item) == 0;
+ if (cs_list.compare(pos, comma_pos - pos, item) == 0) return true;
+ pos = comma_pos + 1;
+ }
+ return false;
+}
+
}
diff --git a/be/src/util/string-util.h b/be/src/util/string-util.h
index 7e7ab12..2811a3c 100644
--- a/be/src/util/string-util.h
+++ b/be/src/util/string-util.h
@@ -37,6 +37,9 @@ Status TruncateDown(const std::string& str, int32_t max_length, std::string* res
WARN_UNUSED_RESULT
Status TruncateUp(const std::string& str, int32_t max_length, std::string* result);
+/// Return true if the comma-separated string 'cs_list' contains 'item' as one of
+/// the comma-separated values.
+bool CommaSeparatedContains(const std::string& cs_list, const std::string& item);
}
#endif