You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2019/02/16 01:21:01 UTC

[impala] 11/11: IMPALA-7111: avoid use of boost::split in CheckPluginEnabled

This is an automated email from the ASF dual-hosted git repository.

tarmstrong pushed a commit to branch 2.x
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e660f1187fb48316a6cadcb3777786ebc72a1e43
Author: Tim Armstrong <ta...@cloudera.com>
AuthorDate: Wed Jun 13 13:48:48 2018 -0700

    IMPALA-7111: avoid use of boost::split in CheckPluginEnabled
    
    This is an attempt to either avoid the bug or make it easier to diagnose
    if it reoccurs. My suspicion is that somehow boost::split() is
    accessing the input string in a non-thread-safe manner, but the
    implementation is opaque enough that it's not obvious how.
    
    Change-Id: I17b7f083731a33b832035f24900e351e2cb2feb8
    Reviewed-on: http://gerrit.cloudera.org:8080/10709
    Reviewed-by: Impala Public Jenkins <im...@cloudera.com>
    Tested-by: Impala Public Jenkins <im...@cloudera.com>
---
 be/src/exec/hdfs-plugin-text-scanner.cc |  9 +++------
 be/src/util/string-util-test.cc         | 31 +++++++++++++++++++++++++++++++
 be/src/util/string-util.cc              | 11 +++++++++++
 be/src/util/string-util.h               |  3 +++
 4 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/be/src/exec/hdfs-plugin-text-scanner.cc b/be/src/exec/hdfs-plugin-text-scanner.cc
index d28fab5..b4f7292 100644
--- a/be/src/exec/hdfs-plugin-text-scanner.cc
+++ b/be/src/exec/hdfs-plugin-text-scanner.cc
@@ -27,8 +27,9 @@
 #include "runtime/runtime-state.h"
 #include "runtime/hdfs-fs-cache.h"
 #include "util/debug-util.h"
-#include "util/hdfs-util.h"
 #include "util/dynamic-util.h"
+#include "util/hdfs-util.h"
+#include "util/string-util.h"
 
 #include "common/names.h"
 
@@ -105,11 +106,7 @@ Status HdfsPluginTextScanner::IssueInitialRanges(HdfsScanNodeBase* scan_node,
 }
 
 Status HdfsPluginTextScanner::CheckPluginEnabled(const string& plugin_name) {
-  vector<string> enabled_plugins;
-  boost::split(enabled_plugins, FLAGS_enabled_hdfs_text_scanner_plugins,
-      boost::is_any_of(","));
-  if (find(enabled_plugins.begin(), enabled_plugins.end(), plugin_name)
-      == enabled_plugins.end()) {
+  if (!CommaSeparatedContains(FLAGS_enabled_hdfs_text_scanner_plugins, plugin_name)) {
     return Status(Substitute("Scanner plugin '$0' is not one of the enabled plugins: '$1'",
           plugin_name, FLAGS_enabled_hdfs_text_scanner_plugins));
   }
diff --git a/be/src/util/string-util-test.cc b/be/src/util/string-util-test.cc
index 979eb9f..2c268dd 100644
--- a/be/src/util/string-util-test.cc
+++ b/be/src/util/string-util-test.cc
@@ -79,6 +79,37 @@ TEST(TruncateUpTest, Basic) {
   EvalTruncation(a, b, 2, UP);
 }
 
+TEST(CommaSeparatedContainsTest, Basic) {
+  // Basic tests with string present.
+  EXPECT_TRUE(CommaSeparatedContains("LZO", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains("foo,LZO", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains("LZO,bar", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains("foo,LZO,bar", "LZO"));
+
+  // Handles zero-length entries.
+  EXPECT_FALSE(CommaSeparatedContains("", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains(",", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains(",,", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains("foo,LZO,", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains(",foo,LZO,", "LZO"));
+  EXPECT_TRUE(CommaSeparatedContains(",foo,,LZO,", "LZO"));
+
+  // Basic tests with string absent.
+  EXPECT_FALSE(CommaSeparatedContains("foo,bar", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains("foo", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains("foo,", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains("foo,bar,baz", "LZO"));
+  EXPECT_FALSE(CommaSeparatedContains(",foo,LzO,", "LZO"));
+
+  // Pattern is longer than token.
+  EXPECT_FALSE(CommaSeparatedContains(",foo,LzO,", "ZZZZZ"));
+  // Pattern is longer than string.
+  EXPECT_FALSE(CommaSeparatedContains("foo", "ZZZZZ"));
+
+  // Whitespace is included in tokens alone.
+  EXPECT_FALSE(CommaSeparatedContains("foo , foo, foo,\nfoo,\tfoo", "foo"));
+}
+
 }
 
 IMPALA_TEST_MAIN();
diff --git a/be/src/util/string-util.cc b/be/src/util/string-util.cc
index b6c8fb7..f8e284f 100644
--- a/be/src/util/string-util.cc
+++ b/be/src/util/string-util.cc
@@ -54,4 +54,15 @@ Status TruncateUp(const string& str, int32_t max_length, string* result) {
   return Status::OK();
 }
 
+bool CommaSeparatedContains(const std::string& cs_list, const std::string& item) {
+  size_t pos = 0;
+  while (pos < cs_list.size()) {
+    size_t comma_pos = cs_list.find(",", pos);
+    if (comma_pos == string::npos) return cs_list.compare(pos, string::npos, item) == 0;
+    if (cs_list.compare(pos, comma_pos - pos, item) == 0) return true;
+    pos = comma_pos + 1;
+  }
+  return false;
+}
+
 }
diff --git a/be/src/util/string-util.h b/be/src/util/string-util.h
index 7e7ab12..2811a3c 100644
--- a/be/src/util/string-util.h
+++ b/be/src/util/string-util.h
@@ -37,6 +37,9 @@ Status TruncateDown(const std::string& str, int32_t max_length, std::string* res
 WARN_UNUSED_RESULT
 Status TruncateUp(const std::string& str, int32_t max_length, std::string* result);
 
+/// Return true if the comma-separated string 'cs_list' contains 'item' as one of
+/// the comma-separated values.
+bool CommaSeparatedContains(const std::string& cs_list, const std::string& item);
 }
 
 #endif