You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kvrocks.apache.org by hu...@apache.org on 2022/09/22 09:59:04 UTC

[incubator-kvrocks] branch unstable updated: Support quoted string and inline comment in config (#849)

This is an automated email from the ASF dual-hosted git repository.

hulk pushed a commit to branch unstable
in repository https://gitbox.apache.org/repos/asf/incubator-kvrocks.git


The following commit(s) were added to refs/heads/unstable by this push:
     new 399a33d  Support quoted string and inline comment in config (#849)
399a33d is described below

commit 399a33d06af9542b3758347a5ee0c5844a032e3c
Author: Twice <tw...@gmail.com>
AuthorDate: Thu Sep 22 17:58:58 2022 +0800

    Support quoted string and inline comment in config (#849)
---
 src/config.cc                |  43 +++++-----
 src/config.h                 |   2 +-
 src/config_util.cc           | 195 +++++++++++++++++++++++++++++++++++++++++++
 src/config_util.h            |  40 +++++++++
 tests/cppunit/config_test.cc |  34 ++++++++
 5 files changed, 290 insertions(+), 24 deletions(-)

diff --git a/src/config.cc b/src/config.cc
index 16c0972..a98fdf4 100644
--- a/src/config.cc
+++ b/src/config.cc
@@ -27,6 +27,8 @@
 #include <vector>
 #include <utility>
 #include <limits>
+#include <algorithm>
+#include <cctype>
 #include <glog/logging.h>
 #include <rocksdb/env.h>
 
@@ -38,6 +40,7 @@
 #include "cron.h"
 #include "server.h"
 #include "log_collector.h"
+#include "config_util.h"
 
 const char *kDefaultNamespace = "__namespace";
 
@@ -558,28 +561,27 @@ void Config::ClearMaster() {
   }
 }
 
-Status Config::parseConfigFromString(std::string input, int line_number) {
-  std::vector<std::string> kv = Util::Split2KV(input, " \t");
+Status Config::parseConfigFromString(const std::string &input, int line_number) {
+  auto parsed = ParseConfigLine(input);
+  if (!parsed) return parsed.ToStatus();
 
-  // skip the comment and empty line
-  if (kv.empty() || kv[0].front() == '#') return Status::OK();
+  auto kv = std::move(*parsed);
 
-  if (kv.size() != 2) return Status(Status::NotOK, "wrong number of arguments");
-  if (kv[1] == "\"\"") return Status::OK();
+  if (kv.first.empty() || kv.second.empty()) return Status::OK();
 
-  std::string field_key = Util::ToLower(kv[0]);
+  std::string field_key = Util::ToLower(kv.first);
   const char ns_str[] = "namespace.";
   size_t ns_str_size = sizeof(ns_str) - 1;
-  if (!strncasecmp(kv[0].data(), ns_str, ns_str_size)) {
+  if (!strncasecmp(kv.first.data(), ns_str, ns_str_size)) {
       // namespace should keep key case-sensitive
-      field_key = kv[0];
-      tokens[kv[1]] = kv[0].substr(ns_str_size);
+      field_key = kv.first;
+      tokens[kv.second] = kv.first.substr(ns_str_size);
   }
   auto iter = fields_.find(field_key);
   if (iter != fields_.end()) {
     auto& field = iter->second;
     field->line_number = line_number;
-    auto s = field->Set(kv[1]);
+    auto s = field->Set(kv.second);
     if (!s.IsOK()) return s;
   }
   return Status::OK();
@@ -711,27 +713,22 @@ Status Config::Rewrite() {
 
   std::ifstream file(path_);
   if (file.is_open()) {
-    std::string raw_line, trim_line, new_value;
-    std::vector<std::string> kv;
+    std::string raw_line;
     while (!file.eof()) {
       std::getline(file, raw_line);
-      trim_line = Util::Trim(raw_line, " \t\r\n");
-      if (trim_line.empty() || trim_line.front() == '#') {
+      auto parsed = ParseConfigLine(raw_line);
+      if (!parsed || parsed->first.empty()) {
         lines.emplace_back(raw_line);
         continue;
       }
-      kv = Util::Split2KV(trim_line, " \t");
-      if (kv.size() != 2) {
-        lines.emplace_back(raw_line);
-        continue;
-      }
-      if (Util::HasPrefix(kv[0], namespacePrefix)) {
+      auto kv = std::move(*parsed);
+      if (Util::HasPrefix(kv.first, namespacePrefix)) {
         // Ignore namespace fields here since we would always rewrite them
         continue;
       }
-      auto iter = new_config.find(Util::ToLower(kv[0]));
+      auto iter = new_config.find(Util::ToLower(kv.first));
       if (iter != new_config.end()) {
-        if (!iter->second.empty()) lines.emplace_back(iter->first + " " + iter->second);
+        if (!iter->second.empty()) lines.emplace_back(DumpConfigLine({iter->first, iter->second}));
         new_config.erase(iter);
       } else {
         lines.emplace_back(raw_line);
diff --git a/src/config.h b/src/config.h
index dec091a..5c5ec3e 100644
--- a/src/config.h
+++ b/src/config.h
@@ -210,7 +210,7 @@ struct Config{
 
   void initFieldValidator();
   void initFieldCallback();
-  Status parseConfigFromString(std::string input, int line_number);
+  Status parseConfigFromString(const std::string &input, int line_number);
   Status finish();
   Status isNamespaceLegal(const std::string &ns);
 };
diff --git a/src/config_util.cc b/src/config_util.cc
new file mode 100644
index 0000000..6f22ef8
--- /dev/null
+++ b/src/config_util.cc
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#include "config_util.h"
+#include "util.h"
+
+
+// Parse one config line into a {key, value} pair; blank and comment-only lines give an empty pair.
+// Returns NotOK for an unterminated quoted value or for extra content after a quoted value.
+StatusOr<ConfigKV> ParseConfigLine(const std::string& line) {
+  enum {
+    KEY,  // in (unquoted) key string
+    NORMAL,  // in unquoted value string
+    QUOTED,  // in quoted value string
+    PRE_KEY_SPACE,  // in whitespace characters before key
+    AFTER_KEY_SPACE,  // in whitespace characters after key and before value
+    AFTER_VAL_SPACE,  // in whitespace characters after value
+    ESCAPE,  // in escape character of quoted string
+    ERROR  // error state, e.g. encounter more than one value
+  } state = PRE_KEY_SPACE;
+
+  // std::isspace is undefined for negative char values, so classify via unsigned char
+  auto is_space = [](char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; };
+  char quote = '\0';  // opening quote character (single or double) of a quoted value
+  std::string current_str;  // characters collected for the token being read
+  ConfigKV res;
+
+  // `i` is advanced explicitly: a state change without i++ re-examines the same character
+  for (auto i = line.begin(); i != line.end();) {
+    switch (state) {
+      case PRE_KEY_SPACE:
+        if (!is_space(*i)) {
+          if (*i == '#') {
+            i = line.end();
+          } else {
+            state = KEY;
+          }
+        } else {
+          i++;
+        }
+        break;
+      case KEY:
+        if (is_space(*i)) {
+          res.first = current_str;
+          current_str.clear();
+          state = AFTER_KEY_SPACE;
+        } else if (*i == '#') {
+          // state stays KEY, so the key is stored after the loop
+          i = line.end();
+        } else {
+          current_str.push_back(*i);
+          i++;
+        }
+        break;
+      case AFTER_KEY_SPACE:
+        if (!is_space(*i)) {
+          if (*i == '"' || *i == '\'') {
+            state = QUOTED;
+            quote = *i;
+            i++;
+          } else if (*i == '#') {
+            i = line.end();
+          } else {
+            state = NORMAL;
+          }
+        } else {
+          i++;
+        }
+        break;
+      case NORMAL:
+        if (*i == '#') {
+          // state stays NORMAL, so the trimmed value is stored after the loop
+          i = line.end();
+        } else {
+          current_str.push_back(*i);
+          i++;
+        }
+        break;
+      case QUOTED:
+        if (*i == '\\') {
+          state = ESCAPE;
+        } else if (*i == quote) {
+          res.second = current_str;
+          state = AFTER_VAL_SPACE;
+        } else {
+          current_str.push_back(*i);
+        }
+        i++;
+        break;
+      case ESCAPE:
+        switch (*i) {
+          case '\'': case '"': case '\\': current_str.push_back(*i); break;
+          case 't': current_str.push_back('\t'); break;
+          case 'r': current_str.push_back('\r'); break;
+          case 'n': current_str.push_back('\n'); break;
+          case 'v': current_str.push_back('\v'); break;
+          case 'f': current_str.push_back('\f'); break;
+          case 'b': current_str.push_back('\b'); break;
+          default: break;  // any other escaped character is dropped
+        }
+        state = QUOTED;
+        i++;
+        break;
+      case AFTER_VAL_SPACE:
+        if (!is_space(*i)) {
+          if (*i == '#') {
+            i = line.end();
+          } else {
+            state = ERROR;
+          }
+        } else {
+          i++;
+        }
+        break;
+      case ERROR:
+        i = line.end();
+        break;
+    }
+  }
+
+  // end of input: store the token still being collected, or report an unfinished/invalid line
+  if (state == KEY) {
+    res.first = current_str;
+    state = AFTER_KEY_SPACE;
+  } else if (state == NORMAL) {
+    res.second = Util::Trim(current_str, " \t\r\n\v\f\b");
+    state = AFTER_VAL_SPACE;
+  } else if (state == QUOTED || state == ESCAPE) {
+    return {Status::NotOK, "config line ends unexpectedly in quoted string"};
+  } else if (state == ERROR) {
+    return {Status::NotOK, "more than 2 item in config line"};
+  }
+
+  return res;
+}
+
+// Serialize a {key, value} pair as one config line in the syntax ParseConfigLine reads.
+// The value is written as-is when possible; it is double-quoted with backslash escapes when
+// it contains whitespace, a quote, '#' or '\b', and written as "" when it is empty.
+std::string DumpConfigLine(const ConfigKV &config) {
+  std::string res = config.first;
+  res += " ";
+
+  // These characters would be cut or misread in an unquoted value: '#' starts a comment,
+  // a leading quote opens a quoted string, and edge whitespace or '\b' is trimmed on parse.
+  const bool need_quote = std::any_of(config.second.begin(), config.second.end(), [](char c) {
+    // std::isspace is undefined for negative char values, so classify via unsigned char
+    return std::isspace(static_cast<unsigned char>(c)) || c == '"' || c == '\'' ||
+           c == '#' || c == '\b';
+  });
+
+  if (need_quote) {
+    res += '"';
+    // mirror the escape sequences ParseConfigLine decodes inside a quoted string
+    for (char c : config.second) {
+      switch (c) {
+        case '\\': res += "\\\\"; break;
+        case '\'': res += "\\'"; break;
+        case '"': res += "\\\""; break;
+        case '\t': res += "\\t"; break;
+        case '\r': res += "\\r"; break;
+        case '\n': res += "\\n"; break;
+        case '\v': res += "\\v"; break;
+        case '\f': res += "\\f"; break;
+        case '\b': res += "\\b"; break;
+        default: res += c; break;
+      }
+    }
+    res += '"';
+  } else if (config.second.empty()) {
+    // an empty value is written as an empty quoted string
+    res += "\"\"";
+  } else {
+    res += config.second;
+  }
+
+  return res;
+}
diff --git a/src/config_util.h b/src/config_util.h
new file mode 100644
index 0000000..9d781dd
--- /dev/null
+++ b/src/config_util.h
@@ -0,0 +1,40 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ *
+ */
+
+#pragma once
+
+#include <string>
+#include <vector>
+#include <utility>
+
+#include "status.h"
+
+using ConfigKV = std::pair<std::string, std::string>;
+
+// refer to https://redis.io/docs/manual/config
+// format: key value
+// inline comment: key value # comment
+// quoted string: key "hello world"
+// e.g. `key "hello # world\""  # hi` -> key: hello # world"
+StatusOr<ConfigKV> ParseConfigLine(const std::string &line);
+
+// dump a config item to a string line
+// e.g. {"a", "b c"} -> a "b c"  (a value that needs quoting is always wrapped in double quotes)
+std::string DumpConfigLine(const ConfigKV &config);
diff --git a/tests/cppunit/config_test.cc b/tests/cppunit/config_test.cc
index 7efc056..add990d 100644
--- a/tests/cppunit/config_test.cc
+++ b/tests/cppunit/config_test.cc
@@ -25,6 +25,7 @@
 #include <fstream>
 #include <iostream>
 #include <gtest/gtest.h>
+#include <config_util.h>
 
 TEST(Config, GetAndSet) {
   const char *path = "test.conf";
@@ -276,3 +277,36 @@ TEST(Namespace, RewriteNamespaces) {
   EXPECT_FALSE(new_config.GetNamespace("to-be-deleted-ns", &token).IsOK());
   unlink(path);
 }
+
+TEST(Config, ParseConfigLine) {
+  const std::vector<std::pair<std::string, ConfigKV>> accepted = {  // input line -> expected pair
+    {"", ConfigKV{}},
+    {"# hello", ConfigKV{}},
+    {"       #x y z ", ConfigKV{}},
+    {"key value  ", ConfigKV{"key", "value"}},
+    {"key value#x", ConfigKV{"key", "value"}},
+    {"key", ConfigKV{"key", ""}},
+    {"    key    value1   value2   ", ConfigKV{"key", "value1   value2"}},
+    {" #", ConfigKV{}},
+    {"  key val ue #h e l l o", ConfigKV{"key", "val ue"}},
+    {"key 'val ue'", ConfigKV{"key", "val ue"}},
+    {R"(key ' value\'\'v a l ')", ConfigKV{"key", " value''v a l "}},
+    {R"( key "val # hi" # hello!)", ConfigKV{"key", "val # hi"}},
+    {R"(key "\n \r \t ")", ConfigKV{"key", "\n \r \t "}},
+    {"key ''", ConfigKV{"key", ""}},
+  };
+  for (const auto &c : accepted) ASSERT_EQ(*ParseConfigLine(c.first), c.second);
+  const std::vector<std::string> rejected = {  // unterminated quotes, or extra tokens after a quoted value
+    "key \"hello ", "key '\\", "key \"hello'", "key \"", "key '' ''", "key '' x"};
+  for (const auto &line : rejected) ASSERT_FALSE(ParseConfigLine(line));
+}
+
+TEST(Config, DumpConfigLine) {  // plain values are written bare; whitespace, quotes and '#' force quoting
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"key", "value"}), "key value");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"key", " v a l "}), "key \" v a l \"");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"a", "'b"}), R"(a "\'b")");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"a", "x#y"}), R"(a "x#y")");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"a", "x y"}), R"(a "x y")");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"a", "xy"}), "a xy");
+  ASSERT_EQ(DumpConfigLine(ConfigKV{"a", "x\n"}), R"(a "x\n")");
+}