You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@doris.apache.org by GitBox <gi...@apache.org> on 2022/10/13 12:40:04 UTC

[GitHub] [doris] zhangstar333 commented on a diff in pull request #13323: [function](string_function) add new string function 'extract_url_parameter'

zhangstar333 commented on code in PR #13323:
URL: https://github.com/apache/doris/pull/13323#discussion_r994582642


##########
be/src/util/url_parser.cpp:
##########
@@ -344,4 +345,52 @@ UrlParser::UrlPart UrlParser::get_url_part(const StringValue& part) {
     }
 }
 
+std::string UrlParser::extract_url(const StringValue& url, const StringValue& name) {
+    std::string result;
+    std::string str_name = name.to_string();
+    // Remove leading and trailing spaces.
+    StringValue trimmed_url = url.trim();
+    // find '?' and '#'
+    int32_t question_pos = _s_question_search.search(&trimmed_url);
+    int32_t hash_pos = _s_hash_search.search(&trimmed_url);
+    if(question_pos < 0) {

Review Comment:
   Maybe check this condition first, and only search for '#' afterwards (the '#' search is unnecessary when there is no '?').



##########
be/src/util/url_parser.cpp:
##########
@@ -344,4 +345,52 @@ UrlParser::UrlPart UrlParser::get_url_part(const StringValue& part) {
     }
 }
 
+std::string UrlParser::extract_url(const StringValue& url, const StringValue& name) {
+    std::string result;
+    std::string str_name = name.to_string();
+    // Remove leading and trailing spaces.
+    StringValue trimmed_url = url.trim();
+    // find '?' and '#'
+    int32_t question_pos = _s_question_search.search(&trimmed_url);
+    int32_t hash_pos = _s_hash_search.search(&trimmed_url);
+    if(question_pos < 0) {
+        // this url no parameters.
+        // Example: https://doris.apache.org/
+        return result;
+    }
+    std::string sub_url = "";
+    if(hash_pos < 0) {
+        sub_url = trimmed_url.substring(question_pos + 1, trimmed_url.len - question_pos -1).to_string();
+    } else {
+        sub_url = trimmed_url.substring(question_pos + 1, hash_pos - question_pos - 1).to_string();
+    }
+
+    // find '&' and '=', and extract target parameter
+    // Example: k1=aa&k2=bb&k3=cc&test=dd
+    std::string::size_type and_pod;
+    std::string::size_type len = sub_url.length();
+    std::string key_url;
+    while(true) {
+        if(len <= 0) {
+            break;
+        }
+        and_pod  = sub_url.find('&');
+        if(and_pod != std::string::npos) {
+            key_url = sub_url.substr(0, and_pod);
+            sub_url = sub_url.substr(and_pod + 1, len - and_pod);   
+        } else {
+            key_url = sub_url;
+            sub_url = "";
+        }
+        std::string::size_type eq_pod = key_url.find_first_of('=');
+        int32_t key_len = key_url.length();
+        std::string key = key_url.substr(0, eq_pod);
+        if(str_name == key) {
+            result = key_url.substr(eq_pod + 1, key_len - eq_pod);
+            return result;

Review Comment:
   If the URL is invalid, it looks like this may produce a wrong result,
   for example with "https://localhost:3000?id" (a parameter with no '=').



##########
be/src/vec/functions/function_string.h:
##########
@@ -1152,6 +1153,56 @@ class FunctionStringMd5AndSM3 : public IFunction {
     }
 };
 
+class FunctionExtractURLParameter : public IFunction {
+public:
+    static constexpr auto name = "extract_url_parameter";
+    static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+     return std::make_shared<DataTypeString>();
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};

Review Comment:
   This override could be removed; the function does not appear to be variadic.



##########
be/src/vec/functions/function_string.h:
##########
@@ -1152,6 +1153,56 @@ class FunctionStringMd5AndSM3 : public IFunction {
     }
 };
 
+class FunctionExtractURLParameter : public IFunction {
+public:
+    static constexpr auto name = "extract_url_parameter";
+    static FunctionPtr create() { return std::make_shared<FunctionExtractURLParameter>(); }
+    String get_name() const override { return name; }
+    size_t get_number_of_arguments() const override { return 2; }
+
+    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
+     return std::make_shared<DataTypeString>();
+    }
+
+    DataTypes get_variadic_argument_types_impl() const override {
+        return {std::make_shared<DataTypeString>(), std::make_shared<DataTypeString>()};
+    }  
+
+    bool use_default_implementation_for_constants() const override { return true; }
+
+    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
+                        size_t result, size_t input_rows_count) override {
+        auto col_url =
+                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
+        auto col_parameter =
+                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();
+
+        ColumnString::MutablePtr col_res = ColumnString::create();
+
+        for(int i = 0; i < input_rows_count; ++i) {
+            auto source = col_url->get_data_at(i);
+            auto param = col_parameter->get_data_at(i);

Review Comment:
   The column could be cast once before the for loop instead of going through the generic column interface on every row,
   like: const auto& url_str = assert_cast<const ColumnString*>(col_url.get());



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@doris.apache.org
For additional commands, e-mail: commits-help@doris.apache.org