You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@arrow.apache.org by GitBox <gi...@apache.org> on 2022/07/27 15:57:09 UTC

[GitHub] [arrow] anthonylouisbsb commented on a diff in pull request #13395: [Gandiva][C++] Add REGEXP_LIKE function

anthonylouisbsb commented on code in PR #13395:
URL: https://github.com/apache/arrow/pull/13395#discussion_r931220191


##########
cpp/src/gandiva/regex_functions_holder.h:
##########
@@ -26,35 +26,38 @@
 #include "gandiva/execution_context.h"
 #include "gandiva/function_holder.h"
 #include "gandiva/node.h"
+#include "gandiva/regex_util.h"
 #include "gandiva/visibility.h"
 
 namespace gandiva {
 
-/// Function Holder for SQL 'like'
-class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
+class GANDIVA_EXPORT RegexpMatchesHolder : public FunctionHolder {

Review Comment:
   The name of the holder is a little bit confusing because it is also responsible for other functions (like `regexp_like`).
   
   Change it to `RegexpExpressionsHolder` or something similar.



##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -76,10 +57,114 @@ const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
   return node;
 }
 
-Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
-  ARROW_RETURN_IF(node.children().size() != 2 && node.children().size() != 3,
-                  Status::Invalid("'like' function requires two or three parameters"));
+const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) {
+  if (node.descriptor()->name() == "ilike") {
+    // Optimizations don't work for case-insensitive matching
+    return node;
+  }
+
+  std::string pcre_pattern;
+  auto pattern_result = GetPattern(node);
+  if (!pattern_result.ok()) {
+    return node;
+  } else {
+    pcre_pattern = pattern_result.ValueOrDie();
+  }
+
+  auto literal_type = node.children().at(1)->return_type();
+  auto pcre_node =
+      std::make_shared<LiteralNode>(literal_type, LiteralHolder(pcre_pattern), false);
+  auto new_node = FunctionNode("regexp_matches", {node.children().at(0), pcre_node},
+                               node.return_type());
+
+  auto optimized_node = RegexpMatchesHolder::TryOptimize(new_node);
+
+  if (optimized_node.descriptor()->name() != "regexp_matches") {
+    return optimized_node;
+  } else {
+    return node;
+  }
+}
+
+// static bool IsArrowStringLiteral(arrow::Type::type type) {
+//  return type == arrow::Type::STRING || type == arrow::Type::BINARY;
+//}

Review Comment:
   If the method is not being used, remove it.



##########
cpp/src/gandiva/regex_functions_holder.cc:
##########
@@ -76,10 +57,114 @@ const FunctionNode LikeHolder::TryOptimize(const FunctionNode& node) {
   return node;
 }
 
-Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* holder) {
-  ARROW_RETURN_IF(node.children().size() != 2 && node.children().size() != 3,
-                  Status::Invalid("'like' function requires two or three parameters"));
+const FunctionNode SQLLikeHolder::TryOptimize(const FunctionNode& node) {
+  if (node.descriptor()->name() == "ilike") {
+    // Optimizations don't work for case-insensitive matching
+    return node;
+  }
+
+  std::string pcre_pattern;
+  auto pattern_result = GetPattern(node);
+  if (!pattern_result.ok()) {
+    return node;
+  } else {
+    pcre_pattern = pattern_result.ValueOrDie();
+  }
+
+  auto literal_type = node.children().at(1)->return_type();
+  auto pcre_node =
+      std::make_shared<LiteralNode>(literal_type, LiteralHolder(pcre_pattern), false);
+  auto new_node = FunctionNode("regexp_matches", {node.children().at(0), pcre_node},

Review Comment:
   That part is not simple; it would be good to have a comment explaining its purpose.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@arrow.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org