You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@impala.apache.org by ta...@apache.org on 2016/07/08 01:41:48 UTC

incubator-impala git commit: Remove some code in like-predicate-ir.cc from cross-compilation

Repository: incubator-impala
Updated Branches:
  refs/heads/master c1da1409b -> c7b7c3ece


Remove some code in like-predicate-ir.cc from cross-compilation

like-predicate-ir.cc contains a lot of code which
won't be called and inlined by other IR functions.
(e.g. the prepare functions). To reduce the size of
the bitcode module, this change removes these functions
from cross compilation and moves them into like-predicate.cc
instead.

Change-Id: I1279622ff97af0e1bedcfd9aafdb875b01e38c7c
Reviewed-on: http://gerrit.cloudera.org:8080/3483
Reviewed-by: Michael Ho <kw...@cloudera.com>
Tested-by: Internal Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/c7b7c3ec
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/c7b7c3ec
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/c7b7c3ec

Branch: refs/heads/master
Commit: c7b7c3ece2eb28db7ad76397c433ef582aeac7f8
Parents: c1da140
Author: Michael Ho <kw...@cloudera.com>
Authored: Thu Jun 23 22:32:18 2016 -0700
Committer: Tim Armstrong <ta...@cloudera.com>
Committed: Thu Jul 7 18:41:45 2016 -0700

----------------------------------------------------------------------
 be/src/exprs/CMakeLists.txt       |   1 +
 be/src/exprs/like-predicate-ir.cc | 390 +------------------------------
 be/src/exprs/like-predicate.cc    | 414 +++++++++++++++++++++++++++++++++
 be/src/exprs/like-predicate.h     |   9 +-
 4 files changed, 424 insertions(+), 390 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/CMakeLists.txt
----------------------------------------------------------------------
diff --git a/be/src/exprs/CMakeLists.txt b/be/src/exprs/CMakeLists.txt
index 452113a..747add3 100644
--- a/be/src/exprs/CMakeLists.txt
+++ b/be/src/exprs/CMakeLists.txt
@@ -39,6 +39,7 @@ add_library(Exprs
   in-predicate-ir.cc
   is-not-empty-predicate.cc
   is-null-predicate-ir.cc
+  like-predicate.cc
   like-predicate-ir.cc
   literal.cc
   math-functions-ir.cc

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate-ir.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate-ir.cc b/be/src/exprs/like-predicate-ir.cc
index 516866a..2d9c28b 100644
--- a/be/src/exprs/like-predicate-ir.cc
+++ b/be/src/exprs/like-predicate-ir.cc
@@ -14,103 +14,9 @@
 
 #include "exprs/like-predicate.h"
 
-#include <string.h>
-#include <re2/re2.h>
-#include <re2/stringpiece.h>
-#include <sstream>
-
-#include "gutil/strings/substitute.h"
-#include "runtime/string-value.inline.h"
-#include "string-functions.h"
-#include "common/names.h"
-
 using namespace impala_udf;
-using namespace re2;
 
 namespace impala {
-// A regex to match any regex pattern is equivalent to a substring search.
-static const RE2 SUBSTRING_RE(
-    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
-
-// A regex to match any regex pattern which is equivalent to matching a constant string
-// at the end of the string values.
-static const RE2 ENDS_WITH_RE(
-    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
-
-// A regex to match any regex pattern which is equivalent to matching a constant string
-// at the end of the string values.
-static const RE2 STARTS_WITH_RE(
-    "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
-
-// A regex to match any regex pattern which is equivalent to a constant string match.
-static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
-
-LikePredicate::LikePredicate(const TExprNode& node)
-  : Predicate(node) {
-}
-
-LikePredicate::~LikePredicate() {
-}
-
-void LikePredicate::LikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  LikePrepareInternal(context, scope, true);
-}
-
-void LikePredicate::ILikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  LikePrepareInternal(context, scope, false);
-}
-
-// TODO: make class StringValue and StringSearch accept a case-sensitive flag and
-// switch back to using the cheaper Constant<>() functions.
-void LikePredicate::LikePrepareInternal(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  state->function_ = LikeFn;
-  context->SetFunctionState(scope, state);
-  if (context->IsArgConstant(1)) {
-    StringVal pattern_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern_val.is_null) return;
-    StringValue pattern = StringValue::FromStringVal(pattern_val);
-    re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
-    re2::RE2 ends_with_re("(?:%+)([^%_]*)");
-    re2::RE2 starts_with_re("([^%_]*)(?:%+)");
-    re2::RE2 equals_re("([^%_]*)");
-    string pattern_str(pattern.ptr, pattern.len);
-    string search_string;
-    if (case_sensitive && RE2::FullMatch(pattern_str, substring_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantSubstringFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, starts_with_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantStartsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEndsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, equals_re, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEqualsFn;
-    } else {
-      string re_pattern;
-      ConvertLikePattern(context,
-          *reinterpret_cast<StringVal*>(context->GetConstantArg(1)), &re_pattern);
-      RE2::Options opts;
-      opts.set_never_nl(false);
-      opts.set_dot_nl(true);
-      opts.set_case_sensitive(case_sensitive);
-      state->regex_.reset(new RE2(re_pattern, opts));
-      if (!state->regex_->ok()) {
-        context->SetError(
-            strings::Substitute("Invalid regex: $0", pattern_val.ptr).c_str());
-      }
-    }
-  }
-}
 
 BooleanVal LikePredicate::Like(FunctionContext* context, const StringVal& val,
     const StringVal& pattern) {
@@ -119,71 +25,6 @@ BooleanVal LikePredicate::Like(FunctionContext* context, const StringVal& val,
   return (state->function_)(context, val, pattern);
 }
 
-void LikePredicate::LikeClose(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope == FunctionContext::THREAD_LOCAL) {
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    delete state;
-  }
-}
-
-void LikePredicate::RegexPrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  RegexPrepareInternal(context, scope, true);
-}
-
-void LikePredicate::IRegexPrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  RegexPrepareInternal(context, scope, false);
-}
-
-void LikePredicate::RegexPrepareInternal(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  context->SetFunctionState(scope, state);
-  state->function_ = RegexFn;
-  if (context->IsArgConstant(1)) {
-    StringVal* pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern->is_null) return;
-    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), pattern->len);
-    string search_string;
-    // The following four conditionals check if the pattern is a constant string,
-    // starts with a constant string and is followed by any number of wildcard characters,
-    // ends with a constant string and is preceded by any number of wildcard characters or
-    // has a constant substring surrounded on both sides by any number of wildcard
-    // characters. In any of these conditions, we can search for the pattern more
-    // efficiently by using our own string match functions rather than regex matching.
-    if (case_sensitive && RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEqualsFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantStartsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantEndsWithFn;
-    } else if (case_sensitive &&
-        RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
-      state->SetSearchString(search_string);
-      state->function_ = ConstantSubstringFn;
-    } else {
-      RE2::Options opts;
-      opts.set_case_sensitive(case_sensitive);
-      state->regex_.reset(new RE2(pattern_str, opts));
-      if (!state->regex_->ok()) {
-        stringstream error;
-        error << "Invalid regex expression" << pattern->ptr;
-        context->SetError(error.str().c_str());
-      }
-      state->function_ = ConstantRegexFnPartial;
-    }
-  }
-}
-
 BooleanVal LikePredicate::Regex(FunctionContext* context, const StringVal& val,
     const StringVal& pattern) {
   LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
@@ -191,238 +32,9 @@ BooleanVal LikePredicate::Regex(FunctionContext* context, const StringVal& val,
   return (state->function_)(context, val, pattern);
 }
 
-// This prepare function is used only when 3 parameters are passed to the regexp_like()
-// function. For the 2 parameter version, the RegexPrepare() function is used to prepare.
-void LikePredicate::RegexpLikePrepare(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope != FunctionContext::THREAD_LOCAL) return;
-  LikePredicateState* state = new LikePredicateState();
-  context->SetFunctionState(scope, state);
-  // If both the pattern and the match parameter are constant, we pre-compile the
-  // regular expression once here. Otherwise, the RE is compiled per row in RegexpLike()
-  if (context->IsArgConstant(1) && context->IsArgConstant(2)) {
-    StringVal* pattern;
-    pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
-    if (pattern->is_null) return;
-    StringVal* match_parameter = reinterpret_cast<StringVal*>(context->GetConstantArg(2));
-    stringstream error;
-    if (match_parameter->is_null) {
-      error << "NULL match parameter";
-      context->SetError(error.str().c_str());
-      return;
-    }
-    RE2::Options opts;
-    string error_str;
-    if (!StringFunctions::SetRE2Options(*match_parameter, &error_str, &opts)) {
-      context->SetError(error_str.c_str());
-      return;
-    }
-    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), pattern->len);
-    state->regex_.reset(new RE2(pattern_str, opts));
-    if (!state->regex_->ok()) {
-      error << "Invalid regex expression" << pattern->ptr;
-      context->SetError(error.str().c_str());
-    }
-  }
-}
-
-// This is used only for the 3 parameter version of regexp_like(). The 2 parameter
-// version calls Regex() directly.
 BooleanVal LikePredicate::RegexpLike(FunctionContext* context, const StringVal& val,
     const StringVal& pattern, const StringVal& match_parameter) {
-  if (val.is_null || pattern.is_null) return BooleanVal::null();
-  // If either the pattern or the third optional match parameter are not constant, we
-  // have to recompile the RE for every row.
-  if (!context->IsArgConstant(2) || !context->IsArgConstant(1)) {
-    if (match_parameter.is_null) return BooleanVal::null();
-    RE2::Options opts;
-    string error_str;
-    if (!StringFunctions::SetRE2Options(match_parameter, &error_str, &opts)) {
-      context->SetError(error_str.c_str());
-      return BooleanVal(false);
-    }
-    string re_pattern(reinterpret_cast<const char*>(pattern.ptr), pattern.len);
-    re2::RE2 re(re_pattern, opts);
-    if (re.ok()) {
-      return RE2::PartialMatch(re2::StringPiece(
-          reinterpret_cast<const char*>(val.ptr), val.len), re);
-    } else {
-      context->SetError(
-          strings::Substitute("Invalid regex: $0", pattern.ptr).c_str());
-      return BooleanVal(false);
-    }
-  }
-  return ConstantRegexFnPartial(context, val, pattern);
-}
-
-void LikePredicate::RegexClose(FunctionContext* context,
-    FunctionContext::FunctionStateScope scope) {
-  if (scope == FunctionContext::THREAD_LOCAL) {
-    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    delete state;
-  }
-}
-
-BooleanVal LikePredicate::RegexFn(FunctionContext* context, const StringVal& val,
-    const StringVal& pattern) {
-  return RegexMatch(context, val, pattern, false);
-}
-
-BooleanVal LikePredicate::LikeFn(FunctionContext* context, const StringVal& val,
-    const StringVal& pattern) {
-  return RegexMatch(context, val, pattern, true);
-}
-
-BooleanVal LikePredicate::ConstantSubstringFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (state->search_string_sv_.len == 0) return BooleanVal(true);
-  StringValue pattern_value = StringValue::FromStringVal(val);
-  return BooleanVal(state->substring_pattern_.Search(&pattern_value) != -1);
-}
-
-BooleanVal LikePredicate::ConstantStartsWithFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (val.len < state->search_string_sv_.len) {
-    return BooleanVal(false);
-  } else {
-    StringValue v =
-        StringValue(reinterpret_cast<char*>(val.ptr), state->search_string_sv_.len);
-    return BooleanVal(state->search_string_sv_.Eq((v)));
-  }
-}
-
-BooleanVal LikePredicate::ConstantEndsWithFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  if (val.len < state->search_string_sv_.len) {
-    return BooleanVal(false);
-  } else {
-    char* ptr =
-        reinterpret_cast<char*>(val.ptr) + val.len - state->search_string_sv_.len;
-    int len = state->search_string_sv_.len;
-    StringValue v = StringValue(ptr, len);
-    return BooleanVal(state->search_string_sv_.Eq(v));
-  }
-}
-
-BooleanVal LikePredicate::ConstantEqualsFn(FunctionContext* context, const StringVal& val,
-    const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  return BooleanVal(state->search_string_sv_.Eq(StringValue::FromStringVal(val)));
-}
-
-BooleanVal LikePredicate::ConstantRegexFnPartial(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
-  return RE2::PartialMatch(operand_sp, *state->regex_);
-}
-
-BooleanVal LikePredicate::ConstantRegexFn(FunctionContext* context,
-    const StringVal& val, const StringVal& pattern) {
-  if (val.is_null) return BooleanVal::null();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
-  return RE2::FullMatch(operand_sp, *state->regex_);
-}
-
-BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
-    const StringVal& operand_value, const StringVal& pattern_value,
-    bool is_like_pattern) {
-  if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null();
-  if (context->IsArgConstant(1)) {
-    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-    if (is_like_pattern) {
-      return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
-          operand_value.ptr), operand_value.len), *state->regex_.get());
-    } else {
-      return RE2::PartialMatch(re2::StringPiece(reinterpret_cast<const char*>(
-          operand_value.ptr), operand_value.len), *state->regex_.get());
-    }
-  } else {
-    string re_pattern;
-    RE2::Options opts;
-    if (is_like_pattern) {
-      ConvertLikePattern(context, pattern_value, &re_pattern);
-      opts.set_never_nl(false);
-      opts.set_dot_nl(true);
-    } else {
-      re_pattern =
-        string(reinterpret_cast<const char*>(pattern_value.ptr), pattern_value.len);
-    }
-    re2::RE2 re(re_pattern, opts);
-    if (re.ok()) {
-      if (is_like_pattern) {
-        return RE2::FullMatch(re2::StringPiece(
-            reinterpret_cast<const char*>(operand_value.ptr), operand_value.len), re);
-      } else {
-        return RE2::PartialMatch(re2::StringPiece(
-            reinterpret_cast<const char*>(operand_value.ptr), operand_value.len), re);
-      }
-    } else {
-      context->SetError(
-          strings::Substitute("Invalid regex: $0", pattern_value.ptr).c_str());
-      return BooleanVal(false);
-    }
-  }
-}
-
-void LikePredicate::ConvertLikePattern(FunctionContext* context, const StringVal& pattern,
-    string* re_pattern) {
-  re_pattern->clear();
-  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
-      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
-  bool is_escaped = false;
-  for (int i = 0; i < pattern.len; ++i) {
-    if (!is_escaped && pattern.ptr[i] == '%') {
-      re_pattern->append(".*");
-    } else if (!is_escaped && pattern.ptr[i] == '_') {
-      re_pattern->append(".");
-    // check for escape char before checking for regex special chars, they might overlap
-    } else if (!is_escaped && pattern.ptr[i] == state->escape_char_) {
-      is_escaped = true;
-    } else if (
-        pattern.ptr[i] == '.'
-        || pattern.ptr[i] == '['
-        || pattern.ptr[i] == ']'
-        || pattern.ptr[i] == '{'
-        || pattern.ptr[i] == '}'
-        || pattern.ptr[i] == '('
-        || pattern.ptr[i] == ')'
-        || pattern.ptr[i] == '\\'
-        || pattern.ptr[i] == '*'
-        || pattern.ptr[i] == '+'
-        || pattern.ptr[i] == '?'
-        || pattern.ptr[i] == '|'
-        || pattern.ptr[i] == '^'
-        || pattern.ptr[i] == '$'
-        ) {
-      // escape all regex special characters; see list at
-      // http://www.boost.org/doc/libs/1_47_0/libs/regex/doc/html/boost_regex/syntax/basic_extended.html
-      re_pattern->append("\\");
-      re_pattern->append(1, pattern.ptr[i]);
-      is_escaped = false;
-    } else {
-      // regular character or escaped special character
-      re_pattern->append(1, pattern.ptr[i]);
-      is_escaped = false;
-    }
-  }
+  return RegexpLikeInternal(context, val, pattern, match_parameter);
 }
 
 }  // namespace impala

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate.cc
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc
new file mode 100644
index 0000000..fd73089
--- /dev/null
+++ b/be/src/exprs/like-predicate.cc
@@ -0,0 +1,414 @@
+// Copyright 2012 Cloudera Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "exprs/like-predicate.h"
+
+#include <string.h>
+#include <re2/re2.h>
+#include <re2/stringpiece.h>
+#include <sstream>
+
+#include "gutil/strings/substitute.h"
+#include "runtime/string-value.inline.h"
+#include "string-functions.h"
+#include "common/names.h"
+
+using namespace impala_udf;
+using namespace re2;
+
+namespace impala {
+// A regex to match any regex pattern is equivalent to a substring search.
+static const RE2 SUBSTRING_RE(
+    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to matching a constant string
+// at the end of the string values.
+static const RE2 ENDS_WITH_RE(
+    "(?:\\.\\*)*([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+// A regex to match any regex pattern which is equivalent to matching a constant string
+// at the end of the string values.
+static const RE2 STARTS_WITH_RE(
+    "\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)(?:\\.\\*)*");
+
+// A regex to match any regex pattern which is equivalent to a constant string match.
+static const RE2 EQUALS_RE("\\^([^\\.\\^\\{\\[\\(\\|\\)\\]\\}\\+\\*\\?\\$\\\\]*)\\$");
+
+LikePredicate::LikePredicate(const TExprNode& node)
+  : Predicate(node) {
+}
+
+LikePredicate::~LikePredicate() {
+}
+
+void LikePredicate::LikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  LikePrepareInternal(context, scope, true);
+}
+
+void LikePredicate::ILikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  LikePrepareInternal(context, scope, false);
+}
+
+// TODO: make class StringValue and StringSearch accept a case-sensitive flag and
+// switch back to using the cheaper Constant<>() functions.
+void LikePredicate::LikePrepareInternal(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  state->function_ = LikeFn;
+  context->SetFunctionState(scope, state);
+  if (context->IsArgConstant(1)) {
+    StringVal pattern_val = *reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern_val.is_null) return;
+    StringValue pattern = StringValue::FromStringVal(pattern_val);
+    re2::RE2 substring_re("(?:%+)([^%_]*)(?:%+)");
+    re2::RE2 ends_with_re("(?:%+)([^%_]*)");
+    re2::RE2 starts_with_re("([^%_]*)(?:%+)");
+    re2::RE2 equals_re("([^%_]*)");
+    string pattern_str(pattern.ptr, pattern.len);
+    string search_string;
+    if (case_sensitive && RE2::FullMatch(pattern_str, substring_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantSubstringFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, starts_with_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantStartsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, ends_with_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEndsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, equals_re, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEqualsFn;
+    } else {
+      string re_pattern;
+      ConvertLikePattern(context,
+          *reinterpret_cast<StringVal*>(context->GetConstantArg(1)), &re_pattern);
+      RE2::Options opts;
+      opts.set_never_nl(false);
+      opts.set_dot_nl(true);
+      opts.set_case_sensitive(case_sensitive);
+      state->regex_.reset(new RE2(re_pattern, opts));
+      if (!state->regex_->ok()) {
+        context->SetError(
+            strings::Substitute("Invalid regex: $0", pattern_val.ptr).c_str());
+      }
+    }
+  }
+}
+
+void LikePredicate::LikeClose(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope == FunctionContext::THREAD_LOCAL) {
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    delete state;
+  }
+}
+
+void LikePredicate::RegexPrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  RegexPrepareInternal(context, scope, true);
+}
+
+void LikePredicate::IRegexPrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  RegexPrepareInternal(context, scope, false);
+}
+
+void LikePredicate::RegexPrepareInternal(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope, bool case_sensitive) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  context->SetFunctionState(scope, state);
+  state->function_ = RegexFn;
+  if (context->IsArgConstant(1)) {
+    StringVal* pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern->is_null) return;
+    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), pattern->len);
+    string search_string;
+    // The following four conditionals check if the pattern is a constant string,
+    // starts with a constant string and is followed by any number of wildcard characters,
+    // ends with a constant string and is preceded by any number of wildcard characters or
+    // has a constant substring surrounded on both sides by any number of wildcard
+    // characters. In any of these conditions, we can search for the pattern more
+    // efficiently by using our own string match functions rather than regex matching.
+    if (case_sensitive && RE2::FullMatch(pattern_str, EQUALS_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEqualsFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, STARTS_WITH_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantStartsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, ENDS_WITH_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantEndsWithFn;
+    } else if (case_sensitive &&
+        RE2::FullMatch(pattern_str, SUBSTRING_RE, &search_string)) {
+      state->SetSearchString(search_string);
+      state->function_ = ConstantSubstringFn;
+    } else {
+      RE2::Options opts;
+      opts.set_case_sensitive(case_sensitive);
+      state->regex_.reset(new RE2(pattern_str, opts));
+      if (!state->regex_->ok()) {
+        stringstream error;
+        error << "Invalid regex expression" << pattern->ptr;
+        context->SetError(error.str().c_str());
+      }
+      state->function_ = ConstantRegexFnPartial;
+    }
+  }
+}
+
+// This prepare function is used only when 3 parameters are passed to the regexp_like()
+// function. For the 2 parameter version, the RegexPrepare() function is used to prepare.
+void LikePredicate::RegexpLikePrepare(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope != FunctionContext::THREAD_LOCAL) return;
+  LikePredicateState* state = new LikePredicateState();
+  context->SetFunctionState(scope, state);
+  // If both the pattern and the match parameter are constant, we pre-compile the
+  // regular expression once here. Otherwise, the RE is compiled per row in RegexpLike()
+  if (context->IsArgConstant(1) && context->IsArgConstant(2)) {
+    StringVal* pattern;
+    pattern = reinterpret_cast<StringVal*>(context->GetConstantArg(1));
+    if (pattern->is_null) return;
+    StringVal* match_parameter = reinterpret_cast<StringVal*>(context->GetConstantArg(2));
+    stringstream error;
+    if (match_parameter->is_null) {
+      error << "NULL match parameter";
+      context->SetError(error.str().c_str());
+      return;
+    }
+    RE2::Options opts;
+    string error_str;
+    if (!StringFunctions::SetRE2Options(*match_parameter, &error_str, &opts)) {
+      context->SetError(error_str.c_str());
+      return;
+    }
+    string pattern_str(reinterpret_cast<const char*>(pattern->ptr), pattern->len);
+    state->regex_.reset(new RE2(pattern_str, opts));
+    if (!state->regex_->ok()) {
+      error << "Invalid regex expression" << pattern->ptr;
+      context->SetError(error.str().c_str());
+    }
+  }
+}
+
+// This is used only for the 3 parameter version of regexp_like(). The 2 parameter
+// version calls Regex() directly.
+BooleanVal LikePredicate::RegexpLikeInternal(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern, const StringVal& match_parameter) {
+  if (val.is_null || pattern.is_null) return BooleanVal::null();
+  // If either the pattern or the third optional match parameter are not constant, we
+  // have to recompile the RE for every row.
+  if (!context->IsArgConstant(2) || !context->IsArgConstant(1)) {
+    if (match_parameter.is_null) return BooleanVal::null();
+    RE2::Options opts;
+    string error_str;
+    if (!StringFunctions::SetRE2Options(match_parameter, &error_str, &opts)) {
+      context->SetError(error_str.c_str());
+      return BooleanVal(false);
+    }
+    string re_pattern(reinterpret_cast<const char*>(pattern.ptr), pattern.len);
+    re2::RE2 re(re_pattern, opts);
+    if (re.ok()) {
+      return RE2::PartialMatch(re2::StringPiece(
+          reinterpret_cast<const char*>(val.ptr), val.len), re);
+    } else {
+      context->SetError(
+          strings::Substitute("Invalid regex: $0", pattern.ptr).c_str());
+      return BooleanVal(false);
+    }
+  }
+  return ConstantRegexFnPartial(context, val, pattern);
+}
+
+void LikePredicate::RegexClose(FunctionContext* context,
+    FunctionContext::FunctionStateScope scope) {
+  if (scope == FunctionContext::THREAD_LOCAL) {
+    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    delete state;
+  }
+}
+
+BooleanVal LikePredicate::RegexFn(FunctionContext* context, const StringVal& val,
+    const StringVal& pattern) {
+  return RegexMatch(context, val, pattern, false);
+}
+
+BooleanVal LikePredicate::LikeFn(FunctionContext* context, const StringVal& val,
+    const StringVal& pattern) {
+  return RegexMatch(context, val, pattern, true);
+}
+
+BooleanVal LikePredicate::ConstantSubstringFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  if (state->search_string_sv_.len == 0) return BooleanVal(true);
+  StringValue pattern_value = StringValue::FromStringVal(val);
+  return BooleanVal(state->substring_pattern_.Search(&pattern_value) != -1);
+}
+
+BooleanVal LikePredicate::ConstantStartsWithFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  if (val.len < state->search_string_sv_.len) {
+    return BooleanVal(false);
+  } else {
+    StringValue v =
+        StringValue(reinterpret_cast<char*>(val.ptr), state->search_string_sv_.len);
+    return BooleanVal(state->search_string_sv_.Eq((v)));
+  }
+}
+
+BooleanVal LikePredicate::ConstantEndsWithFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  if (val.len < state->search_string_sv_.len) {
+    return BooleanVal(false);
+  } else {
+    char* ptr =
+        reinterpret_cast<char*>(val.ptr) + val.len - state->search_string_sv_.len;
+    int len = state->search_string_sv_.len;
+    StringValue v = StringValue(ptr, len);
+    return BooleanVal(state->search_string_sv_.Eq(v));
+  }
+}
+
+BooleanVal LikePredicate::ConstantEqualsFn(FunctionContext* context, const StringVal& val,
+    const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  return BooleanVal(state->search_string_sv_.Eq(StringValue::FromStringVal(val)));
+}
+
+BooleanVal LikePredicate::ConstantRegexFnPartial(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
+  return RE2::PartialMatch(operand_sp, *state->regex_);
+}
+
+BooleanVal LikePredicate::ConstantRegexFn(FunctionContext* context,
+    const StringVal& val, const StringVal& pattern) {
+  if (val.is_null) return BooleanVal::null();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  re2::StringPiece operand_sp(reinterpret_cast<const char*>(val.ptr), val.len);
+  return RE2::FullMatch(operand_sp, *state->regex_);
+}
+
+BooleanVal LikePredicate::RegexMatch(FunctionContext* context,
+    const StringVal& operand_value, const StringVal& pattern_value,
+    bool is_like_pattern) {
+  if (operand_value.is_null || pattern_value.is_null) return BooleanVal::null();
+  if (context->IsArgConstant(1)) {
+    LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+        context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+    if (is_like_pattern) {
+      return RE2::FullMatch(re2::StringPiece(reinterpret_cast<const char*>(
+          operand_value.ptr), operand_value.len), *state->regex_.get());
+    } else {
+      return RE2::PartialMatch(re2::StringPiece(reinterpret_cast<const char*>(
+          operand_value.ptr), operand_value.len), *state->regex_.get());
+    }
+  } else {
+    string re_pattern;
+    RE2::Options opts;
+    if (is_like_pattern) {
+      ConvertLikePattern(context, pattern_value, &re_pattern);
+      opts.set_never_nl(false);
+      opts.set_dot_nl(true);
+    } else {
+      re_pattern =
+        string(reinterpret_cast<const char*>(pattern_value.ptr), pattern_value.len);
+    }
+    re2::RE2 re(re_pattern, opts);
+    if (re.ok()) {
+      if (is_like_pattern) {
+        return RE2::FullMatch(re2::StringPiece(
+            reinterpret_cast<const char*>(operand_value.ptr), operand_value.len), re);
+      } else {
+        return RE2::PartialMatch(re2::StringPiece(
+            reinterpret_cast<const char*>(operand_value.ptr), operand_value.len), re);
+      }
+    } else {
+      context->SetError(
+          strings::Substitute("Invalid regex: $0", pattern_value.ptr).c_str());
+      return BooleanVal(false);
+    }
+  }
+}
+
+void LikePredicate::ConvertLikePattern(FunctionContext* context, const StringVal& pattern,
+    string* re_pattern) {
+  re_pattern->clear();
+  LikePredicateState* state = reinterpret_cast<LikePredicateState*>(
+      context->GetFunctionState(FunctionContext::THREAD_LOCAL));
+  bool is_escaped = false;
+  for (int i = 0; i < pattern.len; ++i) {
+    if (!is_escaped && pattern.ptr[i] == '%') {
+      re_pattern->append(".*");
+    } else if (!is_escaped && pattern.ptr[i] == '_') {
+      re_pattern->append(".");
+    // check for escape char before checking for regex special chars, they might overlap
+    } else if (!is_escaped && pattern.ptr[i] == state->escape_char_) {
+      is_escaped = true;
+    } else if (
+        pattern.ptr[i] == '.'
+        || pattern.ptr[i] == '['
+        || pattern.ptr[i] == ']'
+        || pattern.ptr[i] == '{'
+        || pattern.ptr[i] == '}'
+        || pattern.ptr[i] == '('
+        || pattern.ptr[i] == ')'
+        || pattern.ptr[i] == '\\'
+        || pattern.ptr[i] == '*'
+        || pattern.ptr[i] == '+'
+        || pattern.ptr[i] == '?'
+        || pattern.ptr[i] == '|'
+        || pattern.ptr[i] == '^'
+        || pattern.ptr[i] == '$'
+        ) {
+      // escape all regex special characters; see list at
+      // http://www.boost.org/doc/libs/1_47_0/libs/regex/doc/html/boost_regex/syntax/basic_extended.html
+      re_pattern->append("\\");
+      re_pattern->append(1, pattern.ptr[i]);
+      is_escaped = false;
+    } else {
+      // regular character or escaped special character
+      re_pattern->append(1, pattern.ptr[i]);
+      is_escaped = false;
+    }
+  }
+}
+
+}  // namespace impala

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/c7b7c3ec/be/src/exprs/like-predicate.h
----------------------------------------------------------------------
diff --git a/be/src/exprs/like-predicate.h b/be/src/exprs/like-predicate.h
index 6c907f2..90c78ec 100644
--- a/be/src/exprs/like-predicate.h
+++ b/be/src/exprs/like-predicate.h
@@ -117,11 +117,18 @@ class LikePredicate: public Predicate {
   static void RegexpLikePrepare(impala_udf::FunctionContext* context,
       impala_udf::FunctionContext::FunctionStateScope scope);
 
-  /// Handles regexp_like() when 3 parameters are passed to it
+  /// The cross-compiled wrapper to call RegexpLikeInternal() which is not cross-compiled.
   static impala_udf::BooleanVal RegexpLike(impala_udf::FunctionContext* context,
       const impala_udf::StringVal& val, const impala_udf::StringVal& pattern,
       const impala_udf::StringVal& match_parameter);
 
+  /// Handles regexp_like() when 3 parameters are passed to it. This is intentionally
+  /// not cross-compiled as there is no performance benefit in doing so and it will
+  /// consume extra codegen time.
+  static impala_udf::BooleanVal RegexpLikeInternal(impala_udf::FunctionContext* context,
+      const impala_udf::StringVal& val, const impala_udf::StringVal& pattern,
+      const impala_udf::StringVal& match_parameter);
+
   static void RegexClose(impala_udf::FunctionContext*,
       impala_udf::FunctionContext::FunctionStateScope scope);