You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by hs...@apache.org on 2016/04/27 02:26:02 UTC

drill git commit: DRILL-4573: Zero copy LIKE, REGEXP_MATCHES, SUBSTR

Repository: drill
Updated Branches:
  refs/heads/master a07f4de7e -> 8176fbca6


DRILL-4573: Zero copy LIKE, REGEXP_MATCHES, SUBSTR


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8176fbca
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8176fbca
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8176fbca

Branch: refs/heads/master
Commit: 8176fbca6435ea2417a77b99e954ae10ca4bbdc4
Parents: a07f4de
Author: jean-claude cote <jc...@gmail.com>
Authored: Fri Apr 1 23:37:00 2016 -0400
Committer: Hsuan-Yi Chu <hs...@usc.edu>
Committed: Mon Apr 25 09:47:07 2016 -0700

----------------------------------------------------------------------
 .../exec/expr/fn/impl/CharSequenceWrapper.java  | 49 +++++++++++
 .../expr/fn/impl/SimpleRepeatedFunctions.java   | 11 ++-
 .../exec/expr/fn/impl/StringFunctions.java      | 91 +++++++++++++++-----
 3 files changed, 128 insertions(+), 23 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
new file mode 100644
index 0000000..6c475ed
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+import io.netty.buffer.DrillBuf;
+
+public class CharSequenceWrapper implements CharSequence {
+
+    private int start;
+    private int end;
+    private DrillBuf buffer;
+
+    @Override
+    public int length() {
+        return end - start;
+    }
+
+    @Override
+    public char charAt(int index) {
+        return (char) buffer.getByte(start + index);
+    }
+
+    @Override
+    public CharSequence subSequence(int start, int end) {
+        throw new UnsupportedOperationException("Not implemented.");
+    }
+
+    public void setBuffer(int start, int end, DrillBuf buffer) {
+        this.start = start;
+        this.end = end;
+        this.buffer = buffer;
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
index 253c92a..7825289 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
@@ -327,6 +327,7 @@ public class SimpleRepeatedFunctions {
     @Param VarCharHolder targetValue;
     @Workspace VarCharHolder currVal;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Output BitHolder out;
 
@@ -334,15 +335,19 @@ public class SimpleRepeatedFunctions {
       currVal = new VarCharHolder();
       matcher = java.util.regex.Pattern.compile(
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(targetValue.start,  targetValue.end,  targetValue.buffer)).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     public void eval() {
       for (int i = listToSearch.start; i < listToSearch.end; i++) {
         out.value = 0;
         listToSearch.vector.getAccessor().get(i, currVal);
-        String in = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(currVal.start, currVal.end, currVal.buffer);
-        matcher.reset(in);
-          if(matcher.find()) {
+        charSequenceWrapper.setBuffer(currVal.start, currVal.end, currVal.buffer);
+        // Reusing same charSequenceWrapper, no need to pass it in.
+        // This saves one method call since reset(CharSequence) calls reset()
+        matcher.reset();
+        if(matcher.find()) {
              out.value = 1;
              break;
           }

http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 78de816..0ce1c4e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -55,17 +55,22 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer))).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -78,18 +83,23 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder escape;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer),
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start,  escape.end,  escape.buffer))).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -101,18 +111,23 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer)),
           java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -125,6 +140,7 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder escape;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
@@ -132,12 +148,16 @@ public class StringFunctions{
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer),
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start,  escape.end,  escape.buffer)),
           java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -148,16 +168,21 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -169,18 +194,23 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder escape;
     @Output BitHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer),
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start,  escape.end,  escape.buffer))).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -196,19 +226,25 @@ public class StringFunctions{
     @Param VarCharHolder replacement;
     @Inject DrillBuf buffer;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
     @Output VarCharHolder out;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
       out.start = 0;
-      final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       final String r = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(replacement.start, replacement.end, replacement.buffer);
-      final byte [] bytea = matcher.reset(i).replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8"));
+      final byte [] bytea = matcher.replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8"));
       out.buffer = buffer = buffer.reallocIfNeeded(bytea.length);
       out.buffer.setBytes(out.start, bytea);
       out.end = bytea.length;
@@ -227,17 +263,22 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Inject DrillBuf buffer;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
     @Output BitHolder out;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer)).matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       out.value = matcher.matches()? 1:0;
     }
   }
@@ -566,18 +607,23 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Output NullableVarCharHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer))
           .matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
     public void eval() {
-      final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-      matcher.reset(i);
+      charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+      // Reusing same charSequenceWrapper, no need to pass it in.
+      // This saves one method call since reset(CharSequence) calls reset()
+      matcher.reset();
       if (matcher.find()) {
         out.isSet = 1;
         out.buffer = input.buffer;
@@ -593,12 +639,15 @@ public class StringFunctions{
     @Param(constant=true) VarCharHolder pattern;
     @Output NullableVarCharHolder out;
     @Workspace java.util.regex.Matcher matcher;
+    @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
 
     @Override
     public void setup() {
       matcher = java.util.regex.Pattern.compile(
           org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer))
           .matcher("");
+      charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+      matcher.reset(charSequenceWrapper);
     }
 
     @Override
@@ -606,8 +655,10 @@ public class StringFunctions{
       if (input.isSet == 0) {
         out.isSet = 0;
       } else {
-        final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
-        matcher.reset(i);
+        charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+        // Reusing same charSequenceWrapper, no need to pass it in.
+        // This saves one method call since reset(CharSequence) calls reset()
+        matcher.reset();
         if (matcher.find()) {
           out.isSet = 1;
           out.buffer = input.buffer;