You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by hs...@apache.org on 2016/04/27 02:26:02 UTC
drill git commit: DRILL-4573: Zero copy LIKE, REGEXP_MATCHES, SUBSTR
Repository: drill
Updated Branches:
refs/heads/master a07f4de7e -> 8176fbca6
DRILL-4573: Zero copy LIKE, REGEXP_MATCHES, SUBSTR
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/8176fbca
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/8176fbca
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/8176fbca
Branch: refs/heads/master
Commit: 8176fbca6435ea2417a77b99e954ae10ca4bbdc4
Parents: a07f4de
Author: jean-claude cote <jc...@gmail.com>
Authored: Fri Apr 1 23:37:00 2016 -0400
Committer: Hsuan-Yi Chu <hs...@usc.edu>
Committed: Mon Apr 25 09:47:07 2016 -0700
----------------------------------------------------------------------
.../exec/expr/fn/impl/CharSequenceWrapper.java | 49 +++++++++++
.../expr/fn/impl/SimpleRepeatedFunctions.java | 11 ++-
.../exec/expr/fn/impl/StringFunctions.java | 91 +++++++++++++++-----
3 files changed, 128 insertions(+), 23 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
new file mode 100644
index 0000000..6c475ed
--- /dev/null
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/CharSequenceWrapper.java
@@ -0,0 +1,49 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.expr.fn.impl;
+
+import io.netty.buffer.DrillBuf;
+
+public class CharSequenceWrapper implements CharSequence {
+ /** Mutable CharSequence view over the byte range [start, end) of a DrillBuf; each byte is exposed as one char. */
+ private int start; // offset of the first byte of the view within buffer
+ private int end; // offset just past the last byte of the view (length() is end - start)
+ private DrillBuf buffer; // backing buffer; stays null until setBuffer() is called
+ /** Returns the length of the view, computed as end - start (a count of bytes, not of decoded characters). */
+ @Override
+ public int length() {
+ return end - start;
+ }
+ /** Returns the byte at offset start + index cast directly to char; index is not checked against length() here. */
+ @Override
+ public char charAt(int index) {
+ return (char) buffer.getByte(start + index); // NOTE(review): no charset decoding; if getByte returns a signed byte, values >= 0x80 sign-extend to chars 0xFF80-0xFFFF - confirm inputs are ASCII-only
+ }
+ /** Not supported: always throws UnsupportedOperationException, whatever the arguments. */
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ throw new UnsupportedOperationException("Not implemented."); // NOTE(review): Matcher methods that extract text (e.g. group()) may call subSequence() on their input - confirm callers avoid them
+ }
+ /** Re-points this view at bytes [start, end) of the given buffer; arguments are stored as-is, with no validation or copying. */
+ public void setBuffer(int start, int end, DrillBuf buffer) {
+ this.start = start;
+ this.end = end;
+ this.buffer = buffer;
+ }
+ // NOTE(review): toString() is not overridden here, so it does not return the wrapped text - verify no caller relies on it.
+}
http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
index 253c92a..7825289 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/SimpleRepeatedFunctions.java
@@ -327,6 +327,7 @@ public class SimpleRepeatedFunctions {
@Param VarCharHolder targetValue;
@Workspace VarCharHolder currVal;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Output BitHolder out;
@@ -334,15 +335,19 @@ public class SimpleRepeatedFunctions {
currVal = new VarCharHolder();
matcher = java.util.regex.Pattern.compile(
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(targetValue.start, targetValue.end, targetValue.buffer)).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
public void eval() {
for (int i = listToSearch.start; i < listToSearch.end; i++) {
out.value = 0;
listToSearch.vector.getAccessor().get(i, currVal);
- String in = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(currVal.start, currVal.end, currVal.buffer);
- matcher.reset(in);
- if(matcher.find()) {
+ charSequenceWrapper.setBuffer(currVal.start, currVal.end, currVal.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
+ if(matcher.find()) {
out.value = 1;
break;
}
http://git-wip-us.apache.org/repos/asf/drill/blob/8176fbca/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 78de816..0ce1c4e 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -55,17 +55,22 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -78,18 +83,23 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder escape;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer),
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -101,18 +111,23 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexLike( //
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)),
java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -125,6 +140,7 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder escape;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
@@ -132,12 +148,16 @@ public class StringFunctions{
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer),
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer)),
java.util.regex.Pattern.CASE_INSENSITIVE).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -148,16 +168,21 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -169,18 +194,23 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder escape;
@Output BitHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer),
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(escape.start, escape.end, escape.buffer))).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -196,19 +226,25 @@ public class StringFunctions{
@Param VarCharHolder replacement;
@Inject DrillBuf buffer;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Output VarCharHolder out;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
out.start = 0;
- final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
final String r = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(replacement.start, replacement.end, replacement.buffer);
- final byte [] bytea = matcher.reset(i).replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8"));
+ final byte [] bytea = matcher.replaceAll(r).getBytes(java.nio.charset.Charset.forName("UTF-8"));
out.buffer = buffer = buffer.reallocIfNeeded(bytea.length);
out.buffer.setBytes(out.start, bytea);
out.end = bytea.length;
@@ -227,17 +263,22 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Inject DrillBuf buffer;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Output BitHolder out;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
out.value = matcher.matches()? 1:0;
}
}
@@ -566,18 +607,23 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Output NullableVarCharHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))
.matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
public void eval() {
- final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
if (matcher.find()) {
out.isSet = 1;
out.buffer = input.buffer;
@@ -593,12 +639,15 @@ public class StringFunctions{
@Param(constant=true) VarCharHolder pattern;
@Output NullableVarCharHolder out;
@Workspace java.util.regex.Matcher matcher;
+ @Workspace org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper charSequenceWrapper;
@Override
public void setup() {
matcher = java.util.regex.Pattern.compile(
org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))
.matcher("");
+ charSequenceWrapper = new org.apache.drill.exec.expr.fn.impl.CharSequenceWrapper();
+ matcher.reset(charSequenceWrapper);
}
@Override
@@ -606,8 +655,10 @@ public class StringFunctions{
if (input.isSet == 0) {
out.isSet = 0;
} else {
- final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
- matcher.reset(i);
+ charSequenceWrapper.setBuffer(input.start, input.end, input.buffer);
+ // Reusing same charSequenceWrapper, no need to pass it in.
+ // This saves one method call since reset(CharSequence) calls reset()
+ matcher.reset();
if (matcher.find()) {
out.isSet = 1;
out.buffer = input.buffer;