You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by js...@apache.org on 2016/04/01 22:25:31 UTC
drill git commit: DRILL-4551: Implement new functions (cot,
regex_matches, split_part, isdate)
Repository: drill
Updated Branches:
refs/heads/master 4442e162a -> e7e9b73c1
DRILL-4551: Implement new functions (cot, regex_matches, split_part, isdate)
Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e7e9b73c
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e7e9b73c
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e7e9b73c
Branch: refs/heads/master
Commit: e7e9b73c1c31ddca93fb24a71c14faa968f27479
Parents: 4442e16
Author: Jason Altekruse <al...@gmail.com>
Authored: Mon Mar 28 11:55:11 2016 -0700
Committer: Jason Altekruse <al...@gmail.com>
Committed: Fri Apr 1 11:41:30 2016 -0700
----------------------------------------------------------------------
.../src/main/codegen/data/MathFunc.tdd | 13 +++
.../exec/expr/fn/impl/DateTypeFunctions.java | 54 +++++++++++
.../expr/fn/impl/StringFunctionHelpers.java | 31 ++++++-
.../exec/expr/fn/impl/StringFunctions.java | 95 +++++++++++++++++++-
.../exec/expr/fn/impl/TestStringFunctions.java | 91 +++++++++++++++++++
.../exec/fn/impl/TestNewDateFunctions.java | 33 +++++++
.../drill/exec/fn/impl/TestTrigFunctions.java | 35 ++++++++
7 files changed, 346 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/codegen/data/MathFunc.tdd
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/data/MathFunc.tdd b/exec/java-exec/src/main/codegen/data/MathFunc.tdd
index c8bd95c..f37310b 100644
--- a/exec/java-exec/src/main/codegen/data/MathFunc.tdd
+++ b/exec/java-exec/src/main/codegen/data/MathFunc.tdd
@@ -391,6 +391,19 @@ trigoMathFunctions : [
{input: "UInt4"},
{input: "UInt8"}
]
+ },
+ {className: "Cot", funcName: "cot", javaFunc : "1.0/java.lang.Math.tan", outputType: "Float8", types: [
+ {input: "Int"},
+ {input: "BigInt"},
+ {input: "Float4"},
+ {input: "Float8"},
+ {input: "SmallInt"},
+ {input: "TinyInt"},
+ {input: "UInt1"},
+ {input: "UInt2"},
+ {input: "UInt4"},
+ {input: "UInt8"}
+ ]
}
]
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
index 41ce328..5a24af4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
@@ -29,10 +29,12 @@ import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.annotations.Workspace;
import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.BitHolder;
import org.apache.drill.exec.expr.holders.DateHolder;
import org.apache.drill.exec.expr.holders.IntervalDayHolder;
import org.apache.drill.exec.expr.holders.IntervalHolder;
import org.apache.drill.exec.expr.holders.IntervalYearHolder;
+import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.expr.holders.TimeHolder;
import org.apache.drill.exec.expr.holders.TimeStampHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;
@@ -40,6 +42,58 @@ import org.apache.drill.exec.ops.ContextInformation;
public class DateTypeFunctions {
+ /**
+ * Function to check if a varchar value can be cast to a date.
+ *
+ * At the time of writing this function, several other databases were checked
+ * for behavior compatibility. There was not a consensus between oracle and
+ * Sql server about the expected behavior of this function, and Postgres
+ * lacks it completely.
+ *
+ * Sql Server appears to have both a DATEFORMAT and language locale setting
+ * that can change the values accepted by this function. Oracle appears to
+ * support several formats, some of which are not mentioned in the Sql
+ * Server docs. With the lack of standardization, we decided to implement
+ * this function so that it would only consider date strings that would be
+ * accepted by the cast function as valid.
+ */
+ @SuppressWarnings("unused")
+ @FunctionTemplate(name = "isdate", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls=NullHandling.INTERNAL,
+ costCategory = FunctionTemplate.FunctionCostCategory.COMPLEX)
+ public static class IsDate implements DrillSimpleFunc {
+
+ @Param NullableVarCharHolder in;
+ @Output BitHolder out;
+
+ public void setup() { }
+
+ public void eval() {
+ // for a null input return false
+ if (in.isSet == 0) {
+ out.value = 0;
+ } else {
+ out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.isReadableAsDate(in.buffer, in.start, in.end) ? 1 : 0;
+ }
+ }
+ }
+
+ // Same as above, just for required input
+ @SuppressWarnings("unused")
+ @FunctionTemplate(name = "isdate", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls=NullHandling.INTERNAL,
+ costCategory = FunctionTemplate.FunctionCostCategory.COMPLEX)
+ public static class IsDateRequiredInput implements DrillSimpleFunc {
+
+ @Param VarCharHolder in;
+ @Output BitHolder out;
+
+ public void setup() { }
+
+ public void eval() {
+ // for a null input return false
+ out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.isReadableAsDate(in.buffer, in.start, in.end) ? 1 : 0;
+ }
+ }
+
@FunctionTemplate(name = "intervaltype", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
public static class IntervalType implements DrillSimpleFunc {
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
index 06c9212..3bc8253 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -213,11 +213,35 @@ public class StringFunctionHelpers {
if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
buf.checkBytes(start, end);
}
- return memGetDate(buf.memoryAddress(), start, end);
+ int[] dateFields = memGetDate(buf.memoryAddress(), start, end);
+ return CHRONOLOGY.getDateTimeMillis(dateFields[0], dateFields[1], dateFields[2], 0);
}
+ /**
+ * Takes a string value, specified as a buffer with a start and end and
+ * returns true if the value can be read as a date.
+ *
+ * @param buf
+ * @param start
+ * @param end
+ * @return true iff the string value can be read as a date
+ */
+ public static boolean isReadableAsDate(DrillBuf buf, int start, int end){
+ // Tried looking for a method that would do this check without relying on
+ // an exception in the failure case (for better performance). Joda does
+ // not appear to provide such a function, so the try/catch block
+ // was chosen for compatibility with the getDate() method that actually
+ // returns the result of parsing.
+ try {
+ getDate(buf, start, end);
+ // the parsing from the line above succeeded, this was a valid date
+ return true;
+ } catch(IllegalArgumentException ex) {
+ return false;
+ }
+ }
- private static long memGetDate(long memoryAddress, int start, int end){
+ private static int[] memGetDate(long memoryAddress, int start, int end){
long index = memoryAddress + start;
final long endIndex = memoryAddress + end;
int digit = 0;
@@ -255,7 +279,6 @@ public class StringFunctionHelpers {
dateFields[0] += 1900;
}
}
-
- return CHRONOLOGY.getDateTimeMillis(dateFields[0], dateFields[1], dateFields[2], 0);
+ return dateFields;
}
}
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 112f5fd..78de816 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -151,7 +151,7 @@ public class StringFunctions{
@Override
public void setup() {
- matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
+ matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
}
@Override
@@ -200,7 +200,7 @@ public class StringFunctions{
@Override
public void setup() {
- matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
+ matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
}
@Override
@@ -215,6 +215,33 @@ public class StringFunctions{
}
}
+ /*
+ * Match the given input against a regular expression.
+ *
+ * This differs from the "similar" function in that accepts a standard regex, rather than a SQL regex.
+ */
+ @FunctionTemplate(name = "regexp_matches", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+ public static class RegexpMatches implements DrillSimpleFunc {
+
+ @Param VarCharHolder input;
+ @Param(constant=true) VarCharHolder pattern;
+ @Inject DrillBuf buffer;
+ @Workspace java.util.regex.Matcher matcher;
+ @Output BitHolder out;
+
+ @Override
+ public void setup() {
+ matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
+ }
+
+ @Override
+ public void eval() {
+ final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
+ matcher.reset(i);
+ out.value = matcher.matches()? 1:0;
+ }
+ }
+
@FunctionTemplate(names = {"char_length", "character_length", "length"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
public static class CharLength implements DrillSimpleFunc {
@Param VarCharHolder input;
@@ -304,6 +331,70 @@ public class StringFunctions{
}
+
+ @FunctionTemplate(name = "split_part", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+ public static class SplitPart implements DrillSimpleFunc {
+ @Param VarCharHolder str;
+ @Param VarCharHolder splitter;
+ @Param IntHolder index;
+
+ @Output VarCharHolder out;
+
+ @Override
+ public void setup() {}
+
+ @Override
+ public void eval() {
+ if (index.value < 1) {
+ throw org.apache.drill.common.exceptions.UserException.functionError()
+ .message("Index in split_part must be positive, value provided was " + index.value).build();
+ }
+ int bufPos = str.start;
+ out.start = bufPos;
+ boolean beyondLastIndex = false;
+ int splitterLen = (splitter.end - splitter.start);
+ for (int i = 1; i < index.value + 1; i++) {
+ //Do string match.
+ final int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer,
+ bufPos, str.end,
+ splitter.buffer, splitter.start, splitter.end);
+ if (pos < 0) {
+ // this is the last iteration, it is okay to hit the end of the string
+ if (i == index.value) {
+ bufPos = str.end;
+ // when the output is terminated by the end of the string we do not want
+ // to subtract the length of the splitter from the output at the end of
+ // the function below
+ splitterLen = 0;
+ break;
+ } else {
+ beyondLastIndex = true;
+ break;
+ }
+ } else {
+ // Count the # of characters. (one char could have 1-4 bytes)
+ // unlike the position function don't add 1, we are not translating the positions into SQL user level 1 based indices
+ bufPos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos)
+ + splitterLen;
+ // if this is the second to last iteration, store the position again, as the start and end of the
+ // string to be returned need to be available
+ if (i == index.value - 1) {
+ out.start = bufPos;
+ }
+ }
+ }
+ if (beyondLastIndex) {
+ out.start = 0;
+ out.end = 0;
+ out.buffer = str.buffer;
+ } else {
+ out.buffer = str.buffer;
+ out.end = bufPos - splitterLen;
+ }
+ }
+
+ }
+
// same as function "position(substr, str) ", except the reverse order of argument.
@FunctionTemplate(name = "strpos", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
public static class Strpos implements DrillSimpleFunc {
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index 0ff789a..2efab3b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -20,9 +20,100 @@ package org.apache.drill.exec.expr.fn.impl;
import org.apache.drill.BaseTestQuery;
import org.junit.Test;
+import static org.junit.Assert.assertTrue;
+
public class TestStringFunctions extends BaseTestQuery {
@Test
+ public void testStrPosMultiByte() throws Exception {
+ testBuilder()
+ .sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues(1l)
+ .go();
+
+ testBuilder()
+ .sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues(1l)
+ .go();
+ }
+
+ @Test
+ public void testSplitPart() throws Exception {
+ testBuilder()
+ .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 1) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("abc")
+ .go();
+
+ testBuilder()
+ .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 2) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("def")
+ .go();
+
+ // invalid index
+ boolean expectedErrorEncountered;
+ try {
+ testBuilder()
+ .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 0) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("abc")
+ .go();
+ expectedErrorEncountered = false;
+ } catch (Exception ex) {
+ assertTrue(ex.getMessage().contains("Index in split_part must be positive, value provided was 0"));
+ expectedErrorEncountered = true;
+ }
+ if (!expectedErrorEncountered) {
+ throw new RuntimeException("Missing expected error on invalid index for split_part function");
+ }
+
+ // with a multi-byte splitter
+ testBuilder()
+ .sqlQuery("select split_part('abc\\u1111drill\\u1111ghi', '\\u1111', 2) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("drill")
+ .go();
+
+ // going beyond the last available index, returns empty string
+ testBuilder()
+ .sqlQuery("select split_part('a,b,c', ',', 4) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("")
+ .go();
+
+ // if the delimiter does not appear in the string, 1 returns the whole string
+ testBuilder()
+ .sqlQuery("select split_part('a,b,c', ' ', 1) res1 from (values(1))")
+ .ordered()
+ .baselineColumns("res1")
+ .baselineValues("a,b,c")
+ .go();
+ }
+
+ @Test
+ public void testRegexpMatches() throws Exception {
+ testBuilder()
+ .sqlQuery("select regexp_matches(a, '^a.*') res1, regexp_matches(b, '^a.*') res2 " +
+ "from (values('abc', 'bcd'), ('bcd', 'abc')) as t(a,b)")
+ .unOrdered()
+ .baselineColumns("res1", "res2")
+ .baselineValues(true, false)
+ .baselineValues(false, true)
+ .build()
+ .run();
+ }
+
+ @Test
public void testILike() throws Exception {
testBuilder()
.sqlQuery("select n_name from cp.`tpch/nation.parquet` where ilike(n_name, '%united%') = true")
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
index 0e9276c..257cc53 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
@@ -23,12 +23,45 @@ import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.junit.Test;
+import java.sql.Date;
+
public class TestNewDateFunctions extends BaseTestQuery {
DateTime date;
DateTimeFormatter formatter;
long unixTimeStamp = -1;
@Test
+ public void testIsDate() throws Exception {
+ final String dateValues = "(values('1900-01-01'), ('3500-01-01'), ('2000-12-31'), ('2005-12-32'), ('2015-02-29'), (cast(null as varchar))) as t(date1)";
+ testBuilder()
+ .sqlQuery("select isDate(date1) res1 " +
+ "from " + dateValues)
+ .unOrdered()
+ .baselineColumns("res1")
+ .baselineValues(true)
+ .baselineValues(true)
+ .baselineValues(true)
+ .baselineValues(false)
+ .baselineValues(false)
+ .baselineValues(false)
+ .build()
+ .run();
+
+ testBuilder()
+ .sqlQuery("select case when isdate(date1) then cast(date1 as date) else null end res1 from " + dateValues)
+ .unOrdered()
+ .baselineColumns("res1")
+ .baselineValues(new DateTime(Date.valueOf("1900-01-01").getTime()))
+ .baselineValues(new DateTime(Date.valueOf("3500-01-01").getTime()))
+ .baselineValues(new DateTime(Date.valueOf("2000-12-31").getTime()))
+ .baselineValues(new Object[] {null})
+ .baselineValues(new Object[] {null})
+ .baselineValues(new Object[] {null})
+ .build()
+ .run();
+ }
+
+ @Test
public void testUnixTimeStampForDate() throws Exception {
formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
date = formatter.parseDateTime("2009-03-20 11:30:01");
http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
new file mode 100644
index 0000000..de95360
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.fn.impl;
+
+import org.apache.drill.BaseTestQuery;
+import org.junit.Test;
+
+public class TestTrigFunctions extends BaseTestQuery {
+
+ @Test
+ public void testCot() throws Exception {
+ final String query = "select cot(radians(30)) as cot_result from (values(1))";
+ testBuilder()
+ .sqlQuery(query)
+ .ordered()
+ .baselineColumns("cot_result")
+ .baselineValues(1.7320508075688774)
+ .go();
+ }
+}