You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by js...@apache.org on 2016/04/01 22:25:31 UTC

drill git commit: DRILL-4551: Implement new functions (cot, regex_matches, split_part, isdate)

Repository: drill
Updated Branches:
  refs/heads/master 4442e162a -> e7e9b73c1


DRILL-4551: Implement new functions (cot, regex_matches, split_part, isdate)


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/e7e9b73c
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/e7e9b73c
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/e7e9b73c

Branch: refs/heads/master
Commit: e7e9b73c1c31ddca93fb24a71c14faa968f27479
Parents: 4442e16
Author: Jason Altekruse <al...@gmail.com>
Authored: Mon Mar 28 11:55:11 2016 -0700
Committer: Jason Altekruse <al...@gmail.com>
Committed: Fri Apr 1 11:41:30 2016 -0700

----------------------------------------------------------------------
 .../src/main/codegen/data/MathFunc.tdd          | 13 +++
 .../exec/expr/fn/impl/DateTypeFunctions.java    | 54 +++++++++++
 .../expr/fn/impl/StringFunctionHelpers.java     | 31 ++++++-
 .../exec/expr/fn/impl/StringFunctions.java      | 95 +++++++++++++++++++-
 .../exec/expr/fn/impl/TestStringFunctions.java  | 91 +++++++++++++++++++
 .../exec/fn/impl/TestNewDateFunctions.java      | 33 +++++++
 .../drill/exec/fn/impl/TestTrigFunctions.java   | 35 ++++++++
 7 files changed, 346 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/codegen/data/MathFunc.tdd
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/codegen/data/MathFunc.tdd b/exec/java-exec/src/main/codegen/data/MathFunc.tdd
index c8bd95c..f37310b 100644
--- a/exec/java-exec/src/main/codegen/data/MathFunc.tdd
+++ b/exec/java-exec/src/main/codegen/data/MathFunc.tdd
@@ -391,6 +391,19 @@ trigoMathFunctions : [
       {input: "UInt4"},
       {input: "UInt8"}
     ]
+ },
+ {className: "Cot", funcName: "cot", javaFunc : "1.0/java.lang.Math.tan", outputType: "Float8", types: [
+      {input: "Int"},
+      {input: "BigInt"},
+      {input: "Float4"},
+      {input: "Float8"},
+      {input: "SmallInt"},
+      {input: "TinyInt"},
+      {input: "UInt1"},
+      {input: "UInt2"},
+      {input: "UInt4"},
+      {input: "UInt8"}
+    ]
  }
 ]
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
index 41ce328..5a24af4 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/DateTypeFunctions.java
@@ -29,10 +29,12 @@ import org.apache.drill.exec.expr.annotations.Output;
 import org.apache.drill.exec.expr.annotations.Param;
 import org.apache.drill.exec.expr.annotations.Workspace;
 import org.apache.drill.exec.expr.holders.BigIntHolder;
+import org.apache.drill.exec.expr.holders.BitHolder;
 import org.apache.drill.exec.expr.holders.DateHolder;
 import org.apache.drill.exec.expr.holders.IntervalDayHolder;
 import org.apache.drill.exec.expr.holders.IntervalHolder;
 import org.apache.drill.exec.expr.holders.IntervalYearHolder;
+import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
 import org.apache.drill.exec.expr.holders.TimeHolder;
 import org.apache.drill.exec.expr.holders.TimeStampHolder;
 import org.apache.drill.exec.expr.holders.VarCharHolder;
@@ -40,6 +42,58 @@ import org.apache.drill.exec.ops.ContextInformation;
 
 public class DateTypeFunctions {
 
+    /**
+     * Function to check if a varchar value can be cast to a date.
+     *
+     * At the time of writing this function, several other databases were checked
+     * for behavior compatibility. There was not a consensus between oracle and
+     * Sql server about the expected behavior of this function, and Postgres
+     * lacks it completely.
+     *
+     * Sql Server appears to have both a DATEFORMAT and language locale setting
+     * that can change the values accepted by this function. Oracle appears to
+     * support several formats, some of which are not mentioned in the Sql
+     * Server docs. With the lack of standardization, we decided to implement
+     * this function so that it would only consider date strings that would be
+     * accepted by the cast function as valid.
+     */
+    @SuppressWarnings("unused")
+    @FunctionTemplate(name = "isdate", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls=NullHandling.INTERNAL,
+        costCategory = FunctionTemplate.FunctionCostCategory.COMPLEX)
+    public static class IsDate implements DrillSimpleFunc {
+
+      @Param NullableVarCharHolder in;
+      @Output BitHolder out;
+
+      public void setup() { }
+
+      public void eval() {
+        // for a null input return false
+        if (in.isSet == 0) {
+          out.value = 0;
+        } else {
+          out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.isReadableAsDate(in.buffer, in.start, in.end) ? 1 : 0;
+        }
+      }
+    }
+
+    // Same as above, just for required input
+    @SuppressWarnings("unused")
+    @FunctionTemplate(name = "isdate", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls=NullHandling.INTERNAL,
+        costCategory = FunctionTemplate.FunctionCostCategory.COMPLEX)
+    public static class IsDateRequiredInput implements DrillSimpleFunc {
+
+      @Param VarCharHolder in;
+      @Output BitHolder out;
+
+      public void setup() { }
+
+      public void eval() {
+        // for a null input return false
+        out.value = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.isReadableAsDate(in.buffer, in.start, in.end) ? 1 : 0;
+      }
+    }
+
     @FunctionTemplate(name = "intervaltype", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
     public static class IntervalType implements DrillSimpleFunc {
 

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
index 06c9212..3bc8253 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctionHelpers.java
@@ -213,11 +213,35 @@ public class StringFunctionHelpers {
     if (BoundsChecking.BOUNDS_CHECKING_ENABLED) {
       buf.checkBytes(start, end);
     }
-    return memGetDate(buf.memoryAddress(), start, end);
+    int[] dateFields = memGetDate(buf.memoryAddress(), start, end);
+    return CHRONOLOGY.getDateTimeMillis(dateFields[0], dateFields[1], dateFields[2], 0);
   }
 
+  /**
+   * Takes a string value, specified as a buffer with a start and end and
+   * returns true if the value can be read as a date.
+   *
+   * @param buf
+   * @param start
+   * @param end
+   * @return true iff the string value can be read as a date
+   */
+  public static boolean isReadableAsDate(DrillBuf buf, int start, int end){
+    // Tried looking for a method that would do this check without relying on
+    // an exception in the failure case (for better performance). Joda does
+    // not appear to provide such a function, so the try/catch block
+    // was chosen for compatibility with the getDate() method that actually
+    // returns the result of parsing.
+    try {
+      getDate(buf, start, end);
+      // the parsing from the line above succeeded, this was a valid date
+      return true;
+    } catch(IllegalArgumentException ex) {
+      return false;
+    }
+  }
 
-  private static long memGetDate(long memoryAddress, int start, int end){
+  private static int[] memGetDate(long memoryAddress, int start, int end){
     long index = memoryAddress + start;
     final long endIndex = memoryAddress + end;
     int digit = 0;
@@ -255,7 +279,6 @@ public class StringFunctionHelpers {
         dateFields[0] += 1900;
       }
     }
-
-    return CHRONOLOGY.getDateTimeMillis(dateFields[0], dateFields[1], dateFields[2], 0);
+    return dateFields;
   }
 }

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
index 112f5fd..78de816 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/expr/fn/impl/StringFunctions.java
@@ -151,7 +151,7 @@ public class StringFunctions{
 
     @Override
     public void setup() {
-      matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer))).matcher("");
+      matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.RegexpUtil.sqlToRegexSimilar(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer))).matcher("");
     }
 
     @Override
@@ -200,7 +200,7 @@ public class StringFunctions{
 
     @Override
     public void setup() {
-      matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer)).matcher("");
+      matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start, pattern.end, pattern.buffer)).matcher("");
     }
 
     @Override
@@ -215,6 +215,33 @@ public class StringFunctions{
     }
   }
 
+  /*
+   * Match the given input against a regular expression.
+   *
+   * This differs from the "similar" function in that accepts a standard regex, rather than a SQL regex.
+   */
+  @FunctionTemplate(name = "regexp_matches", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class RegexpMatches implements DrillSimpleFunc {
+
+    @Param VarCharHolder input;
+    @Param(constant=true) VarCharHolder pattern;
+    @Inject DrillBuf buffer;
+    @Workspace java.util.regex.Matcher matcher;
+    @Output BitHolder out;
+
+    @Override
+    public void setup() {
+      matcher = java.util.regex.Pattern.compile(org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(pattern.start,  pattern.end,  pattern.buffer)).matcher("");
+    }
+
+    @Override
+    public void eval() {
+      final String i = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers.toStringFromUTF8(input.start, input.end, input.buffer);
+      matcher.reset(i);
+      out.value = matcher.matches()? 1:0;
+    }
+  }
+
   @FunctionTemplate(names = {"char_length", "character_length", "length"}, scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class CharLength implements DrillSimpleFunc {
     @Param  VarCharHolder input;
@@ -304,6 +331,70 @@ public class StringFunctions{
 
   }
 
+
+  @FunctionTemplate(name = "split_part", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
+  public static class SplitPart implements DrillSimpleFunc {
+    @Param  VarCharHolder str;
+    @Param  VarCharHolder splitter;
+    @Param  IntHolder index;
+
+    @Output VarCharHolder out;
+
+    @Override
+    public void setup() {}
+
+    @Override
+    public void eval() {
+      if (index.value < 1) {
+        throw org.apache.drill.common.exceptions.UserException.functionError()
+            .message("Index in split_part must be positive, value provided was " + index.value).build();
+      }
+      int bufPos = str.start;
+      out.start = bufPos;
+      boolean beyondLastIndex = false;
+      int splitterLen = (splitter.end - splitter.start);
+      for (int i = 1; i < index.value + 1; i++) {
+        //Do string match.
+        final int pos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.stringLeftMatchUTF8(str.buffer,
+            bufPos, str.end,
+            splitter.buffer, splitter.start, splitter.end);
+        if (pos < 0) {
+          // this is the last iteration, it is okay to hit the end of the string
+          if (i == index.value) {
+            bufPos = str.end;
+            // when the output is terminated by the end of the string we do not want
+            // to subtract the length of the splitter from the output at the end of
+            // the function below
+            splitterLen = 0;
+            break;
+          } else {
+            beyondLastIndex = true;
+            break;
+          }
+        } else {
+          // Count the # of characters. (one char could have 1-4 bytes)
+          // unlike the position function don't add 1, we are not translating the positions into SQL user level 1 based indices
+          bufPos = org.apache.drill.exec.expr.fn.impl.StringFunctionUtil.getUTF8CharLength(str.buffer, str.start, pos)
+              + splitterLen;
+          // if this is the second to last iteration, store the position again, as the start and end of the
+          // string to be returned need to be available
+          if (i == index.value - 1) {
+            out.start = bufPos;
+          }
+        }
+      }
+      if (beyondLastIndex) {
+        out.start = 0;
+        out.end = 0;
+        out.buffer = str.buffer;
+      } else {
+        out.buffer = str.buffer;
+        out.end = bufPos - splitterLen;
+      }
+    }
+
+  }
+
   // same as function "position(substr, str) ", except the reverse order of argument.
   @FunctionTemplate(name = "strpos", scope = FunctionScope.SIMPLE, nulls = NullHandling.NULL_IF_NULL)
   public static class Strpos implements DrillSimpleFunc {

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
index 0ff789a..2efab3b 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/expr/fn/impl/TestStringFunctions.java
@@ -20,9 +20,100 @@ package org.apache.drill.exec.expr.fn.impl;
 import org.apache.drill.BaseTestQuery;
 import org.junit.Test;
 
+import static org.junit.Assert.assertTrue;
+
 public class TestStringFunctions extends BaseTestQuery {
 
   @Test
+  public void testStrPosMultiByte() throws Exception {
+    testBuilder()
+        .sqlQuery("select `position`('a', 'abc') res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues(1l)
+        .go();
+
+    testBuilder()
+        .sqlQuery("select `position`('\\u11E9', '\\u11E9\\u0031') res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues(1l)
+        .go();
+  }
+
+  @Test
+  public void testSplitPart() throws Exception {
+    testBuilder()
+        .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 1) res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues("abc")
+        .go();
+
+    testBuilder()
+        .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 2) res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues("def")
+        .go();
+
+    // invalid index
+    boolean expectedErrorEncountered;
+    try {
+      testBuilder()
+          .sqlQuery("select split_part('abc~@~def~@~ghi', '~@~', 0) res1 from (values(1))")
+          .ordered()
+          .baselineColumns("res1")
+          .baselineValues("abc")
+          .go();
+      expectedErrorEncountered = false;
+    } catch (Exception ex) {
+      assertTrue(ex.getMessage().contains("Index in split_part must be positive, value provided was 0"));
+      expectedErrorEncountered = true;
+    }
+    if (!expectedErrorEncountered) {
+      throw new RuntimeException("Missing expected error on invalid index for split_part function");
+    }
+
+    // with a multi-byte splitter
+    testBuilder()
+        .sqlQuery("select split_part('abc\\u1111drill\\u1111ghi', '\\u1111', 2) res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues("drill")
+        .go();
+
+    // going beyond the last available index, returns empty string
+    testBuilder()
+        .sqlQuery("select split_part('a,b,c', ',', 4) res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues("")
+        .go();
+
+    // if the delimiter does not appear in the string, 1 returns the whole string
+    testBuilder()
+        .sqlQuery("select split_part('a,b,c', ' ', 1) res1 from (values(1))")
+        .ordered()
+        .baselineColumns("res1")
+        .baselineValues("a,b,c")
+        .go();
+  }
+
+  @Test
+  public void testRegexpMatches() throws Exception {
+    testBuilder()
+        .sqlQuery("select regexp_matches(a, '^a.*') res1, regexp_matches(b, '^a.*') res2 " +
+                  "from (values('abc', 'bcd'), ('bcd', 'abc')) as t(a,b)")
+        .unOrdered()
+        .baselineColumns("res1", "res2")
+        .baselineValues(true, false)
+        .baselineValues(false, true)
+        .build()
+        .run();
+  }
+
+  @Test
   public void testILike() throws Exception {
     testBuilder()
         .sqlQuery("select n_name from cp.`tpch/nation.parquet` where ilike(n_name, '%united%') = true")

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
index 0e9276c..257cc53 100644
--- a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestNewDateFunctions.java
@@ -23,12 +23,45 @@ import org.joda.time.format.DateTimeFormat;
 import org.joda.time.format.DateTimeFormatter;
 import org.junit.Test;
 
+import java.sql.Date;
+
 public class TestNewDateFunctions extends BaseTestQuery {
   DateTime date;
   DateTimeFormatter formatter;
   long unixTimeStamp = -1;
 
   @Test
+  public void testIsDate() throws Exception {
+    final String dateValues = "(values('1900-01-01'), ('3500-01-01'), ('2000-12-31'), ('2005-12-32'), ('2015-02-29'), (cast(null as varchar))) as t(date1)";
+    testBuilder()
+        .sqlQuery("select isDate(date1) res1 " +
+            "from " + dateValues)
+        .unOrdered()
+        .baselineColumns("res1")
+        .baselineValues(true)
+        .baselineValues(true)
+        .baselineValues(true)
+        .baselineValues(false)
+        .baselineValues(false)
+        .baselineValues(false)
+        .build()
+        .run();
+
+    testBuilder()
+        .sqlQuery("select case when isdate(date1) then cast(date1 as date) else null end res1 from " + dateValues)
+        .unOrdered()
+        .baselineColumns("res1")
+        .baselineValues(new DateTime(Date.valueOf("1900-01-01").getTime()))
+        .baselineValues(new DateTime(Date.valueOf("3500-01-01").getTime()))
+        .baselineValues(new DateTime(Date.valueOf("2000-12-31").getTime()))
+        .baselineValues(new Object[] {null})
+        .baselineValues(new Object[] {null})
+        .baselineValues(new Object[] {null})
+        .build()
+        .run();
+  }
+
+  @Test
   public void testUnixTimeStampForDate() throws Exception {
     formatter = DateTimeFormat.forPattern("yyyy-MM-dd HH:mm:ss");
     date = formatter.parseDateTime("2009-03-20 11:30:01");

http://git-wip-us.apache.org/repos/asf/drill/blob/e7e9b73c/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
new file mode 100644
index 0000000..de95360
--- /dev/null
+++ b/exec/java-exec/src/test/java/org/apache/drill/exec/fn/impl/TestTrigFunctions.java
@@ -0,0 +1,35 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * <p/>
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * <p/>
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.exec.fn.impl;
+
+import org.apache.drill.BaseTestQuery;
+import org.junit.Test;
+
+public class TestTrigFunctions extends BaseTestQuery {
+
+  @Test
+  public void testCot() throws Exception {
+    final String query = "select cot(radians(30)) as cot_result from (values(1))";
+    testBuilder()
+        .sqlQuery(query)
+        .ordered()
+        .baselineColumns("cot_result")
+        .baselineValues(1.7320508075688774)
+        .go();
+  }
+}