You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2013/12/28 17:13:15 UTC

git commit: TAJO-381: Implement find_in_set function. (Jae Young Lee via hyunsik)

Updated Branches:
  refs/heads/master 319a37725 -> 4c7579009


TAJO-381: Implement find_in_set function. (Jae Young Lee via hyunsik)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/4c757900
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/4c757900
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/4c757900

Branch: refs/heads/master
Commit: 4c7579009c137570ecc412d1365ace77fbbd4b87
Parents: 319a377
Author: Hyunsik Choi <hy...@apache.org>
Authored: Sun Dec 29 01:12:18 2013 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Sun Dec 29 01:13:01 2013 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +
 .../tajo/engine/function/string/FindInSet.java  | 104 +++++++++++++++++++
 .../java/org/apache/tajo/master/TajoMaster.java |   6 ++
 .../apache/tajo/engine/eval/ExprTestBase.java   |  14 ++-
 .../TestStringOperatorsAndFunctions.java        |  29 +++++-
 5 files changed, 150 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 858b185..ff45694 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.8.0 - unreleased
 
   NEW FEATURES
 
+    TAJO-381: Implement find_in_set function. (Jae Young Lee via hyunsik)
+
     TAJO-439: Time literal support. (DaeMyung Kang via jihoon)
 
     TAJO-437: Timestamp literal support. (hyunsik)

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
new file mode 100644
index 0000000..17df825
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+/**
+ * find_in_set(text,str_array) - Returns the 1-based position of the first element equal to text
+ * in str_array, where str_array is a comma-delimited string; returns 0 if no element matches.
+ *
+ * Returns null if either argument is null.
+ * Returns 0 if the first argument has any commas.
+ *
+ * Example:
+ * SELECT find_in_set('cr','crt,c,cr,c,def') FROM src LIMIT 1;
+ * -> result: 3
+ */
+public class FindInSet extends GeneralFunction {
+  public FindInSet() {
+    super(new Column[]{
+        new Column("text", TajoDataTypes.Type.TEXT),
+        new Column("str_array", TajoDataTypes.Type.TEXT)
+    });
+  }
+
+  @Override
+  public Datum eval(Tuple params) {
+    Datum finding = params.get(0);
+    Datum textArray = params.get(1);
+
+    if (finding instanceof NullDatum || textArray instanceof NullDatum) {
+      return NullDatum.get();
+    }
+
+    byte[] searchBytes = finding.asByteArray();
+
+    // A searched text containing a comma can never equal a single element, so return 0 right away.
+    for (int i = 0; i < finding.size(); i++) {
+      if (searchBytes[i] == ',') {
+        return DatumFactory.createInt4(0);
+      }
+    }
+
+    byte[] arrayData = textArray.asByteArray();
+    int findingLength = finding.size();
+
+    int posInTextArray = 0; // number of elements completed so far; doubles as the 1-based result
+    int curLengthOfCandidate = 0; // bytes seen so far in the element currently being scanned
+    boolean matching = true; // false once the current element differs from the searched text
+
+    for (int i = 0; i < textArray.size(); i++) {
+
+      if (arrayData[i] == ',') { // a comma closes the current element
+        posInTextArray++;
+        if (matching && curLengthOfCandidate == findingLength) {
+          return DatumFactory.createInt4(posInTextArray);
+        } else {
+          matching = true;
+          curLengthOfCandidate = 0;
+        }
+      } else {
+        if (curLengthOfCandidate + 1 <= findingLength) { // still within the searched text's length
+          if (!matching || searchBytes[curLengthOfCandidate] != arrayData[i]) {
+            matching = false;
+          }
+        } else {
+          matching = false; // the element is longer than the searched text
+        }
+        curLengthOfCandidate++;
+      }
+
+    }
+
+    if (matching && curLengthOfCandidate == findingLength) { // last element has no closing comma
+      posInTextArray++;
+      return DatumFactory.createInt4(posInTextArray);
+    } else {
+      return DatumFactory.createInt4(0);
+    }
+
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 40277cb..0093ff7 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -834,6 +834,12 @@ public class TajoMaster extends CompositeService {
         new FunctionDesc("decode", Decode.class, FunctionType.GENERAL,
             CatalogUtil.newSimpleDataType(Type.TEXT),
             CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
+    sqlFuncs.add(
+        new FunctionDesc("find_in_set", FindInSet.class, FunctionType.GENERAL,
+            CatalogUtil.newSimpleDataType(Type.INT4),
+            CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
     return sqlFuncs;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
index 3c150ec..092a5bc 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
@@ -91,7 +91,13 @@ public class ExprTestBase {
     testEval(null, null, null, query, expected);
   }
 
-  public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected) throws IOException {
+  public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected)
+      throws IOException {
+    testEval(schema, tableName, csvTuple, query, expected, ',');
+  }
+
+  public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected,
+                       char delimiter) throws IOException {
     LazyTuple lazyTuple;
     VTuple vtuple  = null;
     Schema inputSchema = null;
@@ -104,7 +110,8 @@ public class ExprTestBase {
         targetIdx[i] = i;
       }
 
-      lazyTuple = new LazyTuple(inputSchema, Bytes.splitPreserveAllTokens(csvTuple.getBytes(), ',', targetIdx), 0);
+      lazyTuple =
+          new LazyTuple(inputSchema, Bytes.splitPreserveAllTokens(csvTuple.getBytes(), delimiter, targetIdx),0);
       vtuple = new VTuple(inputSchema.getColumnNum());
       for (int i = 0; i < inputSchema.getColumnNum(); i++) {
         // If null value occurs, null datum is manually inserted to an input tuple.
@@ -114,7 +121,8 @@ public class ExprTestBase {
           vtuple.put(i, lazyTuple.get(i));
         }
       }
-      cat.addTable(new TableDesc(tableName, inputSchema, CatalogProtos.StoreType.CSV, new Options(), CommonTestingUtil.getTestDir()));
+      cat.addTable(new TableDesc(tableName, inputSchema, CatalogProtos.StoreType.CSV, new Options(),
+          CommonTestingUtil.getTestDir()));
     }
 
     Target [] targets = null;

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
index ac350fc..0df05b7 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
@@ -539,7 +539,8 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
   @Test
   public void testEncode() throws IOException {
     testSimpleEval("select encode('Hello\nworld', 'base64') ", new String[]{"SGVsbG8Kd29ybGQ="});
-    testSimpleEval("select encode('Hello\nworld', 'hex') ", new String[]{"0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"});
+    testSimpleEval("select encode('Hello\nworld', 'hex') ",
+        new String[]{"0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"});
     testSimpleEval("select encode('한글', 'base64') ", new String[]{"7ZWc6riA"});
     testSimpleEval("select encode('한글', 'hex') ", new String[]{"0xd55c0xae00"});
     testSimpleEval("select encode('한글\n테스트\t입니다.', 'hex') ",
@@ -549,7 +550,8 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
 
   @Test
   public void testDecode() throws IOException {
-    testSimpleEval("select decode('SGVsbG8Kd29ybGQ=', 'base64') ", new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
+    testSimpleEval("select decode('SGVsbG8Kd29ybGQ=', 'base64') ",
+        new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
     testSimpleEval("select decode('0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64', 'hex') ",
         new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
     testSimpleEval("select decode('7ZWc6riA', 'base64') ", new String[]{StringEscapeUtils.escapeJava("한글")});
@@ -557,4 +559,27 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
     testSimpleEval("select decode('0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e', 'hex') ",
         new String[]{StringEscapeUtils.escapeJava("한글\n" + "테스트\t입니다.")});
   }
+
+  @Test
+  public void testFindInSet() throws IOException {
+    // abnormal cases: both are expected to evaluate to 0
+    testSimpleEval("select find_in_set('cr','crt') as col1 ", new String[]{"0"}); // there is no matched string
+    testSimpleEval("select find_in_set('c,r','crt,c,cr,c,def') as col1 ", new String[]{"0"}); // abnormal parameter
+
+    // normal cases: the expected value is the 1-based position of the matching element
+    testSimpleEval("select find_in_set('crt','crt,c,cr,d,def') as col1 ", new String[]{"1"});
+    testSimpleEval("select find_in_set('c','crt,c,cr,d,def') as col1 ", new String[]{"2"});
+    testSimpleEval("select find_in_set('def','crt,c,cr,d,def') as col1 ", new String[]{"5"});
+    // unicode test
+    testSimpleEval("select find_in_set('딸기','사과,배,옥수수,감자,딸기,수박') as col1 ", new String[]{"5"});
+
+    // null test: '|' is passed as the field delimiter because the set value itself contains commas
+    Schema schema = new Schema();
+    schema.addColumn("col1", TEXT);
+    schema.addColumn("col2", TEXT);
+    testEval(schema, "table1", "|crt,c,cr,c,def", "select find_in_set(col1, col2) is null from table1",
+        new String[]{"t"}, '|');
+    testEval(schema, "table1", "cr|", "select find_in_set(col1, col2) is null from table1",
+        new String[]{"t"}, '|');
+  }
 }