You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2013/12/28 17:13:15 UTC
git commit: TAJO-381: Implement find_in_set function. (Jae Young Lee
via hyunsik)
Updated Branches:
refs/heads/master 319a37725 -> 4c7579009
TAJO-381: Implement find_in_set function. (Jae Young Lee via hyunsik)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/4c757900
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/4c757900
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/4c757900
Branch: refs/heads/master
Commit: 4c7579009c137570ecc412d1365ace77fbbd4b87
Parents: 319a377
Author: Hyunsik Choi <hy...@apache.org>
Authored: Sun Dec 29 01:12:18 2013 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Sun Dec 29 01:13:01 2013 +0900
----------------------------------------------------------------------
CHANGES.txt | 2 +
.../tajo/engine/function/string/FindInSet.java | 104 +++++++++++++++++++
.../java/org/apache/tajo/master/TajoMaster.java | 6 ++
.../apache/tajo/engine/eval/ExprTestBase.java | 14 ++-
.../TestStringOperatorsAndFunctions.java | 29 +++++-
5 files changed, 150 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 858b185..ff45694 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.8.0 - unreleased
NEW FEATURES
+ TAJO-381: Implement find_in_set function. (Jae Young Lee via hyunsik)
+
TAJO-439: Time literal support. (DaeMyung Kang via jihoon)
TAJO-437: Timestamp literal support. (hyunsik)
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
new file mode 100644
index 0000000..17df825
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/FindInSet.java
@@ -0,0 +1,104 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+/**
+ * find_in_set(text,str_array) - Returns the 1-based position of the first element of str_array
+ * that equals text byte-for-byte, where str_array is a comma-delimited string.
+ *
+ * Returns null if either argument is null.
+ * Returns 0 if text contains a comma or matches no element of str_array.
+ *
+ * Example:
+ * SELECT find_in_set('cr','crt,c,cr,c,def') FROM src LIMIT 1;
+ * -> result: 3
+ */
+public class FindInSet extends GeneralFunction {
+ public FindInSet() { // declares the two TEXT parameters: text and str_array
+ super(new Column[]{
+ new Column("text", TajoDataTypes.Type.TEXT),
+ new Column("str_array", TajoDataTypes.Type.TEXT)
+ });
+ }
+
+ @Override
+ public Datum eval(Tuple params) {
+ Datum finding = params.get(0); // the string to look for
+ Datum textArray = params.get(1); // the comma-delimited list to search
+
+ if (finding instanceof NullDatum || textArray instanceof NullDatum) { // null in, null out
+ return NullDatum.get();
+ }
+
+ byte[] searchBytes = finding.asByteArray();
+
+ // Returns 0 if the first argument has any commas: it could never equal a single list element.
+ for (int i = 0; i < finding.size(); i++) {
+ if (searchBytes[i] == ',') {
+ return DatumFactory.createInt4(0);
+ }
+ }
+
+ byte[] arrayData = textArray.asByteArray();
+ int findingLength = finding.size(); // expected element length; assumes size() == searchBytes.length
+
+ int posInTextArray = 0; // number of list elements completed so far
+ int curLengthOfCandidate = 0; // bytes consumed of the element currently being scanned
+ boolean matching = true; // true while the current element agrees with searchBytes so far
+
+ for (int i = 0; i < textArray.size(); i++) {
+
+ if (arrayData[i] == ',') { // end of the current element
+ posInTextArray++;
+ if (matching && curLengthOfCandidate == findingLength) { // every byte matched and lengths agree
+ return DatumFactory.createInt4(posInTextArray);
+ } else {
+ matching = true; // reset state for the next element
+ curLengthOfCandidate = 0;
+ }
+ } else {
+ if (curLengthOfCandidate + 1 <= findingLength) { // still within the length of searchBytes
+ if (!matching || searchBytes[curLengthOfCandidate] != arrayData[i]) {
+ matching = false;
+ }
+ } else {
+ matching = false; // element is longer than searchBytes
+ }
+ curLengthOfCandidate++;
+ }
+
+ }
+
+ if (matching && curLengthOfCandidate == findingLength) { // last element has no trailing comma
+ posInTextArray++;
+ return DatumFactory.createInt4(posInTextArray);
+ } else {
+ return DatumFactory.createInt4(0); // not found
+ }
+
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 40277cb..0093ff7 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -834,6 +834,12 @@ public class TajoMaster extends CompositeService {
new FunctionDesc("decode", Decode.class, FunctionType.GENERAL,
CatalogUtil.newSimpleDataType(Type.TEXT),
CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
+ sqlFuncs.add(
+ new FunctionDesc("find_in_set", FindInSet.class, FunctionType.GENERAL,
+ CatalogUtil.newSimpleDataType(Type.INT4),
+ CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
return sqlFuncs;
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
index 3c150ec..092a5bc 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/eval/ExprTestBase.java
@@ -91,7 +91,13 @@ public class ExprTestBase {
testEval(null, null, null, query, expected);
}
- public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected) throws IOException {
+ public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected)
+ throws IOException {
+ testEval(schema, tableName, csvTuple, query, expected, ','); // previous behavior: csvTuple is split on ','
+ }
+
+ public void testEval(Schema schema, String tableName, String csvTuple, String query, String [] expected,
+ char delimiter) throws IOException {
LazyTuple lazyTuple;
VTuple vtuple = null;
Schema inputSchema = null;
@@ -104,7 +110,8 @@ public class ExprTestBase {
targetIdx[i] = i;
}
- lazyTuple = new LazyTuple(inputSchema, Bytes.splitPreserveAllTokens(csvTuple.getBytes(), ',', targetIdx), 0);
+ lazyTuple =
+ new LazyTuple(inputSchema, Bytes.splitPreserveAllTokens(csvTuple.getBytes(), delimiter, targetIdx),0);
vtuple = new VTuple(inputSchema.getColumnNum());
for (int i = 0; i < inputSchema.getColumnNum(); i++) {
// If null value occurs, null datum is manually inserted to an input tuple.
@@ -114,7 +121,8 @@ public class ExprTestBase {
vtuple.put(i, lazyTuple.get(i));
}
}
- cat.addTable(new TableDesc(tableName, inputSchema, CatalogProtos.StoreType.CSV, new Options(), CommonTestingUtil.getTestDir()));
+ cat.addTable(new TableDesc(tableName, inputSchema, CatalogProtos.StoreType.CSV, new Options(),
+ CommonTestingUtil.getTestDir()));
}
Target [] targets = null;
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/4c757900/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
index ac350fc..0df05b7 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
@@ -539,7 +539,8 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
@Test
public void testEncode() throws IOException {
testSimpleEval("select encode('Hello\nworld', 'base64') ", new String[]{"SGVsbG8Kd29ybGQ="});
- testSimpleEval("select encode('Hello\nworld', 'hex') ", new String[]{"0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"});
+ testSimpleEval("select encode('Hello\nworld', 'hex') ",
+ new String[]{"0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"});
testSimpleEval("select encode('한글', 'base64') ", new String[]{"7ZWc6riA"});
testSimpleEval("select encode('한글', 'hex') ", new String[]{"0xd55c0xae00"});
testSimpleEval("select encode('한글\n테스트\t입니다.', 'hex') ",
@@ -549,7 +550,8 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
@Test
public void testDecode() throws IOException {
- testSimpleEval("select decode('SGVsbG8Kd29ybGQ=', 'base64') ", new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
+ testSimpleEval("select decode('SGVsbG8Kd29ybGQ=', 'base64') ",
+ new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
testSimpleEval("select decode('0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64', 'hex') ",
new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
testSimpleEval("select decode('7ZWc6riA', 'base64') ", new String[]{StringEscapeUtils.escapeJava("한글")});
@@ -557,4 +559,27 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
testSimpleEval("select decode('0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e', 'hex') ",
new String[]{StringEscapeUtils.escapeJava("한글\n" + "테스트\t입니다.")});
}
+
+ @Test
+ public void testFindInSet() throws IOException {
+ // abnormal cases: both must yield 0
+ testSimpleEval("select find_in_set('cr','crt') as col1 ", new String[]{"0"}); // 'cr' is only a prefix of 'crt'
+ testSimpleEval("select find_in_set('c,r','crt,c,cr,c,def') as col1 ", new String[]{"0"}); // comma in first argument
+
+ // normal cases: the result is the 1-based position of the matching element
+ testSimpleEval("select find_in_set('crt','crt,c,cr,d,def') as col1 ", new String[]{"1"});
+ testSimpleEval("select find_in_set('c','crt,c,cr,d,def') as col1 ", new String[]{"2"});
+ testSimpleEval("select find_in_set('def','crt,c,cr,d,def') as col1 ", new String[]{"5"});
+ // unicode test: the position still counts elements when they contain non-ASCII text
+ testSimpleEval("select find_in_set('딸기','사과,배,옥수수,감자,딸기,수박') as col1 ", new String[]{"5"});
+
+ // null test: '|' is passed as the field delimiter; the empty field is presumably read as NULL - confirm
+ Schema schema = new Schema();
+ schema.addColumn("col1", TEXT);
+ schema.addColumn("col2", TEXT);
+ testEval(schema, "table1", "|crt,c,cr,c,def", "select find_in_set(col1, col2) is null from table1",
+ new String[]{"t"}, '|');
+ testEval(schema, "table1", "cr|", "select find_in_set(col1, col2) is null from table1",
+ new String[]{"t"}, '|');
+ }
}