You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by hy...@apache.org on 2013/09/23 06:21:59 UTC
git commit: TAJO-185: Implement split_part function. (hyunsik)
Updated Branches:
refs/heads/master 7b0dec6a7 -> 7a2061bb9
TAJO-185: Implement split_part function. (hyunsik)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/7a2061bb
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/7a2061bb
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/7a2061bb
Branch: refs/heads/master
Commit: 7a2061bb924a41077f8377b3392133a13f728455
Parents: 7b0dec6
Author: Hyunsik Choi <hy...@apache.org>
Authored: Mon Sep 23 11:29:33 2013 +0900
Committer: Hyunsik Choi <hy...@apache.org>
Committed: Mon Sep 23 13:20:29 2013 +0900
----------------------------------------------------------------------
CHANGES.txt | 2 +
.../tajo/engine/function/string/SplitPart.java | 55 ++++++++++++++++++++
.../apache/tajo/engine/query/ResultSetImpl.java | 44 +++-------------
.../java/org/apache/tajo/master/TajoMaster.java | 7 +++
.../engine/function/TestBuiltinFunctions.java | 44 ++++++++++++++++
.../java/org/apache/tajo/storage/LazyTuple.java | 2 +
.../org/apache/tajo/storage/TestLazyTuple.java | 12 ++++-
.../org/apache/tajo/storage/TestStorages.java | 3 +-
8 files changed, 128 insertions(+), 41 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 123ee14..dea65e4 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.2.0 - unreleased
NEW FEATURES
+ TAJO-185: Implement split_part function. (hyunsik)
+
TAJO-193: Add string pattern matching operators. (hyunsik)
TAJO-101: HiveQL converter. (jaehwa)
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
new file mode 100644
index 0000000..bbaa441
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/SplitPart.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.catalog.function.GeneralFunction;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.datum.TextDatum;
+import org.apache.tajo.storage.Tuple;
+
+/**
+ * Function definition
+ *
+ * text split_part(string text, delimiter text, field int)
+ */
+public class SplitPart extends GeneralFunction<TextDatum> {
+ public SplitPart() {
+ super(new Column[] {
+ new Column("text", TajoDataTypes.Type.TEXT),
+ new Column("delimiter", TajoDataTypes.Type.TEXT),
+ new Column("field", TajoDataTypes.Type.INT4),
+ });
+ }
+
+ @Override
+ public Datum eval(Tuple params) {
+ String [] split = StringUtils.split(params.get(0).asChars(), params.get(1).asChars());
+ int idx = params.get(2).asInt4();
+ if (split.length > idx) {
+ return DatumFactory.createText(split[idx]);
+ } else {
+ return NullDatum.get();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
index 6b49558..639b43f 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/query/ResultSetImpl.java
@@ -20,13 +20,14 @@ package org.apache.tajo.engine.query;
import com.google.common.collect.Lists;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.*;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PathFilter;
import org.apache.tajo.QueryId;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableDesc;
import org.apache.tajo.catalog.TableMeta;
-import org.apache.tajo.catalog.TableMetaImpl;
-import org.apache.tajo.catalog.proto.CatalogProtos.TableProto;
import org.apache.tajo.client.TajoClient;
import org.apache.tajo.datum.Datum;
import org.apache.tajo.datum.NullDatum;
@@ -35,9 +36,7 @@ import org.apache.tajo.storage.Fragment;
import org.apache.tajo.storage.MergeScanner;
import org.apache.tajo.storage.Scanner;
import org.apache.tajo.storage.Tuple;
-import org.apache.tajo.util.FileUtil;
-import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
@@ -49,7 +48,6 @@ import java.util.*;
public class ResultSetImpl implements ResultSet {
private final String cursorName = "tajo";
- private Configuration conf;
private FileSystem fs;
private Scanner scanner;
private TableDesc desc;
@@ -70,13 +68,12 @@ public class ResultSetImpl implements ResultSet {
public ResultSetImpl(TajoClient tajoClient, QueryId queryId, Configuration conf, TableDesc desc) throws IOException {
this.tajoClient = tajoClient;
this.queryId = queryId;
- this.conf = conf;
this.desc = desc;
this.schema = desc.getMeta().getSchema();
if(desc != null) {
fs = desc.getPath().getFileSystem(conf);
this.totalRow = desc.getMeta().getStat() != null ? desc.getMeta().getStat().getNumRows() : 0;
- Collection<Fragment> frags = getFragmentsNG(desc.getMeta(), desc.getPath());
+ Collection<Fragment> frags = getFragments(desc.getMeta(), desc.getPath());
scanner = new MergeScanner(conf, desc.getMeta(), frags);
}
init();
@@ -87,19 +84,6 @@ public class ResultSetImpl implements ResultSet {
curRow = 0;
}
- private TableMeta getMeta(Configuration conf, Path tablePath)
- throws IOException {
- Path tableMetaPath = new Path(tablePath, ".meta");
- if (!fs.exists(tableMetaPath)) {
- throw new FileNotFoundException(".meta file not found in "
- + tablePath.toString());
- }
- FSDataInputStream in = fs.open(tableMetaPath);
- TableProto tableProto = (TableProto) FileUtil.loadProto(in,
- TableProto.getDefaultInstance());
- return new TableMetaImpl(tableProto);
- }
-
class FileNameComparator implements Comparator<FileStatus> {
@Override
@@ -108,7 +92,7 @@ public class ResultSetImpl implements ResultSet {
}
}
- private Collection<Fragment> getFragmentsNG(TableMeta meta, Path tablePath)
+ private Collection<Fragment> getFragments(TableMeta meta, Path tablePath)
throws IOException {
List<Fragment> fraglist = Lists.newArrayList();
FileStatus[] files = fs.listStatus(tablePath, new PathFilter() {
@@ -129,22 +113,6 @@ public class ResultSetImpl implements ResultSet {
return fraglist;
}
- private Fragment[] getFragments(TableMeta meta, Path tablePath)
- throws IOException {
- List<Fragment> fraglist = Lists.newArrayList();
- FileStatus[] files = fs.listStatus(tablePath);
- Arrays.sort(files, new FileNameComparator());
-
- String tbname = tablePath.getName();
- for (int i = 0; i < files.length; i++) {
- if (files[i].getLen() == 0) {
- continue;
- }
- fraglist.add(new Fragment(tbname + "_" + i, files[i].getPath(), meta, 0l, files[i].getLen()));
- }
- return fraglist.toArray(new Fragment[fraglist.size()]);
- }
-
/*
* (non-Javadoc)
*
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 0635156..1296ea4 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -38,12 +38,14 @@ import org.apache.hadoop.yarn.service.Service;
import org.apache.hadoop.yarn.util.RackResolver;
import org.apache.tajo.catalog.*;
import org.apache.tajo.catalog.proto.CatalogProtos.FunctionType;
+import org.apache.tajo.common.TajoDataTypes;
import org.apache.tajo.common.TajoDataTypes.Type;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.conf.TajoConf.ConfVars;
import org.apache.tajo.engine.function.Country;
import org.apache.tajo.engine.function.InCountry;
import org.apache.tajo.engine.function.builtin.*;
+import org.apache.tajo.engine.function.string.SplitPart;
import org.apache.tajo.master.querymaster.QueryJobManager;
import org.apache.tajo.master.rm.WorkerResourceManager;
import org.apache.tajo.master.rm.YarnTajoResourceManager;
@@ -300,6 +302,11 @@ public class TajoMaster extends CompositeService {
CatalogUtil.newDataTypesWithoutLen(Type.INT4),
CatalogUtil.newDataTypesWithoutLen(Type.INT4)));
+ sqlFuncs.add(
+ new FunctionDesc("split_part", SplitPart.class, FunctionType.GENERAL,
+ CatalogUtil.newDataTypesWithoutLen(Type.TEXT),
+ CatalogUtil.newDataTypesWithoutLen(Type.TEXT, Type.TEXT, Type.INT4)));
+
return sqlFuncs;
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
index a5a89f7..0384d51 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestBuiltinFunctions.java
@@ -19,6 +19,7 @@
package org.apache.tajo.engine.function;
import com.google.common.collect.Maps;
+import org.apache.tajo.client.ResultSetUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
@@ -31,6 +32,7 @@ import java.sql.ResultSet;
import java.util.Map;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
@Category(IntegrationTest.class)
@@ -132,4 +134,46 @@ public class TestBuiltinFunctions {
res.close();
}
}
+
+ @Test
+ public void testSplitPart() throws Exception {
+ ResultSet res = tpch.execute("select split_part(l_shipinstruct, ' ', 0) from lineitem");
+
+ String [] result ={
+ "DELIVER",
+ "TAKE",
+ "TAKE",
+ "NONE",
+ "TAKE"
+ };
+
+ for (int i = 0; i < result.length; i++) {
+ assertTrue(res.next());
+ assertEquals(result[i], res.getString(1));
+ }
+ assertFalse(res.next());
+
+ res.close();
+ }
+
+ @Test
+ public void testSplitPartNested() throws Exception {
+ ResultSet res = tpch.execute("select split_part(split_part(l_shipinstruct, ' ', 0), 'A', 1) from lineitem");
+
+ String [] result ={
+ "",
+ "KE",
+ "KE",
+ "",
+ "KE"
+ };
+
+ for (int i = 0; i < result.length; i++) {
+ assertTrue(res.next());
+ assertEquals(result[i], res.getString(1));
+ }
+ assertFalse(res.next());
+
+ res.close();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java b/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
index c2b511c..c364ae2 100644
--- a/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
+++ b/tajo-core/tajo-core-storage/src/main/java/org/apache/tajo/storage/LazyTuple.java
@@ -113,6 +113,8 @@ public class LazyTuple implements Tuple {
else if (textBytes.length > fieldId && (textBytes[fieldId] != null)) {
values[fieldId] = createByTextBytes(schema.getColumn(fieldId).getDataType().getType(), textBytes[fieldId]);
textBytes[fieldId] = null;
+ } else {
+ values[fieldId] = NullDatum.get();
}
return values[fieldId];
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
index 3ca0789..9dd17e2 100644
--- a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
+++ b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestLazyTuple.java
@@ -124,6 +124,14 @@ public class TestLazyTuple {
assertTrue(t1.getFloat(11).asFloat4() == 0.76f);
}
+ public VTuple createVTuple(int size) {
+ VTuple vtuple = new VTuple(size);
+ for (int i = 0; i < size; i++) {
+ vtuple.put(i, NullDatum.get());
+ }
+ return vtuple;
+ }
+
@Test
public void testEquals() {
int colNum = schema.getColumnNum();
@@ -140,7 +148,7 @@ public class TestLazyTuple {
assertEquals(t1, t2);
- Tuple t3 = new VTuple(colNum);
+ Tuple t3 = createVTuple(colNum);
t3.put(0, DatumFactory.createInt4(1));
t3.put(1, DatumFactory.createInt4(2));
t3.put(3, DatumFactory.createInt4(2));
@@ -169,7 +177,7 @@ public class TestLazyTuple {
assertEquals(t1.hashCode(), t2.hashCode());
- Tuple t3 = new VTuple(colNum);
+ Tuple t3 = createVTuple(colNum);
t3.put(0, DatumFactory.createInt4(1));
t3.put(1, DatumFactory.createInt4(2));
t3.put(3, DatumFactory.createInt4(2));
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/7a2061bb/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
index 364600c..7589996 100644
--- a/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
+++ b/tajo-core/tajo-core-storage/src/test/java/org/apache/tajo/storage/TestStorages.java
@@ -44,6 +44,7 @@ import java.util.Collection;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
+import static org.junit.Assert.assertTrue;
@RunWith(Parameterized.class)
public class TestStorages {
@@ -170,7 +171,7 @@ public class TestStorages {
Tuple tuple;
while ((tuple = scanner.next()) != null) {
if (storeType == StoreType.RCFILE || storeType == StoreType.TREVNI || storeType == StoreType.CSV) {
- assertNull(tuple.get(0));
+ assertTrue(tuple.get(0) == null || tuple.get(0) instanceof NullDatum);
}
assertEquals(DatumFactory.createInt8(tupleCnt + 2), tuple.getLong(1));
assertEquals(DatumFactory.createFloat4(tupleCnt + 3), tuple.getFloat(2));