You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by ji...@apache.org on 2013/12/21 16:35:44 UTC
git commit: TAJO-382: Implement encode/decode functions. (Seungun
Choe via jihoon)
Updated Branches:
refs/heads/master 9409a0514 -> bd2b89199
TAJO-382: Implement encode/decode functions. (Seungun Choe via jihoon)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/bd2b8919
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/bd2b8919
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/bd2b8919
Branch: refs/heads/master
Commit: bd2b891994c8e3c52d956ffb3879b1ec60d70eec
Parents: 9409a05
Author: Jihoon Son <ji...@apache.org>
Authored: Sun Dec 22 00:35:34 2013 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Sun Dec 22 00:35:34 2013 +0900
----------------------------------------------------------------------
CHANGES.txt | 2 +
.../tajo/engine/function/string/Decode.java | 79 +++++++++++++++++++
.../tajo/engine/function/string/Encode.java | 81 ++++++++++++++++++++
.../function/string/HexStringConverter.java | 65 ++++++++++++++++
.../java/org/apache/tajo/master/TajoMaster.java | 10 +++
.../TestStringOperatorsAndFunctions.java | 23 ++++++
6 files changed, 260 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 72e6320..20df0b5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.8.0 - unreleased
NEW FEATURES
+ TAJO-382: Implement encode/decode functions. (Seungun Choe via jihoon)
+
TAJO-436: Implement ceiling(FLOAT8) function. (DaeMyung Kang via hyunsik)
TAJO-338 : Add Query Optimization Part for Column-Partitioned Tables.
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
new file mode 100644
index 0000000..33e8030
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+
+/**
+ * Function definition
+ *
+ * bytearray decode(string text, format text)
+ */
+public class Decode extends GeneralFunction {
+ public Decode() {
+ super(new Column[] {
+ new Column("text", TajoDataTypes.Type.TEXT),
+ new Column("format", TajoDataTypes.Type.TEXT)
+ });
+ }
+
+ @Override
+ public Datum eval(Tuple params) {
+ Datum datum = params.get(0);
+ Datum formatType = params.get(1);
+ String decodedBase64Text="";
+ String decodedHexString="";
+
+ if(datum instanceof NullDatum) return NullDatum.get();
+ if(formatType instanceof NullDatum) return NullDatum.get();
+
+ if(formatType.asChars().toLowerCase().equals("base64")) {
+ try {
+ // Base64
+ decodedBase64Text = new String(Base64.decodeBase64(datum.asChars().getBytes()));
+ }
+ catch (Exception e) {
+ return NullDatum.get();
+ }
+
+ return DatumFactory.createText(StringEscapeUtils.escapeJava(decodedBase64Text));
+ }
+ else if(formatType.asChars().toLowerCase().equals("hex")) {
+ try {
+ // Hex
+ decodedHexString = HexStringConverter.getInstance().decodeHex(datum.asChars());
+ }
+ catch (Exception e) {
+ return NullDatum.get();
+ }
+ return DatumFactory.createText(StringEscapeUtils.escapeJava(decodedHexString));
+ }
+ else
+ return NullDatum.get();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
new file mode 100644
index 0000000..938d6c0
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+
+/**
+ * Function definition
+ *
+ * text encode(data text, format text)
+ *
+ * Encodes a string into a textual representation. The format name is matched
+ * case-insensitively and must be "base64" or "hex"; any other format, a NULL
+ * argument, or a failure while encoding yields NULL.
+ *
+ * The input is first passed through StringEscapeUtils.unescapeJava(), so
+ * Java-style escape sequences in the argument are turned into the characters
+ * they denote before encoding.
+ */
+public class Encode extends GeneralFunction {
+  // Bytes are always produced as UTF-8 so that results do not depend on the
+  // platform default charset of the node evaluating the function.
+  private static final java.nio.charset.Charset UTF_8 = java.nio.charset.Charset.forName("UTF-8");
+
+  public Encode() {
+    super(new Column[] {
+      new Column("text", TajoDataTypes.Type.TEXT),
+      new Column("format", TajoDataTypes.Type.TEXT)
+    });
+  }
+
+  /**
+   * @param params tuple holding the text to encode at index 0 and the format name at index 1
+   * @return a text datum with the encoded string, or NullDatum when either argument is NULL,
+   *         the format is not supported, or encoding fails
+   */
+  @Override
+  public Datum eval(Tuple params) {
+    Datum datum = params.get(0);
+    Datum formatType = params.get(1);
+
+    if (datum instanceof NullDatum || formatType instanceof NullDatum) {
+      return NullDatum.get();
+    }
+
+    String format = formatType.asChars();
+    String encoded;
+    try {
+      if ("base64".equalsIgnoreCase(format)) {
+        String plain = StringEscapeUtils.unescapeJava(datum.asChars());
+        encoded = new String(Base64.encodeBase64(plain.getBytes(UTF_8)), UTF_8);
+      } else if ("hex".equalsIgnoreCase(format)) {
+        String plain = StringEscapeUtils.unescapeJava(datum.asChars());
+        encoded = HexStringConverter.getInstance().encodeHex(plain);
+      } else {
+        // Unknown format name.
+        return NullDatum.get();
+      }
+    } catch (RuntimeException e) {
+      // Deliberate best effort: input that cannot be encoded maps to SQL NULL.
+      return NullDatum.get();
+    }
+
+    return DatumFactory.createText(encoded);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
new file mode 100644
index 0000000..3b33359
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Converts between plain strings and the "0x"-prefixed hex notation used by
+ * the encode/decode SQL functions. Every UTF-16 char of the input is written
+ * as "0x" followed by two hex digits (values up to 0xff) or four hex digits
+ * (larger values).
+ *
+ * The class holds no mutable state; a single shared instance is exposed
+ * through {@link #getInstance()}.
+ */
+public class HexStringConverter {
+  // Created eagerly so that concurrent callers never race on initialisation.
+  private static final HexStringConverter INSTANCE = new HexStringConverter();
+
+  // "0x" followed by two hex digits, or four when four are available; group 1 holds the digits.
+  private static final Pattern HEX_TOKEN =
+      Pattern.compile("0x([a-fA-F0-9]{2}(?:[a-fA-F0-9]{2})?)");
+
+  public static HexStringConverter getInstance() {
+    return INSTANCE;
+  }
+
+  private HexStringConverter() {
+  }
+
+  /**
+   * Encodes each char of {@code str} as a "0x"-prefixed hex token.
+   *
+   * @param str the text to encode
+   * @return the concatenated hex tokens; an empty string for empty input
+   */
+  public String encodeHex(String str) {
+    StringBuilder buf = new StringBuilder(str.length() * 6);
+
+    for (int i = 0; i < str.length(); i++) {
+      String digits = Integer.toHexString(str.charAt(i));
+      buf.append("0x");
+      // Pad to an even width (2 or 4 digits). decodeHex() only recognises
+      // 2- and 4-digit tokens, so a 3-digit value would not round-trip.
+      if (digits.length() % 2 != 0) {
+        buf.append('0');
+      }
+      buf.append(digits);
+    }
+
+    return buf.toString();
+  }
+
+  /**
+   * Replaces every hex token in {@code hexString} with the char it denotes.
+   * Text that is not part of a token is copied through unchanged.
+   *
+   * @param hexString text containing "0x"-prefixed hex tokens
+   * @return the decoded text
+   */
+  public String decodeHex(String hexString) {
+    Matcher m = HEX_TOKEN.matcher(hexString);
+    StringBuffer buf = new StringBuffer();
+
+    while (m.find()) {
+      // At most four hex digits, so the value always fits in a single char.
+      char decoded = (char) Integer.parseInt(m.group(1), 16);
+      // appendReplacement() gives '$' and '\' a special meaning in the
+      // replacement text; quote it so those characters decode literally.
+      m.appendReplacement(buf, Matcher.quoteReplacement(String.valueOf(decoded)));
+    }
+    m.appendTail(buf);
+
+    return buf.toString();
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 5f35540..0f036bb 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -825,6 +825,16 @@ public class TajoMaster extends CompositeService {
new FunctionDesc("pi", Pi.class, FunctionType.GENERAL,
CatalogUtil.newSimpleDataType(Type.FLOAT8),
CatalogUtil.newSimpleDataTypeArray(Type.NULL_TYPE)));
+
+ sqlFuncs.add(
+ new FunctionDesc("encode", Encode.class, FunctionType.GENERAL,
+ CatalogUtil.newSimpleDataType(Type.TEXT),
+ CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
+ sqlFuncs.add(
+ new FunctionDesc("decode", Decode.class, FunctionType.GENERAL,
+ CatalogUtil.newSimpleDataType(Type.TEXT),
+ CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
return sqlFuncs;
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
index 7d3aa7d..c611b83 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
@@ -19,6 +19,7 @@
package org.apache.tajo.engine.function;
+import org.apache.commons.lang.StringEscapeUtils;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.engine.eval.ExprTestBase;
import org.junit.Test;
@@ -520,4 +521,26 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
testSimpleEval("select quote_ident('Foo bar') ", new String[]{"\"Foo bar\""});
testSimpleEval("select QUOTE_IDENT('Tajo Function') ", new String[]{"\"Tajo Function\""});
}
+
+ @Test
+ public void testEncode() throws IOException {
+ testSimpleEval("select encode('Hello\nworld', 'base64') ", new String[]{"SGVsbG8Kd29ybGQ="});
+ testSimpleEval("select encode('Hello\nworld', 'hex') ", new String[]{"0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"});
+ testSimpleEval("select encode('한글', 'base64') ", new String[]{"7ZWc6riA"});
+ testSimpleEval("select encode('한글', 'hex') ", new String[]{"0xd55c0xae00"});
+ testSimpleEval("select encode('한글\n테스트\t입니다.', 'hex') ",
+ new String[]{"0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e"});
+ }
+
+
+ @Test
+ public void testDecode() throws IOException {
+ testSimpleEval("select decode('SGVsbG8Kd29ybGQ=', 'base64') ", new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
+ testSimpleEval("select decode('0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64', 'hex') ",
+ new String[]{StringEscapeUtils.escapeJava("Hello\nworld")});
+ testSimpleEval("select decode('7ZWc6riA', 'base64') ", new String[]{StringEscapeUtils.escapeJava("한글")});
+ testSimpleEval("select decode('0xd55c0xae00', 'hex') ", new String[]{StringEscapeUtils.escapeJava("한글")});
+ testSimpleEval("select decode('0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e', 'hex') ",
+ new String[]{StringEscapeUtils.escapeJava("한글\n" + "테스트\t입니다.")});
+ }
}