You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by ji...@apache.org on 2013/12/21 16:35:44 UTC

git commit: TAJO-382: Implement encode/decode functions. (Seungun Choe via jihoon)

Updated Branches:
  refs/heads/master 9409a0514 -> bd2b89199


TAJO-382: Implement encode/decode functions. (Seungun Choe via jihoon)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/bd2b8919
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/bd2b8919
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/bd2b8919

Branch: refs/heads/master
Commit: bd2b891994c8e3c52d956ffb3879b1ec60d70eec
Parents: 9409a05
Author: Jihoon Son <ji...@apache.org>
Authored: Sun Dec 22 00:35:34 2013 +0900
Committer: Jihoon Son <ji...@apache.org>
Committed: Sun Dec 22 00:35:34 2013 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../tajo/engine/function/string/Decode.java     | 79 +++++++++++++++++++
 .../tajo/engine/function/string/Encode.java     | 81 ++++++++++++++++++++
 .../function/string/HexStringConverter.java     | 65 ++++++++++++++++
 .../java/org/apache/tajo/master/TajoMaster.java | 10 +++
 .../TestStringOperatorsAndFunctions.java        | 23 ++++++
 6 files changed, 260 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 72e6320..20df0b5 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -4,6 +4,8 @@ Release 0.8.0 - unreleased
 
   NEW FEATURES
 
+    TAJO-382: Implement encode/decode functions. (Seungun Choe via jihoon)
+
     TAJO-436: Implement ceiling(FLOAT8) function. (DaeMyung Kang via hyunsik)
 
     TAJO-338 : Add Query Optimization Part for Column-Partitioned Tables.

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
new file mode 100644
index 0000000..33e8030
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Decode.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+
+/**
+ * Function definition
+ *
+ * text decode(string text, format text)
+ *
+ * Decodes a textual representation produced by encode() back into text.
+ * Supported formats (matched case-insensitively) are "base64" and "hex".
+ * The decoded text is returned in Java-escaped form (see
+ * {@link StringEscapeUtils#escapeJava(String)}).
+ */
+public class Decode extends GeneralFunction {
+  /**
+   * Charset used for base64 payloads. It is named explicitly so that the result
+   * does not depend on the platform default charset of the JVM.
+   */
+  private static final String UTF_8 = "UTF-8";
+
+  public Decode() {
+    super(new Column[] {
+        new Column("text", TajoDataTypes.Type.TEXT),
+        new Column("format", TajoDataTypes.Type.TEXT)
+    });
+  }
+
+  /**
+   * Decodes the first parameter according to the format named by the second.
+   *
+   * @param params tuple holding the encoded text at index 0 and the format name at index 1
+   * @return a text datum with the Java-escaped decoded string, or the NULL datum when
+   *         either parameter is NULL, the format is neither "base64" nor "hex",
+   *         or decoding throws
+   */
+  @Override
+  public Datum eval(Tuple params) {
+    Datum datum = params.get(0);
+    Datum formatType = params.get(1);
+
+    if (datum instanceof NullDatum || formatType instanceof NullDatum) {
+      return NullDatum.get();
+    }
+
+    String format = formatType.asChars().toLowerCase();
+    String decoded;
+    try {
+      if (format.equals("base64")) {
+        byte[] raw = Base64.decodeBase64(datum.asChars().getBytes(UTF_8));
+        decoded = new String(raw, UTF_8);
+      } else if (format.equals("hex")) {
+        decoded = HexStringConverter.getInstance().decodeHex(datum.asChars());
+      } else {
+        // Unknown format name.
+        return NullDatum.get();
+      }
+    } catch (Exception e) {
+      // Deliberate best effort: input that cannot be decoded yields NULL, not a query failure.
+      return NullDatum.get();
+    }
+
+    return DatumFactory.createText(StringEscapeUtils.escapeJava(decoded));
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
new file mode 100644
index 0000000..938d6c0
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/Encode.java
@@ -0,0 +1,81 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.commons.lang.StringEscapeUtils;
+import org.apache.tajo.catalog.Column;
+import org.apache.tajo.common.TajoDataTypes;
+import org.apache.tajo.datum.Datum;
+import org.apache.tajo.datum.DatumFactory;
+import org.apache.tajo.datum.NullDatum;
+import org.apache.tajo.engine.function.GeneralFunction;
+import org.apache.tajo.storage.Tuple;
+
+
+/**
+ * Function definition
+ *
+ * text encode(data text, format text)
+ *
+ * Encodes text into a textual representation. Supported formats (matched
+ * case-insensitively) are "base64" and "hex". Java escape sequences in the
+ * input (see {@link StringEscapeUtils#unescapeJava(String)}) are resolved
+ * before encoding.
+ */
+public class Encode extends GeneralFunction {
+  /**
+   * Charset used for base64 payloads. It is named explicitly so that the result
+   * does not depend on the platform default charset of the JVM.
+   */
+  private static final String UTF_8 = "UTF-8";
+
+  public Encode() {
+    super(new Column[] {
+        new Column("text", TajoDataTypes.Type.TEXT),
+        new Column("format", TajoDataTypes.Type.TEXT)
+    });
+  }
+
+  /**
+   * Encodes the first parameter according to the format named by the second.
+   *
+   * @param params tuple holding the text to encode at index 0 and the format name at index 1
+   * @return a text datum with the encoded string, or the NULL datum when either
+   *         parameter is NULL, the format is neither "base64" nor "hex",
+   *         or encoding throws
+   */
+  @Override
+  public Datum eval(Tuple params) {
+    Datum datum = params.get(0);
+    Datum formatType = params.get(1);
+
+    if (datum instanceof NullDatum || formatType instanceof NullDatum) {
+      return NullDatum.get();
+    }
+
+    String format = formatType.asChars().toLowerCase();
+    boolean base64 = format.equals("base64");
+    if (!base64 && !format.equals("hex")) {
+      // Unknown format name.
+      return NullDatum.get();
+    }
+
+    String encoded;
+    try {
+      String raw = StringEscapeUtils.unescapeJava(datum.asChars());
+      if (base64) {
+        encoded = new String(Base64.encodeBase64(raw.getBytes(UTF_8)), UTF_8);
+      } else {
+        encoded = HexStringConverter.getInstance().encodeHex(raw);
+      }
+    } catch (Exception e) {
+      // Deliberate best effort: input that cannot be encoded yields NULL, not a query failure.
+      return NullDatum.get();
+    }
+
+    return DatumFactory.createText(encoded);
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
new file mode 100644
index 0000000..3b33359
--- /dev/null
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/engine/function/string/HexStringConverter.java
@@ -0,0 +1,65 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tajo.engine.function.string;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Converts between a string and its "0x"-prefixed hexadecimal form, in which
+ * every UTF-16 char of the string becomes one token of two or four hex digits
+ * (for example {@code "Hi"} becomes {@code "0x480x69"}).
+ */
+public class HexStringConverter {
+  /** One encoded char: "0x" followed by two or four hex digits; group 2 holds the digits. */
+  private static final Pattern HEX_TOKEN =
+      Pattern.compile("(0x([a-fA-F0-9]{2}([a-fA-F0-9]{2})?))");
+
+  /** Eagerly created so that concurrent callers never race on initialization. */
+  private static final HexStringConverter INSTANCE = new HexStringConverter();
+
+  /**
+   * @return the shared converter instance
+   */
+  public static HexStringConverter getInstance() {
+    return INSTANCE;
+  }
+
+  private HexStringConverter() {
+  }
+
+  /**
+   * Encodes every char of the given string as a "0x"-prefixed hex token.
+   * Values up to 0xff use two digits and larger values use four, so that
+   * {@link #decodeHex(String)} can split the tokens unambiguously.
+   *
+   * @param str the text to encode
+   * @return the concatenated hex tokens; empty for an empty input
+   */
+  public String encodeHex(String str) {
+    StringBuilder buf = new StringBuilder(str.length() * 6);
+
+    for (int i = 0; i < str.length(); i++) {
+      String digits = Integer.toHexString(str.charAt(i));
+      int width = digits.length() <= 2 ? 2 : 4;
+
+      buf.append("0x");
+      // Left-pad with zeros; an odd digit count could not be decoded again.
+      for (int pad = digits.length(); pad < width; pad++) {
+        buf.append('0');
+      }
+      buf.append(digits);
+    }
+
+    return buf.toString();
+  }
+
+  /**
+   * Replaces every hex token in the given string with the char it denotes.
+   * Text that is not part of a token is copied through unchanged.
+   *
+   * @param hexString text containing "0x"-prefixed hex tokens
+   * @return the decoded text
+   */
+  public String decodeHex(String hexString) {
+    Matcher m = HEX_TOKEN.matcher(hexString);
+
+    // Matcher.appendReplacement accepts only a StringBuffer on older Java versions.
+    StringBuffer buf = new StringBuffer();
+    while (m.find()) {
+      // At most four hex digits are matched, so the value always fits in one char.
+      char decoded = (char) Integer.parseInt(m.group(2), 16);
+      // Quote the replacement: a decoded '$' or '\' must be taken literally,
+      // not interpreted as a group reference or an escape.
+      m.appendReplacement(buf, Matcher.quoteReplacement(String.valueOf(decoded)));
+    }
+
+    m.appendTail(buf);
+
+    return buf.toString();
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
index 5f35540..0f036bb 100644
--- a/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
+++ b/tajo-core/tajo-core-backend/src/main/java/org/apache/tajo/master/TajoMaster.java
@@ -825,6 +825,16 @@ public class TajoMaster extends CompositeService {
         new FunctionDesc("pi", Pi.class, FunctionType.GENERAL,
             CatalogUtil.newSimpleDataType(Type.FLOAT8),
             CatalogUtil.newSimpleDataTypeArray(Type.NULL_TYPE)));
+
+    sqlFuncs.add(
+        new FunctionDesc("encode", Encode.class, FunctionType.GENERAL,
+            CatalogUtil.newSimpleDataType(Type.TEXT),
+            CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
+
+    sqlFuncs.add(
+        new FunctionDesc("decode", Decode.class, FunctionType.GENERAL,
+            CatalogUtil.newSimpleDataType(Type.TEXT),
+            CatalogUtil.newSimpleDataTypeArray(Type.TEXT, Type.TEXT)));
     return sqlFuncs;
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/bd2b8919/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
----------------------------------------------------------------------
diff --git a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
index 7d3aa7d..c611b83 100644
--- a/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
+++ b/tajo-core/tajo-core-backend/src/test/java/org/apache/tajo/engine/function/TestStringOperatorsAndFunctions.java
@@ -19,6 +19,7 @@
 package org.apache.tajo.engine.function;
 
 
+import org.apache.commons.lang.StringEscapeUtils;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.engine.eval.ExprTestBase;
 import org.junit.Test;
@@ -520,4 +521,26 @@ public class TestStringOperatorsAndFunctions extends ExprTestBase {
     testSimpleEval("select quote_ident('Foo bar') ", new String[]{"\"Foo bar\""});
     testSimpleEval("select QUOTE_IDENT('Tajo Function') ", new String[]{"\"Tajo Function\""});
   }
+
+  /**
+   * Runs encode() with the 'base64' and 'hex' formats over ASCII and Korean
+   * text, including embedded newline and tab characters.
+   */
+  @Test
+  public void testEncode() throws IOException {
+    // Each row is {query, expected single-column result}.
+    final String[][] cases = {
+        {"select encode('Hello\nworld', 'base64') ", "SGVsbG8Kd29ybGQ="},
+        {"select encode('Hello\nworld', 'hex') ", "0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64"},
+        {"select encode('한글', 'base64') ", "7ZWc6riA"},
+        {"select encode('한글', 'hex') ", "0xd55c0xae00"},
+        {"select encode('한글\n테스트\t입니다.', 'hex') ",
+            "0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e"},
+    };
+
+    for (String[] testCase : cases) {
+      testSimpleEval(testCase[0], new String[]{testCase[1]});
+    }
+  }
+
+
+  /**
+   * Runs decode() with the 'base64' and 'hex' formats; the expected value of
+   * each case is the Java-escaped form of the original text.
+   */
+  @Test
+  public void testDecode() throws IOException {
+    // Each row is {query, original text before Java escaping}.
+    final String[][] cases = {
+        {"select decode('SGVsbG8Kd29ybGQ=', 'base64') ", "Hello\nworld"},
+        {"select decode('0x480x650x6c0x6c0x6f0x0a0x770x6f0x720x6c0x64', 'hex') ", "Hello\nworld"},
+        {"select decode('7ZWc6riA', 'base64') ", "한글"},
+        {"select decode('0xd55c0xae00', 'hex') ", "한글"},
+        {"select decode('0xd55c0xae000x0a0xd14c0xc2a40xd2b80x090xc7850xb2c80xb2e40x2e', 'hex') ",
+            "한글\n" + "테스트\t입니다."},
+    };
+
+    for (String[] testCase : cases) {
+      String expected = StringEscapeUtils.escapeJava(testCase[1]);
+      testSimpleEval(testCase[0], new String[]{expected});
+    }
+  }
 }