You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2015/05/07 03:21:21 UTC
[27/52] [abbrv] hive git commit: HIVE-10485: Create md5 UDF (Alex
Pivovarov via Jason Dere)
HIVE-10485: Create md5 UDF (Alex Pivovarov via Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9803344b
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9803344b
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9803344b
Branch: refs/heads/llap
Commit: 9803344bff3d8aecafae3e03261b48592a86bfb1
Parents: ec12a61
Author: Jason Dere <jd...@hortonworks.com>
Authored: Mon May 4 23:11:47 2015 -0700
Committer: Jason Dere <jd...@hortonworks.com>
Committed: Mon May 4 23:11:47 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 6 +-
.../org/apache/hadoop/hive/ql/udf/UDFMd5.java | 79 ++++++++++++++++++++
.../apache/hadoop/hive/ql/udf/TestUDFMd5.java | 57 ++++++++++++++
ql/src/test/queries/clientpositive/udf_md5.q | 13 ++++
.../results/clientpositive/show_functions.q.out | 2 +
.../test/results/clientpositive/udf_md5.q.out | 61 +++++++++++++++
6 files changed, 216 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index bf2809c..02a604f 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -24,8 +24,8 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashSet;
-import java.util.LinkedHashSet;
import java.util.Iterator;
+import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
@@ -65,12 +65,13 @@ import org.apache.hadoop.hive.ql.udf.UDFLn;
import org.apache.hadoop.hive.ql.udf.UDFLog;
import org.apache.hadoop.hive.ql.udf.UDFLog10;
import org.apache.hadoop.hive.ql.udf.UDFLog2;
+import org.apache.hadoop.hive.ql.udf.UDFMd5;
import org.apache.hadoop.hive.ql.udf.UDFMinute;
import org.apache.hadoop.hive.ql.udf.UDFMonth;
import org.apache.hadoop.hive.ql.udf.UDFOPBitAnd;
-import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftLeft;
import org.apache.hadoop.hive.ql.udf.UDFOPBitNot;
import org.apache.hadoop.hive.ql.udf.UDFOPBitOr;
+import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftLeft;
import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftRight;
import org.apache.hadoop.hive.ql.udf.UDFOPBitShiftRightUnsigned;
import org.apache.hadoop.hive.ql.udf.UDFOPBitXor;
@@ -224,6 +225,7 @@ public final class FunctionRegistry {
system.registerUDF("unhex", UDFUnhex.class, false);
system.registerUDF("base64", UDFBase64.class, false);
system.registerUDF("unbase64", UDFUnbase64.class, false);
+ system.registerUDF("md5", UDFMd5.class, false);
system.registerGenericUDF("encode", GenericUDFEncode.class);
system.registerGenericUDF("decode", GenericUDFDecode.class);
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
new file mode 100644
index 0000000..62c16c2
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFMd5.java
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import org.apache.commons.codec.digest.DigestUtils;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFMd5.
+ *
+ * Hive UDF that computes the MD5 digest of a string or binary argument and
+ * returns it as a string of 32 hex digits, or NULL when the argument is NULL.
+ *
+ * Every call returns the same {@link Text} instance, so a caller must consume
+ * or copy the value before invoking evaluate again.
+ */
+@Description(name = "md5",
+    value = "_FUNC_(str or bin) - Calculates an MD5 128-bit checksum for the string or binary.",
+    extended = "The value is returned as a string of 32 hex digits, or NULL if the argument was NULL.\n"
+    + "Example:\n"
+    + " > SELECT _FUNC_('ABC');\n"
+    + " '902fbdd2b1df0c4f70b4a5d23525e932'\n"
+    + " > SELECT _FUNC_(binary('ABC'));\n"
+    + " '902fbdd2b1df0c4f70b4a5d23525e932'")
+public class UDFMd5 extends UDF {
+
+  // Single holder handed back by both evaluate overloads.
+  private final Text result = new Text();
+
+  /**
+   * Computes the MD5 hex digest of a string value.
+   *
+   * The digest is taken over the bytes stored in the argument rather than over
+   * a String decoded from them. This skips a decode/re-encode round trip and
+   * makes this overload hash exactly the same bytes the binary overload would.
+   *
+   * @param n the string to hash, may be null
+   * @return the shared result holding the 32 hex digits, or null if n is null
+   */
+  public Text evaluate(Text n) {
+    if (n == null) {
+      return null;
+    }
+
+    // Only the first getLength() bytes of the backing array belong to the value.
+    return hexDigest(validPrefix(n.getBytes(), n.getLength()));
+  }
+
+  /**
+   * Computes the MD5 hex digest of a binary value.
+   *
+   * @param b the bytes to hash, may be null
+   * @return the shared result holding the 32 hex digits, or null if b is null
+   */
+  public Text evaluate(BytesWritable b) {
+    if (b == null) {
+      return null;
+    }
+
+    return hexDigest(copyBytes(b));
+  }
+
+  /**
+   * Returns a new array holding exactly the first getLength() bytes of the
+   * argument's backing array.
+   *
+   * @param b the writable to copy from, must not be null
+   * @return a freshly allocated array of length b.getLength()
+   */
+  protected byte[] copyBytes(BytesWritable b) {
+    return validPrefix(b.getBytes(), b.getLength());
+  }
+
+  /** Stores the MD5 hex digest of the given bytes in the shared result and returns it. */
+  private Text hexDigest(byte[] bytes) {
+    result.set(DigestUtils.md5Hex(bytes));
+    return result;
+  }
+
+  /** Copies the first length bytes of buffer into a new array of that size. */
+  private static byte[] validPrefix(byte[] buffer, int length) {
+    byte[] copy = new byte[length];
+    System.arraycopy(buffer, 0, copy, 0, length);
+    return copy;
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java
new file mode 100644
index 0000000..715e987
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFMd5.java
@@ -0,0 +1,57 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+public class TestUDFMd5 extends TestCase {
+
+  /** Hashes string arguments: a short value, the empty string and null. */
+  public void testMD5Str() throws HiveException {
+    UDFMd5 md5 = new UDFMd5();
+
+    runAndVerifyStr("ABC", "902fbdd2b1df0c4f70b4a5d23525e932", md5);
+    runAndVerifyStr("", "d41d8cd98f00b204e9800998ecf8427e", md5);
+    // a null argument is expected to produce a null result
+    runAndVerifyStr(null, null, md5);
+  }
+
+  /** Hashes binary arguments: the bytes of "ABC", an empty array and null. */
+  public void testMD5Bin() throws HiveException {
+    UDFMd5 md5 = new UDFMd5();
+
+    runAndVerifyBin(new byte[] { 'A', 'B', 'C' }, "902fbdd2b1df0c4f70b4a5d23525e932", md5);
+    runAndVerifyBin(new byte[0], "d41d8cd98f00b204e9800998ecf8427e", md5);
+    // a null argument is expected to produce a null result
+    runAndVerifyBin(null, null, md5);
+  }
+
+  /**
+   * Wraps str in a Text (or passes null through), evaluates the UDF and
+   * compares the string form of the result with expResult.
+   */
+  private void runAndVerifyStr(String str, String expResult, UDFMd5 udf) throws HiveException {
+    Text input = null;
+    if (str != null) {
+      input = new Text(str);
+    }
+
+    Text actual = udf.evaluate(input);
+    String actualHex = null;
+    if (actual != null) {
+      actualHex = actual.toString();
+    }
+    assertEquals("md5() test ", expResult, actualHex);
+  }
+
+  /**
+   * Wraps binV in a BytesWritable (or passes null through), evaluates the UDF
+   * and compares the string form of the result with expResult.
+   */
+  private void runAndVerifyBin(byte[] binV, String expResult, UDFMd5 udf) throws HiveException {
+    BytesWritable input = null;
+    if (binV != null) {
+      input = new BytesWritable(binV);
+    }
+
+    Text actual = udf.evaluate(input);
+    String actualHex = null;
+    if (actual != null) {
+      actualHex = actual.toString();
+    }
+    assertEquals("md5() test ", expResult, actualHex);
+  }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/test/queries/clientpositive/udf_md5.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_md5.q b/ql/src/test/queries/clientpositive/udf_md5.q
new file mode 100644
index 0000000..c22417a
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_md5.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION md5;
+DESC FUNCTION EXTENDED md5;
+
+explain select md5('ABC');
+
+select
+md5('ABC'),
+md5(''),
+md5(binary('ABC')),
+md5(binary('')),
+md5(cast(null as string)),
+md5(cast(null as binary)),
+md5(null);
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index ffc32c8..a422760 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -123,6 +123,7 @@ map_keys
map_values
matchpath
max
+md5
min
minute
month
@@ -327,6 +328,7 @@ map_keys
map_values
matchpath
max
+md5
min
minute
month
http://git-wip-us.apache.org/repos/asf/hive/blob/9803344b/ql/src/test/results/clientpositive/udf_md5.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_md5.q.out b/ql/src/test/results/clientpositive/udf_md5.q.out
new file mode 100644
index 0000000..01744fe
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_md5.q.out
@@ -0,0 +1,61 @@
+PREHOOK: query: DESCRIBE FUNCTION md5
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION md5
+POSTHOOK: type: DESCFUNCTION
+md5(str or bin) - Calculates an MD5 128-bit checksum for the string or binary.
+PREHOOK: query: DESC FUNCTION EXTENDED md5
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED md5
+POSTHOOK: type: DESCFUNCTION
+md5(str or bin) - Calculates an MD5 128-bit checksum for the string or binary.
+The value is returned as a string of 32 hex digits, or NULL if the argument was NULL.
+Example:
+ > SELECT md5('ABC');
+ '902fbdd2b1df0c4f70b4a5d23525e932'
+ > SELECT md5(binary('ABC'));
+ '902fbdd2b1df0c4f70b4a5d23525e932'
+PREHOOK: query: explain select md5('ABC')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select md5('ABC')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: '902fbdd2b1df0c4f70b4a5d23525e932' (type: string)
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select
+md5('ABC'),
+md5(''),
+md5(binary('ABC')),
+md5(binary('')),
+md5(cast(null as string)),
+md5(cast(null as binary)),
+md5(null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+md5('ABC'),
+md5(''),
+md5(binary('ABC')),
+md5(binary('')),
+md5(cast(null as string)),
+md5(cast(null as binary)),
+md5(null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+902fbdd2b1df0c4f70b4a5d23525e932 d41d8cd98f00b204e9800998ecf8427e 902fbdd2b1df0c4f70b4a5d23525e932 d41d8cd98f00b204e9800998ecf8427e NULL NULL NULL