You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2015/05/20 18:01:45 UTC
[26/50] [abbrv] hive git commit: HIVE-10641 create CRC32 UDF
(Alexander Pivovarov, reviewed by Jason Dere)
HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f82c0c20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f82c0c20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f82c0c20
Branch: refs/heads/parquet
Commit: f82c0c20c5038f54b4a24561c943ca646a272d18
Parents: 84a2235
Author: Alexander Pivovarov <ap...@gmail.com>
Authored: Wed May 6 23:14:29 2015 -0700
Committer: Alexander Pivovarov <ap...@gmail.com>
Committed: Fri May 15 10:27:26 2015 -0700
----------------------------------------------------------------------
.../hadoop/hive/ql/exec/FunctionRegistry.java | 2 +
.../org/apache/hadoop/hive/ql/udf/UDFCrc32.java | 75 ++++++++++++++++++++
.../apache/hadoop/hive/ql/udf/TestUDFCrc32.java | 74 +++++++++++++++++++
ql/src/test/queries/clientpositive/udf_crc32.q | 13 ++++
.../results/clientpositive/show_functions.q.out | 2 +
.../test/results/clientpositive/udf_crc32.q.out | 60 ++++++++++++++++
6 files changed, 226 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 606185c..7ce0a1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.udf.UDFBase64;
import org.apache.hadoop.hive.ql.udf.UDFBin;
import org.apache.hadoop.hive.ql.udf.UDFConv;
import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFCrc32;
import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
import org.apache.hadoop.hive.ql.udf.UDFDegrees;
import org.apache.hadoop.hive.ql.udf.UDFE;
@@ -219,6 +220,7 @@ public final class FunctionRegistry {
system.registerUDF("tan", UDFTan.class, false);
system.registerUDF("e", UDFE.class, false);
system.registerGenericUDF("factorial", GenericUDFFactorial.class);
+ system.registerUDF("crc32", UDFCrc32.class, false);
system.registerUDF("conv", UDFConv.class, false);
system.registerUDF("bin", UDFBin.class, false);
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
new file mode 100644
index 0000000..c1f0e38
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.util.zip.CRC32;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * UDFCrc32 - UDF registered as "crc32": computes the CRC32 checksum of a string (Text)
+ * or binary (BytesWritable) argument and returns it as a LongWritable (bigint).
+ */
+@Description(name = "crc32",
+ value = "_FUNC_(str or bin) - Computes a cyclic redundancy check value "
+ + "for string or binary argument and returns bigint value.",
+ extended = "Example:\n"
+ + " > SELECT _FUNC_('ABC');\n"
+ + " 2743272264\n"
+ + " > SELECT _FUNC_(binary('ABC'));\n"
+ + " 2743272264")
+public class UDFCrc32 extends UDF {
+
+ private final LongWritable result = new LongWritable(); // returned by every non-null evaluate() call
+ private final CRC32 crc32 = new CRC32(); // shared checksum state, reset before each use
+
+ /**
+ * CRC32 of a string: null in, null out; otherwise returns the reused result field (not a new object).
+ */
+ public LongWritable evaluate(Text n) {
+ if (n == null) {
+ return null;
+ }
+
+ crc32.reset();
+ crc32.update(n.getBytes(), 0, n.getLength()); // hash only the first getLength() bytes of the array
+
+ result.set(crc32.getValue());
+ return result;
+ }
+
+ /**
+ * CRC32 of a binary value: null in, null out; otherwise returns the reused result field (not a new object).
+ */
+ public LongWritable evaluate(BytesWritable b) {
+ if (b == null) {
+ return null;
+ }
+
+ crc32.reset();
+ crc32.update(b.getBytes(), 0, b.getLength()); // hash only the first getLength() bytes of the array
+
+ result.set(crc32.getValue());
+ return result;
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
new file mode 100644
index 0000000..97a8672
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+public class TestUDFCrc32 extends TestCase {
+
+ public void testCrc32Str() throws HiveException {
+ UDFCrc32 udf = new UDFCrc32();
+
+ runAndVerifyStr("ABC", 2743272264L, udf);
+ runAndVerifyStr("", 0L, udf);
+ // repeat on the same udf instance: results must not depend on earlier calls
+ runAndVerifyStr("ABC", 2743272264L, udf);
+ runAndVerifyStr("", 0L, udf);
+ // null input must produce null output
+ runAndVerifyStr(null, null, udf);
+ }
+
+ public void testCrc32Bin() throws HiveException {
+ UDFCrc32 udf = new UDFCrc32();
+
+ runAndVerifyBin(new byte[] { 65, 66, 67 }, 2743272264L, udf); // bytes 65, 66, 67 are ASCII "ABC"
+ runAndVerifyBin(new byte[0], 0L, udf);
+ // repeat on the same udf instance: results must not depend on earlier calls
+ runAndVerifyBin(new byte[] { 65, 66, 67 }, 2743272264L, udf);
+ runAndVerifyBin(new byte[0], 0L, udf);
+ // null input must produce null output
+ runAndVerifyBin(null, null, udf);
+ }
+
+ private void runAndVerifyStr(String str, Long expResult, UDFCrc32 udf) throws HiveException { // null expResult means "expect null"
+ Text t = str != null ? new Text(str) : null;
+ LongWritable output = (LongWritable) udf.evaluate(t);
+ if (expResult == null) {
+ assertNull(output);
+ } else {
+ assertNotNull(output);
+ assertEquals("crc32() test ", expResult.longValue(), output.get());
+ }
+ }
+
+ private void runAndVerifyBin(byte[] binV, Long expResult, UDFCrc32 udf) throws HiveException { // null expResult means "expect null"
+ BytesWritable binWr = binV != null ? new BytesWritable(binV) : null;
+ LongWritable output = (LongWritable) udf.evaluate(binWr);
+ if (expResult == null) {
+ assertNull(output);
+ } else {
+ assertNotNull(output);
+ assertEquals("crc32() test ", expResult.longValue(), output.get());
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/queries/clientpositive/udf_crc32.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_crc32.q b/ql/src/test/queries/clientpositive/udf_crc32.q
new file mode 100644
index 0000000..650205e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_crc32.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION crc32;
+DESC FUNCTION EXTENDED crc32;
+
+explain select crc32('ABC');
+
+select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null);
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 0948154..2c138a3 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -50,6 +50,7 @@ cos
count
covar_pop
covar_samp
+crc32
create_union
cume_dist
current_database
@@ -244,6 +245,7 @@ cos
count
covar_pop
covar_samp
+crc32
create_union
cume_dist
current_database
http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/results/clientpositive/udf_crc32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_crc32.q.out b/ql/src/test/results/clientpositive/udf_crc32.q.out
new file mode 100644
index 0000000..8280210
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_crc32.q.out
@@ -0,0 +1,60 @@
+PREHOOK: query: DESCRIBE FUNCTION crc32
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION crc32
+POSTHOOK: type: DESCFUNCTION
+crc32(str or bin) - Computes a cyclic redundancy check value for string or binary argument and returns bigint value.
+PREHOOK: query: DESC FUNCTION EXTENDED crc32
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED crc32
+POSTHOOK: type: DESCFUNCTION
+crc32(str or bin) - Computes a cyclic redundancy check value for string or binary argument and returns bigint value.
+Example:
+ > SELECT crc32('ABC');
+ 2743272264
+ > SELECT crc32(binary('ABC'));
+ 2743272264
+PREHOOK: query: explain select crc32('ABC')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select crc32('ABC')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+ Select Operator
+ expressions: 2743272264 (type: bigint)
+ outputColumnNames: _col0
+ Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+ ListSink
+
+PREHOOK: query: select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+2743272264 0 2743272264 0 NULL NULL NULL