You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by sp...@apache.org on 2015/05/20 18:01:45 UTC

[26/50] [abbrv] hive git commit: HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)

HIVE-10641 create CRC32 UDF (Alexander Pivovarov, reviewed by Jason Dere)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f82c0c20
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f82c0c20
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f82c0c20

Branch: refs/heads/parquet
Commit: f82c0c20c5038f54b4a24561c943ca646a272d18
Parents: 84a2235
Author: Alexander Pivovarov <ap...@gmail.com>
Authored: Wed May 6 23:14:29 2015 -0700
Committer: Alexander Pivovarov <ap...@gmail.com>
Committed: Fri May 15 10:27:26 2015 -0700

----------------------------------------------------------------------
 .../hadoop/hive/ql/exec/FunctionRegistry.java   |  2 +
 .../org/apache/hadoop/hive/ql/udf/UDFCrc32.java | 75 ++++++++++++++++++++
 .../apache/hadoop/hive/ql/udf/TestUDFCrc32.java | 74 +++++++++++++++++++
 ql/src/test/queries/clientpositive/udf_crc32.q  | 13 ++++
 .../results/clientpositive/show_functions.q.out |  2 +
 .../test/results/clientpositive/udf_crc32.q.out | 60 ++++++++++++++++
 6 files changed, 226 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 606185c..7ce0a1c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -50,6 +50,7 @@ import org.apache.hadoop.hive.ql.udf.UDFBase64;
 import org.apache.hadoop.hive.ql.udf.UDFBin;
 import org.apache.hadoop.hive.ql.udf.UDFConv;
 import org.apache.hadoop.hive.ql.udf.UDFCos;
+import org.apache.hadoop.hive.ql.udf.UDFCrc32;
 import org.apache.hadoop.hive.ql.udf.UDFDayOfMonth;
 import org.apache.hadoop.hive.ql.udf.UDFDegrees;
 import org.apache.hadoop.hive.ql.udf.UDFE;
@@ -219,6 +220,7 @@ public final class FunctionRegistry {
     system.registerUDF("tan", UDFTan.class, false);
     system.registerUDF("e", UDFE.class, false);
     system.registerGenericUDF("factorial", GenericUDFFactorial.class);
+    system.registerUDF("crc32", UDFCrc32.class, false);
 
     system.registerUDF("conv", UDFConv.class, false);
     system.registerUDF("bin", UDFBin.class, false);

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
new file mode 100644
index 0000000..c1f0e38
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFCrc32.java
@@ -0,0 +1,75 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf;
+
+import java.util.zip.CRC32;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDF;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Computes the CRC32 checksum of a string or binary argument; null input yields null.
+ * Each instance reuses one CRC32 and one LongWritable, so results are overwritten per call.
+ */
+@Description(name = "crc32",
+    value = "_FUNC_(str or bin) - Computes a cyclic redundancy check value "
+    + "for string or binary argument and returns bigint value.",
+    extended = "Example:\n"
+    + "  > SELECT _FUNC_('ABC');\n"
+    + "  2743272264\n"
+    + "  > SELECT _FUNC_(binary('ABC'));\n"
+    + "  2743272264")
+public class UDFCrc32 extends UDF {
+
+  private final LongWritable result = new LongWritable();
+  private final CRC32 crc32 = new CRC32();
+
+  /**
+   * CRC32 of the first getLength() bytes of the Text buffer; returns null for null input.
+   */
+  public LongWritable evaluate(Text n) {
+    if (n == null) {
+      return null;
+    }
+
+    crc32.reset();
+    crc32.update(n.getBytes(), 0, n.getLength());
+
+    result.set(crc32.getValue());
+    return result;
+  }
+
+  /**
+   * CRC32 of the first getLength() bytes of the binary buffer; returns null for null input.
+   */
+  public LongWritable evaluate(BytesWritable b) {
+    if (b == null) {
+      return null;
+    }
+
+    crc32.reset();
+    crc32.update(b.getBytes(), 0, b.getLength());
+
+    result.set(crc32.getValue());
+    return result;
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
new file mode 100644
index 0000000..97a8672
--- /dev/null
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFCrc32.java
@@ -0,0 +1,74 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+
+public class TestUDFCrc32 extends TestCase {
+
+  public void testCrc32Str() throws HiveException {
+    UDFCrc32 udf = new UDFCrc32();
+
+    runAndVerifyStr("ABC", 2743272264L, udf);
+    runAndVerifyStr("", 0L, udf);
+    // repeat on the same instance to check that earlier calls do not affect the result
+    runAndVerifyStr("ABC", 2743272264L, udf);
+    runAndVerifyStr("", 0L, udf);
+    // null input must produce a null result
+    runAndVerifyStr(null, null, udf);
+  }
+
+  public void testCrc32Bin() throws HiveException {
+    UDFCrc32 udf = new UDFCrc32();
+
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, 2743272264L, udf);
+    runAndVerifyBin(new byte[0], 0L, udf);
+    // repeat on the same instance to check that earlier calls do not affect the result
+    runAndVerifyBin(new byte[] { 65, 66, 67 }, 2743272264L, udf);
+    runAndVerifyBin(new byte[0], 0L, udf);
+    // null input must produce a null result
+    runAndVerifyBin(null, null, udf);
+  }
+
+  private void runAndVerifyStr(String str, Long expResult, UDFCrc32 udf) throws HiveException {
+    Text t = str != null ? new Text(str) : null;
+    LongWritable output = (LongWritable) udf.evaluate(t);
+    if (expResult == null) {
+      assertNull(output);
+    } else {
+      assertNotNull(output);
+      assertEquals("crc32() test ", expResult.longValue(), output.get());
+    }
+  }
+
+  private void runAndVerifyBin(byte[] binV, Long expResult, UDFCrc32 udf) throws HiveException {
+    BytesWritable binWr = binV != null ? new BytesWritable(binV) : null;
+    LongWritable output = (LongWritable) udf.evaluate(binWr);
+    if (expResult == null) {
+      assertNull(output);
+    } else {
+      assertNotNull(output);
+      assertEquals("crc32() test ", expResult.longValue(), output.get());
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/queries/clientpositive/udf_crc32.q
----------------------------------------------------------------------
diff --git a/ql/src/test/queries/clientpositive/udf_crc32.q b/ql/src/test/queries/clientpositive/udf_crc32.q
new file mode 100644
index 0000000..650205e
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_crc32.q
@@ -0,0 +1,13 @@
+DESCRIBE FUNCTION crc32;
+DESC FUNCTION EXTENDED crc32;
+
+explain select crc32('ABC');
+
+select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null);

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/results/clientpositive/show_functions.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/show_functions.q.out b/ql/src/test/results/clientpositive/show_functions.q.out
index 0948154..2c138a3 100644
--- a/ql/src/test/results/clientpositive/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/show_functions.q.out
@@ -50,6 +50,7 @@ cos
 count
 covar_pop
 covar_samp
+crc32
 create_union
 cume_dist
 current_database
@@ -244,6 +245,7 @@ cos
 count
 covar_pop
 covar_samp
+crc32
 create_union
 cume_dist
 current_database

http://git-wip-us.apache.org/repos/asf/hive/blob/f82c0c20/ql/src/test/results/clientpositive/udf_crc32.q.out
----------------------------------------------------------------------
diff --git a/ql/src/test/results/clientpositive/udf_crc32.q.out b/ql/src/test/results/clientpositive/udf_crc32.q.out
new file mode 100644
index 0000000..8280210
--- /dev/null
+++ b/ql/src/test/results/clientpositive/udf_crc32.q.out
@@ -0,0 +1,60 @@
+PREHOOK: query: DESCRIBE FUNCTION crc32
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION crc32
+POSTHOOK: type: DESCFUNCTION
+crc32(str or bin) - Computes a cyclic redundancy check value for string or binary argument and returns bigint value.
+PREHOOK: query: DESC FUNCTION EXTENDED crc32
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESC FUNCTION EXTENDED crc32
+POSTHOOK: type: DESCFUNCTION
+crc32(str or bin) - Computes a cyclic redundancy check value for string or binary argument and returns bigint value.
+Example:
+  > SELECT crc32('ABC');
+  2743272264
+  > SELECT crc32(binary('ABC'));
+  2743272264
+PREHOOK: query: explain select crc32('ABC')
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select crc32('ABC')
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: _dummy_table
+          Row Limit Per Split: 1
+          Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+          Select Operator
+            expressions: 2743272264 (type: bigint)
+            outputColumnNames: _col0
+            Statistics: Num rows: 0 Data size: 1 Basic stats: PARTIAL Column stats: COMPLETE
+            ListSink
+
+PREHOOK: query: select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+POSTHOOK: query: select
+crc32('ABC'),
+crc32(''),
+crc32(binary('ABC')),
+crc32(binary('')),
+crc32(cast(null as string)),
+crc32(cast(null as binary)),
+crc32(null)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+#### A masked pattern was here ####
+2743272264	0	2743272264	0	NULL	NULL	NULL