You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by da...@apache.org on 2015/07/02 07:19:59 UTC
spark git commit: [SPARK-8227] [SQL] Add function unhex
Repository: spark
Updated Branches:
refs/heads/master 4e4f74b5e -> b285ac5ba
[SPARK-8227] [SQL] Add function unhex
cc chenghao-intel adrian-wang
Author: zhichao.li <zh...@intel.com>
Closes #7113 from zhichao-li/unhex and squashes the following commits:
379356e [zhichao.li] remove exception checking
a4ae6dc [zhichao.li] add udf_unhex to whitelist
fe5c14a [zhichao.li] add todigit
607d7a3 [zhichao.li] use checkInputTypes
bffd37f [zhichao.li] change to use Hex in apache common package
cde73f5 [zhichao.li] update to use AutoCastInputTypes
11945c7 [zhichao.li] style
c852d46 [zhichao.li] Add function unhex
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b285ac5b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b285ac5b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b285ac5b
Branch: refs/heads/master
Commit: b285ac5ba85fe0b32b00726ad7d3a2efb602e885
Parents: 4e4f74b
Author: zhichao.li <zh...@intel.com>
Authored: Wed Jul 1 22:19:51 2015 -0700
Committer: Davies Liu <da...@databricks.com>
Committed: Wed Jul 1 22:19:51 2015 -0700
----------------------------------------------------------------------
.../catalyst/analysis/FunctionRegistry.scala | 1 +
.../spark/sql/catalyst/expressions/math.scala | 52 ++++++++++++++++++++
.../expressions/MathFunctionsSuite.scala | 6 +++
.../scala/org/apache/spark/sql/functions.scala | 18 +++++++
.../apache/spark/sql/MathExpressionsSuite.scala | 10 ++++
.../hive/execution/HiveCompatibilitySuite.scala | 1 +
6 files changed, 88 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index d53eaed..6f04298 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -157,6 +157,7 @@ object FunctionRegistry {
expression[Substring]("substr"),
expression[Substring]("substring"),
expression[Upper]("ucase"),
+ expression[UnHex]("unhex"),
expression[Upper]("upper")
)
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
index b51318d..8633eb0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/math.scala
@@ -351,6 +351,58 @@ case class Pow(left: Expression, right: Expression)
}
}
/**
 * Performs the inverse operation of HEX: each pair of hex-digit characters in
 * the input string is decoded into one byte.
 *
 * Evaluates to the decoded bytes ([[BinaryType]]), or null when the input is
 * null or contains any character that is not a hex digit — matching Hive's
 * UNHEX, which yields NULL on malformed input.
 */
case class UnHex(child: Expression) extends UnaryExpression with Serializable {

  // The result is raw bytes, not a string.
  override def dataType: DataType = BinaryType

  // Only string input (or an untyped null literal) is accepted.
  override def checkInputDataTypes(): TypeCheckResult = {
    if (child.dataType.isInstanceOf[StringType] || child.dataType == NullType) {
      TypeCheckResult.TypeCheckSuccess
    } else {
      TypeCheckResult.TypeCheckFailure(s"unHex accepts String type, not ${child.dataType}")
    }
  }

  override def eval(input: InternalRow): Any = {
    val num = child.eval(input)
    if (num == null) {
      null
    } else {
      unhex(num.asInstanceOf[UTF8String].getBytes)
    }
  }

  // Lookup table: ASCII code of a hex digit -> its value (0-15); every other
  // ASCII code maps to -1 (invalid).
  private val unhexDigits = {
    val array = Array.fill[Byte](128)(-1)
    (0 to 9).foreach(i => array('0' + i) = i.toByte)
    (0 to 5).foreach(i => array('A' + i) = (i + 10).toByte)
    (0 to 5).foreach(i => array('a' + i) = (i + 10).toByte)
    array
  }

  // Value of a single hex-digit byte, or -1 if the byte is not a hex digit.
  // Bytes >= 0x80 are negative on the JVM; indexing the 128-entry lookup
  // table with them directly (as the original code did) throws
  // ArrayIndexOutOfBoundsException for non-ASCII input instead of producing
  // the null result Hive semantics require, so guard them here.
  private def hexValue(b: Byte): Byte = if (b < 0) -1 else unhexDigits(b)

  private def unhex(inputBytes: Array[Byte]): Array[Byte] = {
    var bytes = inputBytes
    // Odd-length input is implicitly left-padded with '0' (e.g. "F" == "0F").
    if ((bytes.length & 0x01) != 0) {
      bytes = '0'.toByte +: bytes
    }
    val out = new Array[Byte](bytes.length >> 1)
    // Two characters form one byte of the result.
    var i = 0
    while (i < bytes.length) {
      val first = hexValue(bytes(i))
      val second = hexValue(bytes(i + 1))
      if (first == -1 || second == -1) {
        // Any non-hex character invalidates the whole value.
        return null
      }
      out(i / 2) = (((first << 4) | second) & 0xFF).toByte
      i += 2
    }
    out
  }
}
+
case class Hypot(left: Expression, right: Expression)
extends BinaryMathExpression(math.hypot, "HYPOT")
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
index b932d4a..b3345d7 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/MathFunctionsSuite.scala
@@ -238,6 +238,12 @@ class MathFunctionsSuite extends SparkFunSuite with ExpressionEvalHelper {
// scalastyle:on
}
+ test("unhex") {
+ checkEvaluation(UnHex(Literal("737472696E67")), "string".getBytes)
+ checkEvaluation(UnHex(Literal("")), new Array[Byte](0))
+ checkEvaluation(UnHex(Literal("0")), Array[Byte](0))
+ }
+
test("hypot") {
testBinary(Hypot, math.hypot)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
index 4e8f3f9..e6f623b 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala
@@ -1054,6 +1054,24 @@ object functions {
def hex(colName: String): Column = hex(Column(colName))
/**
+ * Inverse of hex. Interprets each pair of characters as a hexadecimal number
+ * and converts to the byte representation of number.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def unhex(column: Column): Column = UnHex(column.expr)
+
+ /**
+ * Inverse of hex. Interprets each pair of characters as a hexadecimal number
+ * and converts to the byte representation of number.
+ *
+ * @group math_funcs
+ * @since 1.5.0
+ */
+ def unhex(colName: String): Column = unhex(Column(colName))
+
+ /**
* Computes `sqrt(a^2^ + b^2^)` without intermediate overflow or underflow.
*
* @group math_funcs
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
index d6331aa..c03cde3 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/MathExpressionsSuite.scala
@@ -225,6 +225,16 @@ class MathExpressionsSuite extends QueryTest {
checkAnswer(data.selectExpr("hex(cast(d as binary))"), Seq(Row("68656C6C6F")))
}
+ test("unhex") {
+ val data = Seq(("1C", "737472696E67")).toDF("a", "b")
+ checkAnswer(data.select(unhex('a)), Row(Array[Byte](28.toByte)))
+ checkAnswer(data.select(unhex('b)), Row("string".getBytes))
+ checkAnswer(data.selectExpr("unhex(a)"), Row(Array[Byte](28.toByte)))
+ checkAnswer(data.selectExpr("unhex(b)"), Row("string".getBytes))
+ checkAnswer(data.selectExpr("""unhex("##")"""), Row(null))
+ checkAnswer(data.selectExpr("""unhex("G123")"""), Row(null))
+ }
+
test("hypot") {
testTwoToOneMathFunction(hypot, hypot, math.hypot)
}
http://git-wip-us.apache.org/repos/asf/spark/blob/b285ac5b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
----------------------------------------------------------------------
diff --git a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
index f88e627..415a816 100644
--- a/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
+++ b/sql/hive/compatibility/src/test/scala/org/apache/spark/sql/hive/execution/HiveCompatibilitySuite.scala
@@ -949,6 +949,7 @@ class HiveCompatibilitySuite extends HiveQueryFileTest with BeforeAndAfter {
"udf_trim",
"udf_ucase",
"udf_unix_timestamp",
+ "udf_unhex",
"udf_upper",
"udf_var_pop",
"udf_var_samp",
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org