You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@spark.apache.org by hv...@apache.org on 2018/03/19 08:41:47 UTC
spark git commit: [SPARK-23599][SQL] Add a UUID generator from
Pseudo-Random Numbers
Repository: spark
Updated Branches:
refs/heads/master 745c8c090 -> 4de638c19
[SPARK-23599][SQL] Add a UUID generator from Pseudo-Random Numbers
## What changes were proposed in this pull request?
This patch adds a UUID generator that builds UUIDs from pseudo-random numbers. It can be used later to implement a deterministic `UUID()` expression.
## How was this patch tested?
Added unit tests.
Author: Liang-Chi Hsieh <vi...@gmail.com>
Closes #20817 from viirya/SPARK-23599.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4de638c1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4de638c1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4de638c1
Branch: refs/heads/master
Commit: 4de638c1976dea74761bbe5c30da808178ee885d
Parents: 745c8c0
Author: Liang-Chi Hsieh <vi...@gmail.com>
Authored: Mon Mar 19 09:41:43 2018 +0100
Committer: Herman van Hovell <hv...@databricks.com>
Committed: Mon Mar 19 09:41:43 2018 +0100
----------------------------------------------------------------------
.../sql/catalyst/util/RandomUUIDGenerator.scala | 43 +++++++++++++++
.../util/RandomUUIDGeneratorSuite.scala | 57 ++++++++++++++++++++
2 files changed, 100 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/4de638c1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
new file mode 100644
index 0000000..4fe07a0
--- /dev/null
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGenerator.scala
@@ -0,0 +1,43 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.util.UUID
+
+import org.apache.commons.math3.random.MersenneTwister
+
+import org.apache.spark.unsafe.types.UTF8String
+
+/**
+ * Generates version 4 UUIDs from a pseudo-random number generator seeded with `randomSeed`.
+ *
+ * The bit layout follows RFC 4122: A Universally Unique IDentifier (UUID) URN Namespace,
+ * section 4.4 "Algorithms for Creating a UUID from Truly Random or Pseudo-Random Numbers".
+ */
+case class RandomUUIDGenerator(randomSeed: Long) {
+  private val prng = new MersenneTwister(randomSeed)
+
+  def getNextUUID(): UUID = {
+    // Drawn in order: the high half gets version bits 0100, the low half gets variant bits 10.
+    val high = (prng.nextLong() & ~0xF000L) | 0x4000L
+    val low = (prng.nextLong() & 0x3FFFFFFFFFFFFFFFL) | 0x8000000000000000L
+    new UUID(high, low)
+  }
+
+  def getNextUUIDUTF8String(): UTF8String = UTF8String.fromString(getNextUUID().toString)
+}
http://git-wip-us.apache.org/repos/asf/spark/blob/4de638c1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
new file mode 100644
index 0000000..b75739e
--- /dev/null
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/RandomUUIDGeneratorSuite.scala
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import scala.util.Random
+
+import org.apache.spark.SparkFunSuite
+
+/** Checks the version/variant bits, seed determinism and string form of generated UUIDs. */
+class RandomUUIDGeneratorSuite extends SparkFunSuite {
+  test("RandomUUIDGenerator should generate version 4, variant 2 UUIDs") {
+    val generator = RandomUUIDGenerator(new Random().nextLong())
+    // `0 to 100` is inclusive, so 101 consecutive UUIDs are checked.
+    (0 to 100).foreach { _ =>
+      val generated = generator.getNextUUID()
+      assert(generated.version() == 4)
+      assert(generated.variant() == 2)
+    }
+  }
+
+  test("UUID from RandomUUIDGenerator should be deterministic") {
+    // Seeds come from scala.util.Random: base seed 100 is used twice, base seed 101 once.
+    def seeded(base: Long): RandomUUIDGenerator = RandomUUIDGenerator(new Random(base).nextLong())
+    val first = seeded(100)
+    val second = seeded(100)
+    val other = seeded(101)
+
+    (0 to 100).foreach { _ =>
+      val expected = first.getNextUUID()
+      assert(expected == second.getNextUUID())
+      assert(expected != other.getNextUUID())
+    }
+  }
+
+  test("Get UTF8String UUID") {
+    val generator = RandomUUIDGenerator(new Random().nextLong())
+    val rendered = generator.getNextUUIDUTF8String().toString
+    // Parsing the text back yields a UUID whose version, variant and string form are checked.
+    val parsed = java.util.UUID.fromString(rendered)
+    assert(parsed.version() == 4 && parsed.variant() == 2 && rendered == parsed.toString)
+  }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org