You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/04/16 11:55:51 UTC

[avro] branch master updated: AVRO-3065: Introduce UUID logical type to Python implementation (#1148)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c9a60e  AVRO-3065: Introduce UUID logical type to Python implementation (#1148)
0c9a60e is described below

commit 0c9a60eba295adf9b92f785f204b5b2875ef5ac5
Author: Subhash Bhushan <su...@gmail.com>
AuthorDate: Fri Apr 16 04:55:38 2021 -0700

    AVRO-3065: Introduce UUID logical type to Python implementation (#1148)
    
    * AVRO-3065 Introduce UUID logical type to Python implementation
    
    This PR introduces the UUID logical type implementation that was missing in the
    primary python implementation. A new `UUIDSchema` has been introduced, and test
    cases for schema and io have been updated.
    
    Closes: https://issues.apache.org/jira/browse/AVRO-3065
    
    * Fix lint issue in schema.py
    
    * AVRO-3065 Accept UUID values of different versions
    
    This commit changes UUID validation to accept UUID values of any
    version, instead of being locked to version 4.
    
    Also, return `None` on failed validation instead of `False`, for
    consistency with others.
---
 lang/py/avro/constants.py        |  4 +++-
 lang/py/avro/schema.py           | 29 +++++++++++++++++++++++++++++
 lang/py/avro/test/test_io.py     |  3 ++-
 lang/py/avro/test/test_schema.py |  8 +++++---
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/lang/py/avro/constants.py b/lang/py/avro/constants.py
index 3a6fbf6..21a7dd1 100644
--- a/lang/py/avro/constants.py
+++ b/lang/py/avro/constants.py
@@ -28,6 +28,7 @@ TIMESTAMP_MICROS = "timestamp-micros"
 TIMESTAMP_MILLIS = "timestamp-millis"
 TIME_MICROS = "time-micros"
 TIME_MILLIS = "time-millis"
+UUID = "uuid"
 
 SUPPORTED_LOGICAL_TYPE = [
     DATE,
@@ -35,5 +36,6 @@ SUPPORTED_LOGICAL_TYPE = [
     TIMESTAMP_MICROS,
     TIMESTAMP_MILLIS,
     TIME_MICROS,
-    TIME_MILLIS
+    TIME_MILLIS,
+    UUID
 ]
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index e4021fa..7999d0f 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -48,6 +48,7 @@ import json
 import math
 import re
 import sys
+import uuid
 import warnings
 
 import avro.constants
@@ -1183,6 +1184,33 @@ class TimestampMicrosSchema(LogicalSchema, PrimitiveSchema):
     def __eq__(self, that):
         return self.props == that.props
 
+
+#
+# uuid Type
+#
+
+
+class UUIDSchema(LogicalSchema, PrimitiveSchema):
+    """Logical type 'uuid' layered on the primitive 'string' type."""
+    def __init__(self, other_props=None):
+        LogicalSchema.__init__(self, avro.constants.UUID)
+        PrimitiveSchema.__init__(self, 'string', other_props)
+
+    def to_json(self, names=None):
+        return self.props
+
+    def validate(self, datum):
+        """Return self if datum parses as a UUID (any version), else None."""
+        try:
+            uuid.UUID(datum)
+        except (AttributeError, TypeError, ValueError):
+            # Malformed hex (ValueError) or a non-str datum: not a match.
+            return None
+        return self
+
+    def __eq__(self, that):
+        return self.props == that.props
+
 #
 # Module Methods
 #
@@ -1213,6 +1241,7 @@ def make_logical_schema(logical_type, type_, other_props):
         (avro.constants.TIMESTAMP_MILLIS, 'long'): TimestampMillisSchema,
         (avro.constants.TIME_MICROS, 'long'): TimeMicrosSchema,
         (avro.constants.TIME_MILLIS, 'int'): TimeMillisSchema,
+        (avro.constants.UUID, 'string'): UUIDSchema,
     }
     try:
         schema_type = logical_types.get((logical_type, type_), None)
diff --git a/lang/py/avro/test/test_io.py b/lang/py/avro/test/test_io.py
index 750afa4..4ca8800 100644
--- a/lang/py/avro/test/test_io.py
+++ b/lang/py/avro/test/test_io.py
@@ -80,7 +80,8 @@ SCHEMAS_TO_VALIDATE = (
         '{"type": "long", "logicalType": "timestamp-micros"}',
         datetime.datetime(2000, 1, 18, 2, 2, 1, 123499, tzinfo=avro.timezones.tst)
     ),
-    ('{"type": "string", "logicalType": "uuid"}', u'12345abcd'),
+    ('{"type": "string", "logicalType": "uuid"}', u'a4818e1c-8e59-11eb-8dcd-0242ac130003'),  # UUID1
+    ('{"type": "string", "logicalType": "uuid"}', u'570feebe-2bbc-4937-98df-285944e1dbbd'),  # UUID4
     ('{"type": "string", "logicalType": "unknown-logical-type"}', u'12345abcd'),
     ('{"type": "string", "logicalType": "timestamp-millis"}', u'12345abcd'),
     ("""\
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index fceb973..6e5516a 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -242,11 +242,12 @@ TIMESTAMPMICROS_LOGICAL_TYPE = [
     ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"})
 ]
 
+UUID_LOGICAL_TYPE = [
+    ValidTestSchema({"type": "string", "logicalType": "uuid"})
+]
+
 IGNORED_LOGICAL_TYPE = [
     ValidTestSchema(
-        {"type": "string", "logicalType": "uuid"},
-        warnings=[avro.errors.IgnoredLogicalType('Unknown uuid, using string.')]),
-    ValidTestSchema(
         {"type": "string", "logicalType": "unknown-logical-type"},
         warnings=[avro.errors.IgnoredLogicalType('Unknown unknown-logical-type, using string.')]),
     ValidTestSchema(
@@ -326,6 +327,7 @@ EXAMPLES += TIMEMILLIS_LOGICAL_TYPE
 EXAMPLES += TIMEMICROS_LOGICAL_TYPE
 EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
 EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
+EXAMPLES += UUID_LOGICAL_TYPE
 EXAMPLES += IGNORED_LOGICAL_TYPE
 
 VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]