You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/04/16 11:55:51 UTC
[avro] branch master updated: AVRO-3065: Introduce UUID logical type to Python implementation (#1148)
This is an automated email from the ASF dual-hosted git repository.
rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git
The following commit(s) were added to refs/heads/master by this push:
new 0c9a60e AVRO-3065: Introduce UUID logical type to Python implementation (#1148)
0c9a60e is described below
commit 0c9a60eba295adf9b92f785f204b5b2875ef5ac5
Author: Subhash Bhushan <su...@gmail.com>
AuthorDate: Fri Apr 16 04:55:38 2021 -0700
AVRO-3065: Introduce UUID logical type to Python implementation (#1148)
* AVRO-3065 Introduce UUID logical type to Python implementation
This PR introduces the UUID logical type, which was missing from the
primary Python implementation. A new `UUIDSchema` class has been added, and the
test cases for schema and io have been updated.
Closes: https://issues.apache.org/jira/browse/AVRO-3065
* Fix lint issue in schema.py
* AVRO-3065 Accept UUID values of different versions
This commit changes UUID validation to accept UUID values of any
version, instead of accepting only version 4.
Also, return `None` instead of `False` on failed validation, for
consistency with the other schemas' validation.
---
lang/py/avro/constants.py | 4 +++-
lang/py/avro/schema.py | 29 +++++++++++++++++++++++++++++
lang/py/avro/test/test_io.py | 3 ++-
lang/py/avro/test/test_schema.py | 8 +++++---
4 files changed, 39 insertions(+), 5 deletions(-)
diff --git a/lang/py/avro/constants.py b/lang/py/avro/constants.py
index 3a6fbf6..21a7dd1 100644
--- a/lang/py/avro/constants.py
+++ b/lang/py/avro/constants.py
@@ -28,6 +28,7 @@ TIMESTAMP_MICROS = "timestamp-micros"
TIMESTAMP_MILLIS = "timestamp-millis"
TIME_MICROS = "time-micros"
TIME_MILLIS = "time-millis"
+UUID = "uuid"
SUPPORTED_LOGICAL_TYPE = [
DATE,
@@ -35,5 +36,6 @@ SUPPORTED_LOGICAL_TYPE = [
TIMESTAMP_MICROS,
TIMESTAMP_MILLIS,
TIME_MICROS,
- TIME_MILLIS
+ TIME_MILLIS,
+ UUID
]
diff --git a/lang/py/avro/schema.py b/lang/py/avro/schema.py
index e4021fa..7999d0f 100644
--- a/lang/py/avro/schema.py
+++ b/lang/py/avro/schema.py
@@ -48,6 +48,7 @@ import json
import math
import re
import sys
+import uuid
import warnings
import avro.constants
@@ -1183,6 +1184,33 @@ class TimestampMicrosSchema(LogicalSchema, PrimitiveSchema):
def __eq__(self, that):
return self.props == that.props
+
+#
+# uuid Type
+#
+
+
+class UUIDSchema(LogicalSchema, PrimitiveSchema):
+ def __init__(self, other_props=None):
+ LogicalSchema.__init__(self, avro.constants.UUID)
+ PrimitiveSchema.__init__(self, 'string', other_props)
+
+ def to_json(self, names=None):
+ return self.props
+
+ def validate(self, datum):
+ try:
+ val = uuid.UUID(datum)
+ except ValueError:
+ # If it's a value error, then the string
+ # is not a valid hex code for a UUID.
+ return None
+
+ return self
+
+ def __eq__(self, that):
+ return self.props == that.props
+
#
# Module Methods
#
@@ -1213,6 +1241,7 @@ def make_logical_schema(logical_type, type_, other_props):
(avro.constants.TIMESTAMP_MILLIS, 'long'): TimestampMillisSchema,
(avro.constants.TIME_MICROS, 'long'): TimeMicrosSchema,
(avro.constants.TIME_MILLIS, 'int'): TimeMillisSchema,
+ (avro.constants.UUID, 'string'): UUIDSchema,
}
try:
schema_type = logical_types.get((logical_type, type_), None)
diff --git a/lang/py/avro/test/test_io.py b/lang/py/avro/test/test_io.py
index 750afa4..4ca8800 100644
--- a/lang/py/avro/test/test_io.py
+++ b/lang/py/avro/test/test_io.py
@@ -80,7 +80,8 @@ SCHEMAS_TO_VALIDATE = (
'{"type": "long", "logicalType": "timestamp-micros"}',
datetime.datetime(2000, 1, 18, 2, 2, 1, 123499, tzinfo=avro.timezones.tst)
),
- ('{"type": "string", "logicalType": "uuid"}', u'12345abcd'),
+ ('{"type": "string", "logicalType": "uuid"}', u'a4818e1c-8e59-11eb-8dcd-0242ac130003'), # UUID1
+ ('{"type": "string", "logicalType": "uuid"}', u'570feebe-2bbc-4937-98df-285944e1dbbd'), # UUID4
('{"type": "string", "logicalType": "unknown-logical-type"}', u'12345abcd'),
('{"type": "string", "logicalType": "timestamp-millis"}', u'12345abcd'),
("""\
diff --git a/lang/py/avro/test/test_schema.py b/lang/py/avro/test/test_schema.py
index fceb973..6e5516a 100644
--- a/lang/py/avro/test/test_schema.py
+++ b/lang/py/avro/test/test_schema.py
@@ -242,11 +242,12 @@ TIMESTAMPMICROS_LOGICAL_TYPE = [
ValidTestSchema({"type": "long", "logicalType": "timestamp-micros"})
]
+UUID_LOGICAL_TYPE = [
+ ValidTestSchema({"type": "string", "logicalType": "uuid"})
+]
+
IGNORED_LOGICAL_TYPE = [
ValidTestSchema(
- {"type": "string", "logicalType": "uuid"},
- warnings=[avro.errors.IgnoredLogicalType('Unknown uuid, using string.')]),
- ValidTestSchema(
{"type": "string", "logicalType": "unknown-logical-type"},
warnings=[avro.errors.IgnoredLogicalType('Unknown unknown-logical-type, using string.')]),
ValidTestSchema(
@@ -326,6 +327,7 @@ EXAMPLES += TIMEMILLIS_LOGICAL_TYPE
EXAMPLES += TIMEMICROS_LOGICAL_TYPE
EXAMPLES += TIMESTAMPMILLIS_LOGICAL_TYPE
EXAMPLES += TIMESTAMPMICROS_LOGICAL_TYPE
+EXAMPLES += UUID_LOGICAL_TYPE
EXAMPLES += IGNORED_LOGICAL_TYPE
VALID_EXAMPLES = [e for e in EXAMPLES if getattr(e, "valid", False)]