You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@superset.apache.org by GitBox <gi...@apache.org> on 2022/11/03 19:16:42 UTC

[GitHub] [superset] john-bodley commented on a diff in pull request #22029: fix: JSON serializers

john-bodley commented on code in PR #22029:
URL: https://github.com/apache/superset/pull/22029#discussion_r1013307678


##########
superset/utils/core.py:
##########
@@ -581,47 +588,60 @@ def base_json_conv(  # pylint: disable=inconsistent-return-statements
         except Exception:  # pylint: disable=broad-except
             return "[bytes]"
 
+    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
+
 
-def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> str:
+def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:
     """
-    json serializer that deals with dates
+    A JSON serializer that deals with dates by serializing them to ISO 8601.
 
-    >>> dttm = datetime(1970, 1, 1)
-    >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser)
-    '{"dttm": "1970-01-01T00:00:00"}'
+        >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser)
+        '{"dttm": "1970-01-01T00:00:00"}'
+
+    :param obj: The serializable object
+    :param pessimistic: Whether to be pessimistic regarding serialization
+    :returns: The JSON compatible form
+    :raises TypeError: If the non-pessimistic object cannot be serialized
     """
-    val = base_json_conv(obj)
-    if val is not None:
-        return val
+
     if isinstance(obj, (datetime, date, pd.Timestamp)):
-        obj = obj.isoformat()
-    else:
+        return obj.isoformat()
+
+    try:
+        return base_json_conv(obj)
+    except TypeError as ex:
         if pessimistic:
-            return "Unserializable [{}]".format(type(obj))
+            return f"Unserializable [{type(obj)}]"
 
-        raise TypeError("Unserializable object {} of type {}".format(obj, type(obj)))
-    return obj
+        raise ex
 
 
-def pessimistic_json_iso_dttm_ser(obj: Any) -> str:
+def pessimistic_json_iso_dttm_ser(obj: Any) -> Any:
     """Proxy to call json_iso_dttm_ser in a pessimistic way
 
     If one of object is not serializable to json, it will still succeed"""
     return json_iso_dttm_ser(obj, pessimistic=True)
 
 
-def json_int_dttm_ser(obj: Any) -> float:
-    """json serializer that deals with dates"""
-    val = base_json_conv(obj)
-    if val is not None:
-        return val
+def json_int_dttm_ser(obj: Any) -> Any:
+    """
+    A JSON serializer that deals with dates by serializing them to EPOCH.
+
+        >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_int_dttm_ser)
+        '{"dttm": 0}'
+
+    :param obj: The serializable object
+    :returns: The JSON compatible form
+    :raises TypeError: If the object cannot be serialized
+    """
+
     if isinstance(obj, (datetime, pd.Timestamp)):
-        obj = datetime_to_epoch(obj)
-    elif isinstance(obj, date):
-        obj = (obj - EPOCH.date()).total_seconds() * 1000
-    else:
-        raise TypeError("Unserializable object {} of type {}".format(obj, type(obj)))
-    return obj
+        return datetime_to_epoch(obj)

Review Comment:
   See above.



##########
tests/integration_tests/utils_tests.py:
##########
@@ -105,19 +111,26 @@ def test_json_iso_dttm_ser(self):
         assert json_iso_dttm_ser(dttm) == dttm.isoformat()
         assert json_iso_dttm_ser(dt) == dt.isoformat()
         assert json_iso_dttm_ser(t) == t.isoformat()
+        assert json_int_dttm_ser(np.int64(1)) == 1
 
         with self.assertRaises(TypeError):
-            json_iso_dttm_ser("this is not a date")
+            json_iso_dttm_ser(np.datetime64())
 
     def test_base_json_conv(self):
-        assert isinstance(base_json_conv(np.bool_(1)), bool) is True
-        assert isinstance(base_json_conv(np.int64(1)), int) is True
-        assert isinstance(base_json_conv(np.array([1, 2, 3])), list) is True
-        assert isinstance(base_json_conv(set([1])), list) is True
-        assert isinstance(base_json_conv(Decimal("1.0")), float) is True
-        assert isinstance(base_json_conv(uuid.uuid4()), str) is True
-        assert isinstance(base_json_conv(time()), str) is True
-        assert isinstance(base_json_conv(timedelta(0)), str) is True
+        assert isinstance(base_json_conv(np.bool_(1)), bool)
+        assert isinstance(base_json_conv(np.int64(1)), int)
+        assert isinstance(base_json_conv(np.array([1, 2, 3])), list)
+        assert base_json_conv(np.array(None)) is None

Review Comment:
   This is a new test.



##########
tests/integration_tests/utils_tests.py:
##########
@@ -94,9 +94,15 @@ def test_json_int_dttm_ser(self):
         assert json_int_dttm_ser(datetime(1970, 1, 1)) == 0
         assert json_int_dttm_ser(date(1970, 1, 1)) == 0
         assert json_int_dttm_ser(dttm + timedelta(milliseconds=1)) == (ts + 1)
+        assert json_int_dttm_ser(np.int64(1)) == 1

Review Comment:
   The non-temporal case was never tested.



##########
tests/integration_tests/utils_tests.py:
##########
@@ -105,19 +111,26 @@ def test_json_iso_dttm_ser(self):
         assert json_iso_dttm_ser(dttm) == dttm.isoformat()
         assert json_iso_dttm_ser(dt) == dt.isoformat()
         assert json_iso_dttm_ser(t) == t.isoformat()
+        assert json_int_dttm_ser(np.int64(1)) == 1
 
         with self.assertRaises(TypeError):
-            json_iso_dttm_ser("this is not a date")
+            json_iso_dttm_ser(np.datetime64())
 
     def test_base_json_conv(self):
-        assert isinstance(base_json_conv(np.bool_(1)), bool) is True
-        assert isinstance(base_json_conv(np.int64(1)), int) is True
-        assert isinstance(base_json_conv(np.array([1, 2, 3])), list) is True
-        assert isinstance(base_json_conv(set([1])), list) is True
-        assert isinstance(base_json_conv(Decimal("1.0")), float) is True
-        assert isinstance(base_json_conv(uuid.uuid4()), str) is True
-        assert isinstance(base_json_conv(time()), str) is True
-        assert isinstance(base_json_conv(timedelta(0)), str) is True
+        assert isinstance(base_json_conv(np.bool_(1)), bool)
+        assert isinstance(base_json_conv(np.int64(1)), int)
+        assert isinstance(base_json_conv(np.array([1, 2, 3])), list)
+        assert base_json_conv(np.array(None)) is None
+        assert isinstance(base_json_conv(set([1])), list)
+        assert isinstance(base_json_conv(Decimal("1.0")), float)
+        assert isinstance(base_json_conv(uuid.uuid4()), str)
+        assert isinstance(base_json_conv(time()), str)
+        assert isinstance(base_json_conv(timedelta(0)), str)
+        assert isinstance(base_json_conv(bytes()), str)
+        assert base_json_conv(bytes("", encoding="utf-16")) == ["bytes"]
+
+        with pytest.raises(TypeError):

Review Comment:
   This is a new test.



##########
superset/utils/core.py:
##########
@@ -581,47 +588,60 @@ def base_json_conv(  # pylint: disable=inconsistent-return-statements
         except Exception:  # pylint: disable=broad-except
             return "[bytes]"
 
+    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
+
 
-def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> str:
+def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:
     """
-    json serializer that deals with dates
+    A JSON serializer that deals with dates by serializing them to ISO 8601.
 
-    >>> dttm = datetime(1970, 1, 1)
-    >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser)
-    '{"dttm": "1970-01-01T00:00:00"}'
+        >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser)
+        '{"dttm": "1970-01-01T00:00:00"}'
+
+    :param obj: The serializable object
+    :param pessimistic: Whether to be pessimistic regarding serialization
+    :returns: The JSON compatible form
+    :raises TypeError: If the non-pessimistic object cannot be serialized
     """
-    val = base_json_conv(obj)
-    if val is not None:
-        return val
+
     if isinstance(obj, (datetime, date, pd.Timestamp)):
-        obj = obj.isoformat()
-    else:
+        return obj.isoformat()
+
+    try:

Review Comment:
   The logic has been flipped. Rather than trying the base conversion—which now throws—first and then the more specific temporal types, the logic now tries to convert the more specific and then falls back to the more generic. This ensures there's less error handling.



##########
superset/utils/core.py:
##########
@@ -581,47 +588,60 @@ def base_json_conv(  # pylint: disable=inconsistent-return-statements
         except Exception:  # pylint: disable=broad-except
             return "[bytes]"
 
+    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
+
 
-def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> str:
+def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:

Review Comment:
   The return type was wrong. Per the `base_json_conv` method there's a slew of viable return types.



##########
superset/utils/core.py:
##########
@@ -581,47 +588,60 @@ def base_json_conv(  # pylint: disable=inconsistent-return-statements
         except Exception:  # pylint: disable=broad-except
             return "[bytes]"
 
+    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")
+
 
-def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> str:
+def json_iso_dttm_ser(obj: Any, pessimistic: bool = False) -> Any:
     """
-    json serializer that deals with dates
+    A JSON serializer that deals with dates by serializing them to ISO 8601.
 
-    >>> dttm = datetime(1970, 1, 1)
-    >>> json.dumps({'dttm': dttm}, default=json_iso_dttm_ser)
-    '{"dttm": "1970-01-01T00:00:00"}'
+        >>> json.dumps({'dttm': datetime(1970, 1, 1)}, default=json_iso_dttm_ser)
+        '{"dttm": "1970-01-01T00:00:00"}'
+
+    :param obj: The serializable object
+    :param pessimistic: Whether to be pessimistic regarding serialization
+    :returns: The JSON compatible form
+    :raises TypeError: If the non-pessimistic object cannot be serialized
     """
-    val = base_json_conv(obj)
-    if val is not None:
-        return val
+
     if isinstance(obj, (datetime, date, pd.Timestamp)):
-        obj = obj.isoformat()
-    else:
+        return obj.isoformat()
+
+    try:
+        return base_json_conv(obj)
+    except TypeError as ex:
         if pessimistic:
-            return "Unserializable [{}]".format(type(obj))
+            return f"Unserializable [{type(obj)}]"
 
-        raise TypeError("Unserializable object {} of type {}".format(obj, type(obj)))
-    return obj
+        raise ex
 
 
-def pessimistic_json_iso_dttm_ser(obj: Any) -> str:
+def pessimistic_json_iso_dttm_ser(obj: Any) -> Any:

Review Comment:
   The return type was wrong. Per the `base_json_conv` method there's a slew of viable return types.



##########
superset/utils/core.py:
##########
@@ -581,47 +588,60 @@ def base_json_conv(  # pylint: disable=inconsistent-return-statements
         except Exception:  # pylint: disable=broad-except
             return "[bytes]"
 
+    raise TypeError(f"Unserializable object {obj} of type {type(obj)}")

Review Comment:
   Previously this would return `None` however there was no way to distinguish between the conversion of `np.array(None)` which returns `None` and an unserializable object.



##########
tests/integration_tests/utils_tests.py:
##########
@@ -105,19 +111,26 @@ def test_json_iso_dttm_ser(self):
         assert json_iso_dttm_ser(dttm) == dttm.isoformat()
         assert json_iso_dttm_ser(dt) == dt.isoformat()
         assert json_iso_dttm_ser(t) == t.isoformat()
+        assert json_int_dttm_ser(np.int64(1)) == 1

Review Comment:
   The non-temporal case was never tested.



##########
tests/integration_tests/utils_tests.py:
##########
@@ -105,19 +111,26 @@ def test_json_iso_dttm_ser(self):
         assert json_iso_dttm_ser(dttm) == dttm.isoformat()
         assert json_iso_dttm_ser(dt) == dt.isoformat()
         assert json_iso_dttm_ser(t) == t.isoformat()
+        assert json_int_dttm_ser(np.int64(1)) == 1
 
         with self.assertRaises(TypeError):
-            json_iso_dttm_ser("this is not a date")
+            json_iso_dttm_ser(np.datetime64())
 
     def test_base_json_conv(self):
-        assert isinstance(base_json_conv(np.bool_(1)), bool) is True
-        assert isinstance(base_json_conv(np.int64(1)), int) is True
-        assert isinstance(base_json_conv(np.array([1, 2, 3])), list) is True
-        assert isinstance(base_json_conv(set([1])), list) is True
-        assert isinstance(base_json_conv(Decimal("1.0")), float) is True
-        assert isinstance(base_json_conv(uuid.uuid4()), str) is True
-        assert isinstance(base_json_conv(time()), str) is True
-        assert isinstance(base_json_conv(timedelta(0)), str) is True
+        assert isinstance(base_json_conv(np.bool_(1)), bool)

Review Comment:
   The `is True` is unnecessary.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: notifications-unsubscribe@superset.apache.org
For additional commands, e-mail: notifications-help@superset.apache.org