You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by fo...@apache.org on 2022/11/16 20:48:52 UTC

[iceberg] branch master updated: Python: Fix rough edges around literals (#6197)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git


The following commit(s) were added to refs/heads/master by this push:
     new cae14e3d0b Python: Fix rough edges around literals (#6197)
cae14e3d0b is described below

commit cae14e3d0baa464d242c26bc5494ac2f0603f215
Author: Fokko Driesprong <fo...@apache.org>
AuthorDate: Wed Nov 16 21:48:45 2022 +0100

    Python: Fix rough edges around literals (#6197)
    
    * Python: Fix rough edges around literals
    
    * timezone -> zone offset
---
 python/pyiceberg/expressions/literals.py  | 20 +++++-----
 python/pyiceberg/utils/datetime.py        |  6 +++
 python/tests/expressions/test_literals.py | 66 ++++++++++++++++++++++++++++++-
 3 files changed, 80 insertions(+), 12 deletions(-)

diff --git a/python/pyiceberg/expressions/literals.py b/python/pyiceberg/expressions/literals.py
index 81d92a4448..a84cd9ae39 100644
--- a/python/pyiceberg/expressions/literals.py
+++ b/python/pyiceberg/expressions/literals.py
@@ -31,7 +31,6 @@ from typing import (
     Generic,
     Type,
     TypeVar,
-    Union,
 )
 from uuid import UUID
 
@@ -344,7 +343,7 @@ class DoubleLiteral(Literal[float]):
         return self
 
     @to.register(FloatType)
-    def _(self, _: FloatType) -> Union[FloatAboveMax, FloatBelowMin, FloatLiteral]:
+    def _(self, _: FloatType) -> Literal[float]:
         if FloatType.max < self.value:
             return FloatAboveMax()
         elif FloatType.min > self.value:
@@ -427,7 +426,7 @@ class StringLiteral(Literal[str]):
         return self
 
     @to.register(IntegerType)
-    def _(self, type_var: IntegerType) -> Union[IntAboveMax, IntBelowMin, LongLiteral]:
+    def _(self, type_var: IntegerType) -> Literal[int]:
         try:
             number = int(float(self.value))
 
@@ -461,11 +460,8 @@ class StringLiteral(Literal[str]):
             raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
 
     @to.register(TimestampType)
-    def _(self, type_var: TimestampType) -> Literal[int]:
-        try:
-            return TimestampLiteral(timestamp_to_micros(self.value))
-        except (TypeError, ValueError) as e:
-            raise ValueError(f"Could not convert {self.value} into a {type_var}") from e
+    def _(self, _: TimestampType) -> Literal[int]:
+        return TimestampLiteral(timestamp_to_micros(self.value))
 
     @to.register(TimestamptzType)
     def _(self, _: TimestamptzType) -> Literal[int]:
@@ -481,7 +477,9 @@ class StringLiteral(Literal[str]):
         if type_var.scale == abs(dec.as_tuple().exponent):
             return DecimalLiteral(dec)
         else:
-            raise ValueError(f"Could not convert {self.value} into a {type_var}")
+            raise ValueError(
+                f"Could not convert {self.value} into a {type_var}, scales differ {type_var.scale} <> {abs(dec.as_tuple().exponent)}"
+            )
 
 
 class UUIDLiteral(Literal[UUID]):
@@ -510,7 +508,9 @@ class FixedLiteral(Literal[bytes]):
         if len(self.value) == len(type_var):
             return self
         else:
-            raise ValueError(f"Could not convert {self.value!r} into a {type_var}")
+            raise ValueError(
+                f"Could not convert {self.value!r} into a {type_var}, lengths differ {len(self.value)} <> {len(type_var)}"
+            )
 
     @to.register(BinaryType)
     def _(self, _: BinaryType) -> Literal[bytes]:
diff --git a/python/pyiceberg/utils/datetime.py b/python/pyiceberg/utils/datetime.py
index 31a8e6193d..7353ebd4ba 100644
--- a/python/pyiceberg/utils/datetime.py
+++ b/python/pyiceberg/utils/datetime.py
@@ -81,6 +81,9 @@ def timestamp_to_micros(timestamp_str: str) -> int:
     """Converts an ISO-9601 formatted timestamp without zone to microseconds from 1970-01-01T00:00:00.000000"""
     if ISO_TIMESTAMP.fullmatch(timestamp_str):
         return datetime_to_micros(datetime.fromisoformat(timestamp_str))
+    if ISO_TIMESTAMPTZ.fullmatch(timestamp_str):
+        # When we can match a timestamp without a zone, we can give a more specific error
+        raise ValueError(f"Zone offset provided, but not expected: {timestamp_str}")
     raise ValueError(f"Invalid timestamp without zone: {timestamp_str} (must be ISO-8601)")
 
 
@@ -88,6 +91,9 @@ def timestamptz_to_micros(timestamptz_str: str) -> int:
     """Converts an ISO-8601 formatted timestamp with zone to microseconds from 1970-01-01T00:00:00.000000+00:00"""
     if ISO_TIMESTAMPTZ.fullmatch(timestamptz_str):
         return datetime_to_micros(datetime.fromisoformat(timestamptz_str))
+    if ISO_TIMESTAMP.fullmatch(timestamptz_str):
+        # When we can match a timestamp without a zone, we can give a more specific error
+        raise ValueError(f"Missing zone offset: {timestamptz_str} (must be ISO-8601)")
     raise ValueError(f"Invalid timestamp with zone: {timestamptz_str} (must be ISO-8601)")
 
 
diff --git a/python/tests/expressions/test_literals.py b/python/tests/expressions/test_literals.py
index dee19359e6..96656ba825 100644
--- a/python/tests/expressions/test_literals.py
+++ b/python/tests/expressions/test_literals.py
@@ -322,14 +322,28 @@ def test_timestamp_with_zone_without_zone_in_literal():
     timestamp_str = literal("2017-08-18T14:21:01.919234")
     with pytest.raises(ValueError) as e:
         _ = timestamp_str.to(timestamp_str.to(TimestamptzType()))
-    assert "Invalid timestamp with zone: 2017-08-18T14:21:01.919234 (must be ISO-8601)" in str(e.value)
+    assert "Missing zone offset: 2017-08-18T14:21:01.919234 (must be ISO-8601)" in str(e.value)
+
+
+def test_invalid_timestamp_in_literal():
+    timestamp_str = literal("abc")
+    with pytest.raises(ValueError) as e:
+        _ = timestamp_str.to(timestamp_str.to(TimestamptzType()))
+    assert "Invalid timestamp with zone: abc (must be ISO-8601)" in str(e.value)
 
 
 def test_timestamp_without_zone_with_zone_in_literal():
     timestamp_str = literal("2017-08-18T14:21:01.919234+07:00")
     with pytest.raises(ValueError) as e:
         _ = timestamp_str.to(TimestampType())
-    assert "Could not convert 2017-08-18T14:21:01.919234+07:00 into a timestamp" in str(e.value)
+    assert "Zone offset provided, but not expected: 2017-08-18T14:21:01.919234+07:00" in str(e.value)
+
+
+def test_invalid_timestamp_with_zone_in_literal():
+    timestamp_str = literal("abc")
+    with pytest.raises(ValueError) as e:
+        _ = timestamp_str.to(TimestampType())
+    assert "Invalid timestamp without zone: abc (must be ISO-8601)" in str(e.value)
 
 
 def test_string_to_uuid_literal():
@@ -742,3 +756,51 @@ def assert_invalid_conversions(lit, types=None):
     for type_var in types:
         with pytest.raises(TypeError):
             _ = lit.to(type_var)
+
+
+def test_compare_floats():
+    lhs = literal(18.15).to(FloatType())
+    rhs = literal(19.25).to(FloatType())
+    assert lhs != rhs
+    assert lhs < rhs
+    assert lhs <= rhs
+    assert not lhs > rhs
+    assert not lhs >= rhs
+
+
+def test_string_to_int_max_value():
+    assert isinstance(literal(str(IntegerType.max + 1)).to(IntegerType()), IntAboveMax)
+
+
+def test_string_to_int_min_value():
+    assert isinstance(literal(str(IntegerType.min - 1)).to(IntegerType()), IntBelowMin)
+
+
+def test_string_to_integer_type_invalid_value():
+    with pytest.raises(ValueError) as e:
+        _ = literal("abc").to(IntegerType())
+    assert "Could not convert abc into a int" in str(e.value)
+
+
+def test_string_to_long_type_invalid_value():
+    with pytest.raises(ValueError) as e:
+        _ = literal("abc").to(LongType())
+    assert "Could not convert abc into a long" in str(e.value)
+
+
+def test_string_to_date_type_invalid_value():
+    with pytest.raises(ValueError) as e:
+        _ = literal("abc").to(DateType())
+    assert "Could not convert abc into a date" in str(e.value)
+
+
+def test_string_to_time_type_invalid_value():
+    with pytest.raises(ValueError) as e:
+        _ = literal("abc").to(TimeType())
+    assert "Could not convert abc into a time" in str(e.value)
+
+
+def test_string_to_decimal_type_invalid_value():
+    with pytest.raises(ValueError) as e:
+        _ = literal("18.15").to(DecimalType(10, 0))
+    assert "Could not convert 18.15 into a decimal(10, 0), scales differ 0 <> 2" in str(e.value)