You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@iceberg.apache.org by bl...@apache.org on 2022/06/05 18:15:20 UTC
[iceberg] branch master updated: Python: Use struct constant to improve pack/unpack performance (#4929)
This is an automated email from the ASF dual-hosted git repository.
blue pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/master by this push:
new 75fb871ec Python: Use struct constant to improve pack/unpack performance (#4929)
75fb871ec is described below
commit 75fb871ec0d8bb092d41746d4611e10106a15e75
Author: chulucninh09 <43...@users.noreply.github.com>
AuthorDate: Mon Jun 6 01:15:15 2022 +0700
Python: Use struct constant to improve pack/unpack performance (#4929)
---
python/src/iceberg/conversions.py | 35 +++++++++++++++++++++--------------
1 file changed, 21 insertions(+), 14 deletions(-)
diff --git a/python/src/iceberg/conversions.py b/python/src/iceberg/conversions.py
index f5bf709d2..d302f4e19 100644
--- a/python/src/iceberg/conversions.py
+++ b/python/src/iceberg/conversions.py
@@ -27,10 +27,10 @@ Note:
implementation, a concrete function is registered for each generic conversion function. For PrimitiveType
implementations that share the same conversion logic, registrations can be stacked.
"""
-import struct
import uuid
from decimal import Decimal
from functools import singledispatch
+from struct import Struct
from typing import Union
from iceberg.types import (
@@ -52,6 +52,13 @@ from iceberg.types import (
)
from iceberg.utils.decimal import decimal_to_bytes, unscaled_to_decimal
+_BOOL_STRUCT = Struct("<?")
+_INT_STRUCT = Struct("<i")
+_LONG_STRUCT = Struct("<q")
+_FLOAT_STRUCT = Struct("<f")
+_DOUBLE_STRUCT = Struct("<d")
+_UUID_STRUCT = Struct(">QQ")
+
def handle_none(func):
"""A decorator function to handle cases where partition values are `None` or "__HIVE_DEFAULT_PARTITION__"
@@ -154,13 +161,13 @@ def to_bytes(primitive_type: PrimitiveType, value: Union[bool, bytes, Decimal, f
@to_bytes.register(BooleanType)
def _(primitive_type, value: bool) -> bytes:
- return struct.pack("<?", 1 if value else 0)
+ return _BOOL_STRUCT.pack(1 if value else 0)
@to_bytes.register(IntegerType)
@to_bytes.register(DateType)
def _(primitive_type, value: int) -> bytes:
- return struct.pack("<i", value)
+ return _INT_STRUCT.pack(value)
@to_bytes.register(LongType)
@@ -168,7 +175,7 @@ def _(primitive_type, value: int) -> bytes:
@to_bytes.register(TimestampType)
@to_bytes.register(TimestamptzType)
def _(primitive_type, value: int) -> bytes:
- return struct.pack("<q", value)
+ return _LONG_STRUCT.pack(value)
@to_bytes.register(FloatType)
@@ -177,12 +184,12 @@ def _(primitive_type, value: float) -> bytes:
Note: float in python is implemented using a double in C. Therefore this involves a conversion of a 64-bit (double precision)
float to a 32-bit (single precision) float which introduces some imprecision.
"""
- return struct.pack("<f", value)
+ return _FLOAT_STRUCT.pack(value)
@to_bytes.register(DoubleType)
def _(primitive_type, value: float) -> bytes:
- return struct.pack("<d", value)
+ return _DOUBLE_STRUCT.pack(value)
@to_bytes.register(StringType)
@@ -192,7 +199,7 @@ def _(primitive_type, value: str) -> bytes:
@to_bytes.register(UUIDType)
def _(primitive_type, value: uuid.UUID) -> bytes:
- return struct.pack(">QQ", (value.int >> 64) & 0xFFFFFFFFFFFFFFFF, value.int & 0xFFFFFFFFFFFFFFFF)
+ return _UUID_STRUCT.pack((value.int >> 64) & 0xFFFFFFFFFFFFFFFF, value.int & 0xFFFFFFFFFFFFFFFF)
@to_bytes.register(BinaryType)
@@ -241,13 +248,13 @@ def from_bytes(primitive_type: PrimitiveType, b: bytes) -> Union[bool, bytes, De
@from_bytes.register(BooleanType)
def _(primitive_type, b: bytes) -> bool:
- return struct.unpack("<?", b)[0] != 0
+ return _BOOL_STRUCT.unpack(b)[0] != 0
@from_bytes.register(IntegerType)
@from_bytes.register(DateType)
def _(primitive_type, b: bytes) -> int:
- return struct.unpack("<i", b)[0]
+ return _INT_STRUCT.unpack(b)[0]
@from_bytes.register(LongType)
@@ -255,17 +262,17 @@ def _(primitive_type, b: bytes) -> int:
@from_bytes.register(TimestampType)
@from_bytes.register(TimestamptzType)
def _(primitive_type, b: bytes) -> int:
- return struct.unpack("<q", b)[0]
+ return _LONG_STRUCT.unpack(b)[0]
@from_bytes.register(FloatType)
-def _(primitive_type, b: bytes):
- return struct.unpack("<f", b)[0]
+def _(primitive_type, b: bytes) -> float:
+ return _FLOAT_STRUCT.unpack(b)[0]
@from_bytes.register(DoubleType)
def _(primitive_type, b: bytes) -> float:
- return struct.unpack("<d", b)[0]
+ return _DOUBLE_STRUCT.unpack(b)[0]
@from_bytes.register(StringType)
@@ -275,7 +282,7 @@ def _(primitive_type: PrimitiveType, b: bytes) -> str:
@from_bytes.register(UUIDType)
def _(primitive_type, b: bytes) -> uuid.UUID:
- unpacked_bytes = struct.unpack(">QQ", b)
+ unpacked_bytes = _UUID_STRUCT.unpack(b)
return uuid.UUID(int=unpacked_bytes[0] << 64 | unpacked_bytes[1])