You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@sedona.apache.org by GitBox <gi...@apache.org> on 2023/01/08 01:17:23 UTC

[GitHub] [sedona] douglasdennis commented on a diff in pull request #745: [SEDONA-227] Python Serde Refactor

douglasdennis commented on code in PR #745:
URL: https://github.com/apache/sedona/pull/745#discussion_r1064067248


##########
python/sedona/utils/geometry_serde.py:
##########
@@ -217,70 +233,97 @@ def create_buffer_for_geom(geom_type: int, coord_type: int, size: int, num_coord
     struct.pack_into('i', buffer, 4, num_coords)
     return buffer
 
+def generate_header_bytes(geom_type: int, coord_type: int, num_coords: int) -> bytes:
+    preamble_byte = (geom_type << 4) | (coord_type << 1)
+    return struct.pack(
+        'BBBBi',
+        preamble_byte,
+        0,
+        0,
+        0,
+        num_coords
+    )
+
 
-def put_coordinates(buffer: bytearray, offset: int, coord_type: int, coords: CoordinateSequence):
+def put_coordinates(buffer: bytearray, offset: int, coord_type: int, coords: ListCoordType) -> int:
     for coord in coords:
-        offset = put_coordinate(buffer, offset, coord_type, coord)
+        struct.pack_into(CoordinateType.unpack_format(coord_type, buffer, offset, *coord))
+        offset += CoordinateType.bytes_per_coord(coord_type)
     return offset
 
 
-def put_coordinate(buffer: bytearray, offset: int, coord_type: int, coord: tuple):
-    x = coord[0]
-    y = coord[1]
-    z = coord[2] if len(coord) > 2 else math.nan
-    if coord_type == CoordinateType.XY:
-        struct.pack_into('d', buffer, offset, x)
-        struct.pack_into('d', buffer, offset + 8, y)
-        offset += 16
-    elif coord_type == CoordinateType.XYZ:
-        struct.pack_into('d', buffer, offset, x)
-        struct.pack_into('d', buffer, offset + 8, y)
-        struct.pack_into('d', buffer, offset + 16, z)
-        offset += 24
-    else:
-        # Shapely does not support M dimension for now
-        raise ValueError("Invalid coordinate type: {}".format(coord_type))
+def put_coordinate(buffer: bytearray, offset: int, coord_type: int, coord: CoordType) -> int:
+    struct.pack_into(CoordinateType.unpack_format(coord_type, buffer, offset, *coord))
+    offset += CoordinateType.bytes_per_coord(coord_type)
     return offset
 
 
-def get_coordinates(buffer: bytearray, offset: int, coord_type: int, num_coords: int) -> List[tuple]:
-    coords = []
-    bytes_per_coord = CoordinateType.bytes_per_coord(coord_type)
-    for i in range(num_coords):
-        coord = get_coordinate(buffer, offset, coord_type)
-        coords.append(coord)
-        offset += bytes_per_coord
+def get_coordinates(buffer: bytearray, offset: int, coord_type: int, num_coords: int) -> Union[np.ndarray, ListCoordType]:
+    if coord_type == CoordinateType.XYM or coord_type == CoordinateType.XYZM:
+        raise NotImplementedError("XYM or XYZM coordinates are not supported")
+
+    if num_coords < 50:
+        coords = [
+            struct.unpack_from(CoordinateType.unpack_format(coord_type), buffer, offset + (i * CoordinateType.bytes_per_coord(coord_type)))
+            for i in range(num_coords)
+        ]
+    else:
+        nums_per_coord = CoordinateType.components_per_coord(coord_type)
+        coords = np.frombuffer(buffer, np.float64, num_coords * nums_per_coord, offset).reshape((num_coords, nums_per_coord))
+
     return coords
 
 
-def get_coordinate(buffer: bytearray, offset: int, coord_type: int) -> tuple:
-    x = struct.unpack_from('d', buffer, offset)[0]
-    y = struct.unpack_from('d', buffer, offset + 8)[0]
-    # Shapely does not support M dimension for now, so we'll simply ignore them
-    if coord_type == CoordinateType.XY or coord_type == CoordinateType.XYM:
-        return x, y
-    elif coord_type == CoordinateType.XYZ or coord_type == CoordinateType.XYZM:
-        z = struct.unpack_from('d', buffer, offset + 16)[0]
-        return x, y, z
-    else:
-        raise NotImplementedError("XYM or XYZM coordinates were not supported")
+def get_coordinate(buffer: bytearray, offset: int, coord_type: int) -> CoordType:
+    # Shapely does not support M dimension for now, so raise if it was passed
+    return struct.unpack_from(CoordinateType.unpack_format(coord_type), buffer, offset)
 
 
-def aligned_offset(offset):
+def aligned_offset(offset: int) -> int:
     return (offset + 7) & ~7
 
 
-def serialize_point(geom: Point) -> bytearray:
-    coords = geom.coords
-    if not coords:
-        return create_buffer_for_geom(GeometryTypeID.POINT, CoordinateType.XY, 8, 0)
-    coord_type = CoordinateType.type_of(coords, geom.has_z)
-    bytes_per_coord = CoordinateType.bytes_per_coord(coord_type)
-    size = 8 + bytes_per_coord
-    buffer = create_buffer_for_geom(GeometryTypeID.POINT, coord_type, size, 1)
-    put_coordinates(buffer, 8, coord_type, coords)
-    return buffer
-
+def serialize_point(geom: Point) -> bytes:
+    coords = [tuple(c) for c in geom.coords]

Review Comment:
   You're right :) Changes made.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: dev-unsubscribe@sedona.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org