You are viewing a plain text version of this content. The canonical link for it is here.
Posted to github@beam.apache.org by GitBox <gi...@apache.org> on 2021/12/16 00:27:03 UTC

[GitHub] [beam] TheNeuralBit commented on a change in pull request #15410: [BEAM-10277] Encoding position initial implementation

TheNeuralBit commented on a change in pull request #15410:
URL: https://github.com/apache/beam/pull/15410#discussion_r770135017



##########
File path: sdks/python/apache_beam/coders/row_coder.py
##########
@@ -183,14 +198,14 @@ def encode_to_stream(self, value, out, nested):
 
     self.NULL_MARKER_CODER.encode_to_stream(words.tobytes(), out, True)
 
-    for c, field, attr in zip(self.components, self.schema.fields, attrs):
-      if attr is None:
-        if not field.type.nullable:
+    for i in np.argsort(self.encoding_positions):

Review comment:
       Could you pre-compute the `np.argsort` result once in `__init__` and store it (e.g. as `self.encoding_positions_argsort`) so we don't have to recompute it for every element? We could then use the pre-computed result both here and in `decode_from_stream`.

##########
File path: model/fn-execution/src/main/resources/org/apache/beam/model/fnexecution/v1/standard_coders.yaml
##########
@@ -409,6 +409,39 @@ examples:
   "\x01\x00\x00\x00\x00\x02\x03foo\x01\xa9F\x03bar\x01\xff\xff\xff\xff\xff\xff\xff\xff\x7f": {f_map: {"foo": 9001, "bar": 9223372036854775807}}
   "\x01\x00\x00\x00\x00\x04\neverything\x00\x02is\x00\x05null!\x00\r\xc2\xaf\\_(\xe3\x83\x84)_/\xc2\xaf\x00": {f_map: {"everything": null, "is": null, "null!": null, "¯\\_(ツ)_/¯": null}}
 
+---
+# Binary data generated with the python SDK:
+# schema1 = schema_pb2.Schema(
+# id="30ea5a25-dcd8-4cdb-abeb-5332d15ab4b9",
+# fields=[
+#     schema_pb2.Field(
+#         name="str",
+#         type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
+#         encoding_position=2),
+#     schema_pb2.Field(
+#         name="f_bool",
+#         type=schema_pb2.FieldType(atomic_type=schema_pb2.BOOLEAN),
+#         encoding_position=3),
+#     schema_pb2.Field(
+#         name="i32",
+#         type=schema_pb2.FieldType(
+#             atomic_type=schema_pb2.INT32, nullable=True),
+#         encoding_position=1)
+#   ],

Review comment:
       These encoding positions are 0-indexed now, right?

##########
File path: sdks/python/apache_beam/coders/row_coder_test.py
##########
@@ -282,6 +284,61 @@ def test_row_coder_nested_struct(self):
 
     self.assertEqual(value, coder.decode(coder.encode(value)))
 
+  def test_encoding_position_reorder_fields(self):
+    fields = [("field1", str), ("field2", int), ("field3", int)]
+
+    expected = typing.NamedTuple('expected', fields)
+    reorder = schema_pb2.Schema(
+        id="new_order",
+        fields=[
+            schema_pb2.Field(
+                name="field3",
+                type=schema_pb2.FieldType(atomic_type=schema_pb2.STRING),
+                encoding_position=2),
+            schema_pb2.Field(
+                name="field2",
+                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
+                encoding_position=1),
+            schema_pb2.Field(
+                name="field1",
+                type=schema_pb2.FieldType(atomic_type=schema_pb2.INT32),
+                encoding_position=0)
+        ])
+
+    old_coder = RowCoder.from_type_hint(expected, None)
+    new_coder = RowCoder(reorder)
+
+    encode_expected = old_coder.encode(expected("foo", 7, 12))
+    encode_reorder = new_coder.encode(expected(12, 7, "foo"))
+    self.assertEqual(encode_expected, encode_reorder)
+
+  def test_encoding_position_add_fields(self):
+    fields = [("field1", str), ("field2", str)]
+
+    Old = typing.NamedTuple("Old", fields[:-1])
+    New = typing.NamedTuple("New", fields)
+
+    old_coder = RowCoder.from_type_hint(Old, None)
+    new_coder = RowCoder.from_type_hint(New, None)
+
+    self.assertEqual(
+        New("bar", None), new_coder.decode(old_coder.encode(Old("bar"))))
+
+  def test_encoding_position_add_fields_and_reorder(self):
+    fields = [("field1", typing.Optional[str]), ("field2", str),
+              ("field3", typing.Optional[str])]
+
+    Old = typing.NamedTuple("Old", fields[:-1])
+    New = typing.NamedTuple("New", fields[::1])

Review comment:
       nit: I think this is the same, no?
   
   ```suggestion
       New = typing.NamedTuple("New", fields[:])
   ```




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscribe@beam.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org