You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by fo...@apache.org on 2020/05/09 07:11:30 UTC

[avro] branch master updated: Avro-2785: Update specs on how unions encoded (#847)

This is an automated email from the ASF dual-hosted git repository.

fokko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 7ab212b  Avro-2785: Update specs on how unions encoded (#847)
7ab212b is described below

commit 7ab212b8e454c271797fc343204762ca1b925057
Author: Anh Le (Andy) <an...@vng.com.vn>
AuthorDate: Sat May 9 14:11:22 2020 +0700

    Avro-2785: Update specs on how unions encoded (#847)
    
    * AVRO-2785: Updated specs about how Unions are encoded
    
    * AVRO-2785: Updated related docs for Perl & Python
    
    * AVRO-2785: add side notes for enum_encoding
    
    Thank you @kojiromike
    
    * AVRO-2785: add side notes for union_encoding
    
    Thank you @kojiromike
    
    * AVRO-2785: update side notes for union_encoding
    
    * AVRO-2785: remove invalid side note on enum_encoding
---
 doc/src/content/xdocs/spec.xml      | 4 +++-
 lang/perl/lib/Avro/BinaryDecoder.pm | 2 +-
 lang/perl/lib/Avro/BinaryEncoder.pm | 2 +-
 lang/py/avro/io.py                  | 4 ++--
 lang/py3/avro/io.py                 | 4 ++--
 5 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/doc/src/content/xdocs/spec.xml b/doc/src/content/xdocs/spec.xml
index 947ae60..feb8e2b 100644
--- a/doc/src/content/xdocs/spec.xml
+++ b/doc/src/content/xdocs/spec.xml
@@ -546,7 +546,7 @@
 
           <section id="union_encoding">
             <title>Unions</title>
-            <p>A union is encoded by first writing a <code>long</code>
+            <p>A union is encoded by first writing an <code>int</code>
               value indicating the zero-based position within the
               union of the schema of its value.  The value is then
               encoded per the indicated schema within the union.</p>
@@ -560,6 +560,8 @@
                 followed by the serialized string:
                 <source>02 02 61</source></li>
             </ul>
+            <p><em>NOTE</em>: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long.
+            In reality, we don't expect unions with more than 2^31 - 1 (about 2.1 billion) members.</p>
           </section>
 
           <section id="fixed_encoding">
diff --git a/lang/perl/lib/Avro/BinaryDecoder.pm b/lang/perl/lib/Avro/BinaryDecoder.pm
index c5308f2..fa12fcf 100644
--- a/lang/perl/lib/Avro/BinaryDecoder.pm
+++ b/lang/perl/lib/Avro/BinaryDecoder.pm
@@ -328,7 +328,7 @@ sub skip_union {
     $class->skip($union_schema, $reader);
 }
 
-## 1.3.2 A union is encoded by first writing a long value indicating the
+## 1.3.2 A union is encoded by first writing an int value indicating the
 ## zero-based position within the union of the schema of its value. The value
 ## is then encoded per the indicated schema within the union.
 sub decode_union {
diff --git a/lang/perl/lib/Avro/BinaryEncoder.pm b/lang/perl/lib/Avro/BinaryEncoder.pm
index f47abd1..d476f4b 100644
--- a/lang/perl/lib/Avro/BinaryEncoder.pm
+++ b/lang/perl/lib/Avro/BinaryEncoder.pm
@@ -234,7 +234,7 @@ sub encode_map {
     $class->encode_long(undef, 0, $cb);
 }
 
-## 1.3.2 A union is encoded by first writing a long value indicating the
+## 1.3.2 A union is encoded by first writing an int value indicating the
 ## zero-based position within the union of the schema of its value. The value
 ## is then encoded per the indicated schema within the union.
 sub encode_union {
diff --git a/lang/py/avro/io.py b/lang/py/avro/io.py
index b910ba5..e165db3 100644
--- a/lang/py/avro/io.py
+++ b/lang/py/avro/io.py
@@ -846,7 +846,7 @@ class DatumReader(object):
 
     def read_union(self, writers_schema, readers_schema, decoder):
         """
-        A union is encoded by first writing a long value indicating
+        A union is encoded by first writing an int value indicating
         the zero-based position within the union of the schema of its value.
         The value is then encoded per the indicated schema within the union.
         """
@@ -1096,7 +1096,7 @@ class DatumWriter(object):
 
     def write_union(self, writers_schema, datum, encoder):
         """
-        A union is encoded by first writing a long value indicating
+        A union is encoded by first writing an int value indicating
         the zero-based position within the union of the schema of its value.
         The value is then encoded per the indicated schema within the union.
         """
diff --git a/lang/py3/avro/io.py b/lang/py3/avro/io.py
index 51f5a13..31623a9 100644
--- a/lang/py3/avro/io.py
+++ b/lang/py3/avro/io.py
@@ -636,7 +636,7 @@ class DatumReader(object):
 
   def read_union(self, writer_schema, reader_schema, decoder):
     """
-    A union is encoded by first writing a long value indicating
+    A union is encoded by first writing an int value indicating
     the zero-based position within the union of the schema of its value.
     The value is then encoded per the indicated schema within the union.
     """
@@ -866,7 +866,7 @@ class DatumWriter(object):
 
   def write_union(self, writer_schema, datum, encoder):
     """
-    A union is encoded by first writing a long value indicating
+    A union is encoded by first writing an int value indicating
     the zero-based position within the union of the schema of its value.
     The value is then encoded per the indicated schema within the union.
     """