You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by rs...@apache.org on 2021/12/22 17:16:16 UTC

[avro] branch master updated: AVRO-3257: IDL support for nullable types (#1411)

This is an automated email from the ASF dual-hosted git repository.

rskraba pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/avro.git


The following commit(s) were added to refs/heads/master by this push:
     new 7c5d8df  AVRO-3257: IDL support for nullable types (#1411)
7c5d8df is described below

commit 7c5d8df3fb282b1dffa6d11169756ed5593de838
Author: Oscar Westra van Holthe - Kind <op...@users.noreply.github.com>
AuthorDate: Wed Dec 22 18:16:08 2021 +0100

    AVRO-3257: IDL support for nullable types (#1411)
    
    * AVRO-3256: IDL type reference with annotation throws error
    
    Previous versions would alter the referenced type when encountering an
    annotation on (for example) a field type. This change makes references
    read-only.
    
    * AVRO-3256: Document new behavior of annotations
    
    Documented that references to named types cannot be annotated. Also
    described where annotations for named types should go.
    
    Lastly, the example has been fixed to match this change, and now also
     contains various types of documentation.
    
    * AVRO-3257: Add syntax for unions of null with a type
    
    Added Kotlin-style syntax for optional types. `MyType?` compiles to the
    same result as `union { null, MyType }`.
    
    This commit includes the documentation update.
    
    * AVRO-3257: Remove unused variable from JavaCC grammar
    
    * AVRO-3257: Fluid unions for optional types
    
    The syntax for optional types now put the null type in the unions based
    on the default value (if any).
    
    This commit includes the documentation update.
    
    * AVRO-3257: Add comments explaining features
    
    * Fix typo in HTML
    
    Co-authored-by: Ryan Skraba <ry...@skraba.com>
    
    Co-authored-by: Ryan Skraba <ry...@skraba.com>
---
 doc/src/content/xdocs/idl.xml                      | 23 ++++++-
 .../javacc/org/apache/avro/compiler/idl/idl.jj     | 79 +++++++++++++++++++---
 lang/java/compiler/src/test/idl/input/simple.avdl  | 13 +++-
 lang/java/compiler/src/test/idl/output/simple.avpr |  2 +-
 4 files changed, 101 insertions(+), 16 deletions(-)

diff --git a/doc/src/content/xdocs/idl.xml b/doc/src/content/xdocs/idl.xml
index e8c1b81..7606803 100644
--- a/doc/src/content/xdocs/idl.xml
+++ b/doc/src/content/xdocs/idl.xml
@@ -309,17 +309,36 @@ record Card {
         <section id="unions">
           <title>Unions</title>
           <p>Union types are denoted as <code>union { typeA, typeB, typeC, ... }</code>. For example,
-          this record contains a string field that is optional (unioned with <code>null</code>):
+          this record contains a string field that is optional (unioned with <code>null</code>), and
+          a field containing either a precise or a imprecise number:
           </p>
           <source>
 record RecordWithUnion {
   union { null, string } optionalString;
+  union { decimal(12, 6), float } number;
 }
           </source>
           <p>
             Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the
-            JSON format; namely, a record may not contain multiple elements of the same type.
+            JSON format; namely, a record may not contain multiple elements of the same type. Also,
+            fields/parameters that use the union type and have a default parameter must specify a
+            default value of the same type as the <em>first</em> union type.
           </p>
+          <p>Because it occurs so often, there is a special shorthand to denote a union of
+            <code>null</code> with another type. In the following snippet, the first three fields have
+            identical types:
+          </p>
+          <source>
+record RecordWithUnion {
+  union { null, string } optionalString1 = null;
+  string? optionalString2 = null;
+  string? optionalString3; // No default value
+  string? optionalString4 = "something";
+}
+          </source>
+          <p>Note that unlike explicit unions, the position of the <code>null</code> type is fluid; it will be
+            the first or last type depending on the default value (if any). So in the example above, all fields
+            are valid.</p>
         </section> <!-- unions -->
       </section> <!-- complex types -->
     </section> <!-- how to define records -->
diff --git a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj
index 32fc2fa..d62faff 100644
--- a/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj
+++ b/lang/java/compiler/src/main/javacc/org/apache/avro/compiler/idl/idl.jj
@@ -91,6 +91,7 @@ import org.apache.commons.lang3.StringEscapeUtils;
  */
 public class Idl implements Closeable {
   static JsonNodeFactory FACTORY = JsonNodeFactory.instance;
+  private static final String OPTIONAL_NULLABLE_TYPE_PROPERTY = "org.apache.avro.compiler.idl.Idl.NullableType.optional";
 
   URI inputDir;
   ClassLoader resourceLoader = null;
@@ -183,6 +184,32 @@ public class Idl implements Closeable {
     return result;
   }
 
+  /**
+   * For "optional schemas" (recognized by the marker property the NullableType
+   * production adds), ensure the null schema is in the right place.
+   *
+   * @param schema       a schema
+   * @param defaultValue the intended default value
+   * @return the schema, or an optional schema with null in the right place
+   */
+  private static Schema fixOptionalSchema(Schema schema, JsonNode defaultValue) {
+    final Object optionalType = schema.getObjectProp(OPTIONAL_NULLABLE_TYPE_PROPERTY);
+    if (optionalType != null) {
+      // The schema is a union schema with 2 types: "null" and a non-"null" schema
+      Schema nullSchema = schema.getTypes().get(0);
+      Schema nonNullSchema = schema.getTypes().get(1);
+      boolean nonNullDefault = defaultValue != null && !defaultValue.isNull();
+
+      // Always return a new schema: this drops the marker property.
+      if (nonNullDefault) {
+        return Schema.createUnion(nonNullSchema, nullSchema);
+      } else {
+        return Schema.createUnion(nullSchema, nonNullSchema);
+      }
+    }
+    return schema;
+  }
+
 }
 
 PARSER_END(Idl)
@@ -1000,6 +1027,7 @@ TOKEN :
 | < EQUALS: "=" >
 | < DOT: "." >
 | < DASH: "-" >
+| < QUESTION_MARK: "?" >
 }
 
 TOKEN :
@@ -1349,7 +1377,8 @@ void VariableDeclarator(Schema type, List<Field> fields):
         order = Field.Order.valueOf(getTextProp(key,props,token).toUpperCase());
 
     boolean validate = SchemaResolver.isFullyResolvedSchema(type);
-    Field field = Accessor.createField(name, type, DocCommentHelper.getDoc(), defaultValue, validate, order);
+    Schema fieldType = fixOptionalSchema(type, defaultValue);
+    Field field = Accessor.createField(name, fieldType, DocCommentHelper.getDoc(), defaultValue, validate, order);
     for (String key : props.keySet())
       if ("order".equals(key)) {                  // already handled: ignore
       } else if ("aliases".equals(key)) {         // aliases
@@ -1440,11 +1469,6 @@ Schema Type():
     ( SchemaProperty(props) )*
   s = UnannotatedType(props)
   {
-    for (String key : props.keySet())
-      Accessor.addProp(s, key, props.get(key));
-    LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
-    if (logicalType != null)
-      logicalType.addToSchema(s);
     return s;
   }
 }
@@ -1455,13 +1479,48 @@ Schema UnannotatedType(Map<String, JsonNode> props):
 }
 {
   (
+    s = NullableType(props)
+    | (
+        s = UnionDefinition()
+        | s = ArrayType()
+        | s = MapType()
+      )
+      {
+        // NullableType also applies properties, inside any union with null it may create.
+        for (String key : props.keySet())
+          Accessor.addProp(s, key, props.get(key));
+        LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
+        if (logicalType != null)
+          logicalType.addToSchema(s);
+      }
+  )
+  {
+    return s;
+  }
+}
+
+Schema NullableType(Map<String, JsonNode> props):
+{
+  Schema s;
+  boolean optional = false;
+}
+{
+  (
       s = ReferenceType() { if (!props.isEmpty()) { throw error("Type references may not be annotated", token); } }
     | s = PrimitiveType()
-    | s = UnionDefinition()
-    | s = ArrayType()
-    | s = MapType()
-  )
+  ) [ <QUESTION_MARK> { optional = true; } ]
   {
+    // By applying the properties here (before creating the union), type annotations modify the optional type instead of the union.
+    for (String key : props.keySet())
+      Accessor.addProp(s, key, props.get(key));
+    LogicalType logicalType = LogicalTypes.fromSchemaIgnoreInvalid(s);
+    if (logicalType != null)
+      logicalType.addToSchema(s);
+    if (optional) {
+      s = Schema.createUnion(Schema.create(Schema.Type.NULL), s);
+      // Add a marker property to the union (it will be removed when creating fields)
+      Accessor.addProp(s, OPTIONAL_NULLABLE_TYPE_PROPERTY, BooleanNode.TRUE);
+    }
     return s;
   }
 }
diff --git a/lang/java/compiler/src/test/idl/input/simple.avdl b/lang/java/compiler/src/test/idl/input/simple.avdl
index 5208c7e..715bbd0 100644
--- a/lang/java/compiler/src/test/idl/input/simple.avdl
+++ b/lang/java/compiler/src/test/idl/input/simple.avdl
@@ -34,11 +34,12 @@ protocol Simple {
     A,
     B,
     C
-  } = C;
+  } = C; // C is the default value used when reading unknown values from another schema version (without it, reading throws an exception).
 
   /** A TestRecord. */
   @my-property({"key":3})
   record TestRecord {
+    // Tests that keywords can also appear in identifiers.
     @avro.java.`string`("String") string @order("ignore") name = "foo";
 
     /** The kind of record. */
@@ -49,16 +50,21 @@ protocol Simple {
 
     MD5 hash = "0000000000000000";
 
+    // A traditional optional field
     union {null, MD5} @aliases(["hash", "hsh"]) nullableHash = null;
 
+    // These two fields parse correctly, but will brewak (be changed to strings) when serializing the protocol/schema as JSON.
     double value = NaN;
     float average = -Infinity;
     date d = 0;
-    time_ms t = 0;
+    // An optional type with a non-null default value (results in a union with null last).
+    time_ms? t = 0;
 
     @foo.bar("bar.foo") long l = 0;
+    // Arrays (and maps) may also have properties
     @foo.bar.bar("foo.bar2") array<string> a = [];
-    union {null, @foo.foo.bar(42) @foo.foo.foo("3foo") string} prop = null;
+    // An optional type with a null default value (results in a union with null first).
+    @foo.foo.bar(42) @foo.foo.foo("3foo") string? prop = null;
   }
 
   /** An MD5 hash. */
@@ -70,6 +76,7 @@ protocol Simple {
 
   /** method 'hello' takes @parameter 'greeting' */
   string hello(string greeting);
+  // The value of TestRecord also contains defaults for fields not mentioned.
   TestRecord echo(TestRecord `record` = {"name":"bar","kind":"BAR"});
   /** method 'add' takes @parameter 'arg1' @parameter 'arg2' */
   @specialProp("test")
diff --git a/lang/java/compiler/src/test/idl/output/simple.avpr b/lang/java/compiler/src/test/idl/output/simple.avpr
index 5e86c6c..0ec9edb 100644
--- a/lang/java/compiler/src/test/idl/output/simple.avpr
+++ b/lang/java/compiler/src/test/idl/output/simple.avpr
@@ -64,7 +64,7 @@
       "default": 0
     }, {
       "name": "t",
-      "type": {"type": "int", "logicalType": "time-millis"},
+      "type": [ {"type": "int", "logicalType": "time-millis"}, "null" ],
       "default": 0
     } , {
       "name": "l",