You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by cu...@apache.org on 2010/08/17 21:09:44 UTC
svn commit: r986455 - in /avro/trunk: CHANGES.txt
doc/src/content/xdocs/spec.xml lang/java/src/java/org/apache/avro/Schema.java
lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java
lang/java/src/test/java/org/apache/avro/TestSchema.java
Author: cutting
Date: Tue Aug 17 19:09:44 2010
New Revision: 986455
URL: http://svn.apache.org/viewvc?rev=986455&view=rev
Log:
AVRO-600. Add support for type and field name aliases.
Modified:
avro/trunk/CHANGES.txt
avro/trunk/doc/src/content/xdocs/spec.xml
avro/trunk/lang/java/src/java/org/apache/avro/Schema.java
avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java
avro/trunk/lang/java/src/test/java/org/apache/avro/TestSchema.java
Modified: avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/avro/trunk/CHANGES.txt?rev=986455&r1=986454&r2=986455&view=diff
==============================================================================
--- avro/trunk/CHANGES.txt (original)
+++ avro/trunk/CHANGES.txt Tue Aug 17 19:09:44 2010
@@ -50,6 +50,9 @@ Avro 1.4.0 (unreleased)
SpecificDatumReader and SpecificDatumWriter will now use generic
types when no specific class is available. (cutting)
+ AVRO-600. Add support for type and field name aliases,
+ facilitating schema migration. (cutting)
+
IMPROVEMENTS
AVRO-587. Add Charts and Templating to Stats View
Modified: avro/trunk/doc/src/content/xdocs/spec.xml
URL: http://svn.apache.org/viewvc/avro/trunk/doc/src/content/xdocs/spec.xml?rev=986455&r1=986454&r2=986455&view=diff
==============================================================================
--- avro/trunk/doc/src/content/xdocs/spec.xml (original)
+++ avro/trunk/doc/src/content/xdocs/spec.xml Tue Aug 17 19:09:44 2010
@@ -93,6 +93,8 @@
<li><em>namespace</em>, a JSON string that qualifies the name;</li>
<li><code>doc</code>: a JSON string providing documentation to the
user of this schema (optional).</li>
+ <li><code>aliases</code>: a JSON array of strings, providing
+ alternate names for this record (optional).</li>
<li><code>fields</code>: a JSON array, listing fields (required).
Each field is a JSON object with the following attributes:
<ul>
@@ -134,6 +136,8 @@
"descending", or "ignore". For more details on how
this is used, see the <a href="#order">sort
order</a> section below.</li>
+ <li><code>aliases</code>: a JSON array of strings, providing
+ alternate names for this field (optional).</li>
</ul>
</li>
</ul>
@@ -143,6 +147,7 @@
{
"type": "record",
"name": "LongList",
+ "aliases": ["LinkedLongs"], // old name for this
"fields" : [
{"name": "value", "type": "long"}, // each element has a long
{"name": "next", "type": ["LongList", "null"]} // optional next element
@@ -160,6 +165,8 @@
<li><code>name</code>: a JSON string providing the name
of the enum (required).</li>
<li><em>namespace</em>, a JSON string that qualifies the name;</li>
+ <li><code>aliases</code>: a JSON array of strings, providing
+ alternate names for this enum (optional).</li>
<li><code>doc</code>: a JSON string providing documentation to the
user of this schema (optional).</li>
<li><code>symbols</code>: a JSON array, listing symbols,
@@ -221,6 +228,8 @@
<ul>
<li><code>name</code>: a string naming this fixed (required).</li>
<li><em>namespace</em>, a string that qualifies the name;</li>
+ <li><code>aliases</code>: a JSON array of strings, providing
+ alternate names for this fixed (optional).</li>
<li><code>size</code>: an integer, specifying the number
of bytes per value (required).</li>
</ul>
@@ -273,6 +282,28 @@
equivalent.</p>
</section>
+ <section>
+ <title>Aliases</title>
+ <p>Named types and fields may have aliases. An implementation
+ may optionally use aliases to map a writer's schema to the
+ reader's. This facilitates both schema evolution and the
+ processing of disparate datasets.</p>
+ <p>Aliases function by re-writing the writer's schema using
+ aliases from the reader's schema. For example, if the
+ writer's schema was named "Foo" and the reader's schema is
+ named "Bar" and has an alias of "Foo", then the implementation
+ would act as though "Foo" were named "Bar" when reading.
+ Similarly, if data was written as a record with a field named
+ "x" and is read as a record with a field named "y" with alias
+ "x", then the implementation would act as though "x" were
+ named "y" when reading.</p>
+ <p>A type alias may be specified either as a fully
+ namespace-qualified name, or relative to the namespace of the
+ name it is an alias for. For example, if a type named "a.b" has
+ aliases of "c" and "x.y", then the fully qualified names of
+ its aliases are "a.c" and "x.y".</p>
+ </section>
+
</section> <!-- end schemas -->
<section>
Modified: avro/trunk/lang/java/src/java/org/apache/avro/Schema.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/Schema.java?rev=986455&r1=986454&r2=986455&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/Schema.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/Schema.java Tue Aug 17 19:09:44 2010
@@ -270,6 +270,16 @@ public abstract class Schema {
throw new AvroRuntimeException("Not a named type: "+this);
}
+ /** If this is a record, enum or fixed, add an alias.
+  * This base implementation always throws; NamedSchema overrides it.
+  * @param alias the alternate name to add
+  * @throws AvroRuntimeException always, from this base implementation */
+ public void addAlias(String alias) {
+ throw new AvroRuntimeException("Not a named type: "+this);
+ }
+
+ /** If this is a record, enum or fixed, return its aliases, if any.
+  * This base implementation always throws; NamedSchema overrides it.
+  * @throws AvroRuntimeException always, from this base implementation */
+ public Set<String> getAliases() {
+ throw new AvroRuntimeException("Not a named type: "+this);
+ }
+
/** Returns true if this record is an error type. */
public boolean isError() {
throw new AvroRuntimeException("Not a record: "+this);
@@ -360,6 +370,7 @@ public abstract class Schema {
private final String doc;
private final JsonNode defaultValue;
private final Order order;
+ private Set<String> aliases;
private final Props props = new Props(FIELD_RESERVED);
public Field(String name, Schema schema, String doc,
@@ -442,11 +453,15 @@ public abstract class Schema {
names.space(space);
}
}
+ /** Returns this name as it should be written relative to
+  * {@code defaultSpace}: the simple name when this name has no namespace or
+  * its namespace equals {@code defaultSpace}, otherwise the full name. */
+ public String getQualified(String defaultSpace) {
+   if (space != null && !space.equals(defaultSpace))
+     return full;
+   return name;
+ }
}
private static abstract class NamedSchema extends Schema {
- private final Name name;
- private final String doc;
+ final Name name;
+ final String doc;
+ Set<Name> aliases;
public NamedSchema(Type type, Name name, String doc) {
super(type);
this.name = name;
@@ -459,13 +474,22 @@ public abstract class Schema {
public String getDoc() { return doc; }
public String getNamespace() { return name.space; }
public String getFullName() { return name.full; }
+ /** Adds {@code alias} as an alternate name for this schema.  The alias is
+  * wrapped in a {@code Name} built with this schema's namespace.  Aliases
+  * are kept in insertion order so that the array written by
+  * {@code aliasesToJson} is the same from run to run. */
+ public void addAlias(String alias) {
+   if (aliases == null)                 // the set is created on first use
+     this.aliases = new java.util.LinkedHashSet<Name>();
+   aliases.add(new Name(alias, name.space));
+ }
+ /** Returns the full names of this schema's aliases, in the order they were
+  * added.  The result is a fresh set on every call and is empty, never
+  * null, when no alias has been added. */
+ public Set<String> getAliases() {
+   Set<String> result = new java.util.LinkedHashSet<String>();
+   if (aliases != null)
+     for (Name alias : aliases)
+       result.add(alias.full);
+   return result;
+ }
public boolean writeNameRef(Names names, JsonGenerator gen)
throws IOException {
if (this.equals(names.get(name))) {
- if (name.space == null || name.space.equals(names.space()))
- gen.writeString(name.name); // in default namespace
- else
- gen.writeString(name.full); // use fully-qualified name
+ gen.writeString(name.getQualified(names.space()));
return true;
} else if (name.name != null) {
names.put(name, this);
@@ -481,6 +505,15 @@ public abstract class Schema {
public int hashCode() {
return getType().hashCode() + name.hashCode() + props.hashCode();
}
+ public void aliasesToJson(JsonGenerator gen) throws IOException {
+ if (aliases == null) return;
+ gen.writeFieldName("aliases");
+ gen.writeStartArray();
+ for (Name alias : aliases)
+ gen.writeString(alias.getQualified(name.space));
+ gen.writeEndArray();
+ }
+
}
private static class SeenPair {
@@ -573,6 +606,7 @@ public abstract class Schema {
gen.writeFieldName("fields");
fieldsToJson(names, gen);
props.write(gen);
+ aliasesToJson(gen);
gen.writeEndObject();
}
@@ -632,6 +666,7 @@ public abstract class Schema {
gen.writeString(symbol);
gen.writeEndArray();
props.write(gen);
+ aliasesToJson(gen);
gen.writeEndObject();
}
}
@@ -767,6 +802,7 @@ public abstract class Schema {
writeName(names, gen);
gen.writeNumberField("size", size);
props.write(gen);
+ aliasesToJson(gen);
gen.writeEndObject();
}
}
@@ -950,6 +986,7 @@ public abstract class Schema {
if (!FIELD_RESERVED.contains(prop) && value != null)
f.addProp(prop, value);
}
+ f.aliases = parseAliases(field);
fields.add(f);
}
result.setFields(fields);
@@ -989,6 +1026,12 @@ public abstract class Schema {
}
if (savedSpace != null)
names.space(savedSpace); // restore space
+ if (result instanceof NamedSchema) {
+ Set<String> aliases = parseAliases(schema);
+ if (aliases != null) // add aliases
+ for (String alias : aliases)
+ result.addAlias(alias);
+ }
return result;
} else if (schema.isArray()) { // union
LockableArrayList<Schema> types =
@@ -1001,6 +1044,21 @@ public abstract class Schema {
}
}
+ /** Reads the optional "aliases" attribute of a JSON node.
+  * @param node the JSON object to inspect
+  * @return the alias strings, or null when the attribute is absent
+  * @throws SchemaParseException if the attribute is not an array, or if any
+  * of its elements is not a string */
+ private static Set<String> parseAliases(JsonNode node) {
+   JsonNode array = node.get("aliases");
+   if (array == null)
+     return null;
+   if (!array.isArray())
+     throw new SchemaParseException("aliases not an array: "+node);
+   Set<String> parsed = new HashSet<String>();
+   for (JsonNode element : array) {
+     if (element.isTextual())
+       parsed.add(element.getTextValue());
+     else
+       throw new SchemaParseException("alias not a string: "+element);
+   }
+   return parsed;
+ }
+
/** Extracts text value associated to key from the container JsonNode,
* and throws {@link SchemaParseException} if it doesn't exist.
*
@@ -1034,6 +1092,135 @@ public abstract class Schema {
}
}
+ /** Rewrite a writer's schema using the aliases from a reader's schema. This
+ * permits reading records, enums and fixed schemas whose names have changed,
+ * and records whose field names have changed. The returned schema always
+ * contains the same data elements in the same order, but with possibly
+ * different names. */
+ public static Schema applyAliases(Schema writer, Schema reader) {
+ if (writer == reader) return writer; // same schema
+
+ // create indexes of names
+ Map<Schema,Schema> seen = new IdentityHashMap<Schema,Schema>(1);
+ Map<Name,Name> aliases = new HashMap<Name, Name>(1);
+ Map<Name,Map<String,String>> fieldAliases =
+ new HashMap<Name, Map<String,String>>(1);
+ getAliases(reader, seen, aliases, fieldAliases);
+
+ if (aliases.size() == 0 && fieldAliases.size() == 0)
+ return writer; // no aliases
+
+ seen.clear();
+ return applyAliases(writer, seen, aliases, fieldAliases);
+ }
+
+ /** Recursively rewrites {@code s}, renaming named types found in
+  * {@code aliases} and record fields found in {@code fieldAliases}.
+  * {@code seen} maps each record already visited to its rewritten copy.
+  * Schemas of any type not handled by the switch are returned unchanged. */
+ private static Schema applyAliases(Schema s, Map<Schema,Schema> seen,
+ Map<Name,Name> aliases,
+ Map<Name,Map<String,String>> fieldAliases){
+
+ Name name = s instanceof NamedSchema ? ((NamedSchema)s).name : null;
+ Schema result = s;
+ switch (s.getType()) {
+ case RECORD:
+ // a record is always re-created, whether or not an alias applies to it
+ if (seen.containsKey(s)) return seen.get(s); // break loops
+ if (aliases.containsKey(name))
+ name = aliases.get(name);
+ result = Schema.createRecord(name.full, s.getDoc(), null, s.isError());
+ // register the copy before recursing so nested references to this
+ // record resolve to it
+ seen.put(s, result);
+ List<Field> newFields = new ArrayList<Field>();
+ for (Field f : s.getFields()) {
+ Schema fSchema = applyAliases(f.schema, seen, aliases, fieldAliases);
+ // field aliases are looked up under the (possibly renamed) record name
+ String fName = getFieldAlias(name, f.name, fieldAliases);
+ Field newF = new Field(fName, fSchema, f.doc, f.defaultValue, f.order);
+ newF.props.putAll(f.props); // copy props
+ newFields.add(newF);
+ }
+ result.setFields(newFields);
+ break;
+ case ENUM:
+ // re-created only when an alias renames it
+ if (aliases.containsKey(name))
+ result = Schema.createEnum(aliases.get(name).full, s.getDoc(), null,
+ s.getEnumSymbols());
+ break;
+ case ARRAY:
+ // re-created only when the element schema was rewritten
+ Schema e = applyAliases(s.getElementType(), seen, aliases, fieldAliases);
+ if (e != s.getElementType())
+ result = Schema.createArray(e);
+ break;
+ case MAP:
+ // re-created only when the value schema was rewritten
+ Schema v = applyAliases(s.getValueType(), seen, aliases, fieldAliases);
+ if (v != s.getValueType())
+ result = Schema.createMap(v);
+ break;
+ case UNION:
+ // a union is always rebuilt from its rewritten branches
+ List<Schema> types = new ArrayList<Schema>();
+ for (Schema branch : s.getTypes())
+ types.add(applyAliases(branch, seen, aliases, fieldAliases));
+ result = Schema.createUnion(types);
+ break;
+ case FIXED:
+ // re-created only when an alias renames it
+ if (aliases.containsKey(name))
+ result = Schema.createFixed(aliases.get(name).full, s.getDoc(), null,
+ s.getFixedSize());
+ break;
+ }
+ if (result != s)
+ result.props.putAll(s.props); // copy props
+ return result;
+ }
+
+
+ /** Walks {@code schema} and fills the two alias indexes.
+  * {@code aliases} receives alias name -> actual name for every named
+  * schema.  {@code fieldAliases} receives record name -> (field alias ->
+  * field name).  {@code seen} holds the records already visited. */
+ private static void getAliases(Schema schema,
+ Map<Schema,Schema> seen,
+ Map<Name,Name> aliases,
+ Map<Name,Map<String,String>> fieldAliases) {
+ if (schema instanceof NamedSchema) {
+ NamedSchema namedSchema = (NamedSchema)schema;
+ if (namedSchema.aliases != null)
+ for (Name alias : namedSchema.aliases)
+ aliases.put(alias, namedSchema.name);
+ }
+ switch (schema.getType()) {
+ case RECORD:
+ if (seen.containsKey(schema)) return; // break loops
+ seen.put(schema, schema);
+ RecordSchema record = (RecordSchema)schema;
+ for (Field field : schema.getFields()) {
+ if (field.aliases != null)
+ for (String fieldAlias : field.aliases) {
+ // the per-record map is created the first time a field alias is found
+ Map<String,String> recordAliases = fieldAliases.get(record.name);
+ if (recordAliases == null)
+ fieldAliases.put(record.name,
+ recordAliases = new HashMap<String,String>());
+ recordAliases.put(fieldAlias, field.name);
+ }
+ getAliases(field.schema, seen, aliases, fieldAliases);
+ }
+ // register the same field-alias map under each of the record's own aliases
+ if (record.aliases != null && fieldAliases.containsKey(record.name))
+ for (Name recordAlias : record.aliases)
+ fieldAliases.put(recordAlias, fieldAliases.get(record.name));
+ break;
+ case ARRAY:
+ getAliases(schema.getElementType(), seen, aliases, fieldAliases);
+ break;
+ case MAP:
+ getAliases(schema.getValueType(), seen, aliases, fieldAliases);
+ break;
+ case UNION:
+ for (Schema s : schema.getTypes())
+ getAliases(s, seen, aliases, fieldAliases);
+ break;
+ }
+ }
+
+ /** Returns the name that {@code field} of {@code record} should carry
+  * after aliases are applied.
+  * @param record name of the record that owns the field
+  * @param field the field's current name
+  * @param fieldAliases record name -> (field alias -> field name)
+  * @return the mapped name, or {@code field} itself when the record has no
+  * field aliases or none of them matches {@code field} */
+ private static String getFieldAlias
+   (Name record, String field, Map<Name,Map<String,String>> fieldAliases) {
+   Map<String,String> recordAliases = fieldAliases.get(record);
+   if (recordAliases == null)
+     return field;
+   String renamed = recordAliases.get(field);
+   // a record may alias only some of its fields; the others keep their
+   // name instead of becoming null
+   return renamed == null ? field : renamed;
+ }
+
/**
* No change is permitted on LockableArrayList once lock() has been
* called on it.
Modified: avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java?rev=986455&r1=986454&r2=986455&view=diff
==============================================================================
--- avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java (original)
+++ avro/trunk/lang/java/src/java/org/apache/avro/generic/GenericDatumReader.java Tue Aug 17 19:09:44 2010
@@ -82,7 +82,8 @@ public class GenericDatumReader<D> imple
}
ResolvingDecoder resolver = cache.get(expected);
if (resolver == null) {
- resolver = new ResolvingDecoder(actual, expected, null);
+ resolver = new ResolvingDecoder(Schema.applyAliases(actual, expected),
+ expected, null);
cache.put(expected, resolver);
}
return resolver;
Modified: avro/trunk/lang/java/src/test/java/org/apache/avro/TestSchema.java
URL: http://svn.apache.org/viewvc/avro/trunk/lang/java/src/test/java/org/apache/avro/TestSchema.java?rev=986455&r1=986454&r2=986455&view=diff
==============================================================================
--- avro/trunk/lang/java/src/test/java/org/apache/avro/TestSchema.java (original)
+++ avro/trunk/lang/java/src/test/java/org/apache/avro/TestSchema.java Tue Aug 17 19:09:44 2010
@@ -372,6 +372,39 @@ public class TestSchema {
assertEquals("Inner Union", schema.getField("inner_union").doc());
}
+ /** Checks that Schema.applyAliases rewrites a writer schema into a new
+  * schema equal to the reader schema, for a record, an enum and a fixed. */
+ @Test
+ public void testAliases() throws Exception {
+ // record: both the type name (a.b -> x.y) and the field name (f -> g)
+ // are renamed through aliases
+ String t1 = "{\"type\":\"record\",\"name\":\"a.b\","
+ +"\"fields\":[{\"name\":\"f\",\"type\":\"long\"}]}";
+ String t2 = "{\"type\":\"record\",\"name\":\"x.y\",\"aliases\":[\"a.b\"],"
+ +"\"fields\":[{\"name\":\"g\",\"type\":\"long\",\"aliases\":[\"f\"]}]}";
+ Schema s1 = Schema.parse(t1);
+ Schema s2 = Schema.parse(t2);
+ Schema s3 = Schema.applyAliases(s1,s2);
+ assertFalse(s2 == s3);
+ assertEquals(s2, s3);
+
+ // enum: the alias "b" is given without a namespace
+ t1 = "{\"type\":\"enum\",\"name\":\"a.b\","
+ +"\"symbols\":[\"x\"]}";
+ t2 = "{\"type\":\"enum\",\"name\":\"a.c\",\"aliases\":[\"b\"],"
+ +"\"symbols\":[\"x\"]}";
+ s1 = Schema.parse(t1);
+ s2 = Schema.parse(t2);
+ s3 = Schema.applyAliases(s1,s2);
+ assertFalse(s2 == s3);
+ assertEquals(s2, s3);
+
+ // fixed: neither the name nor the alias has a namespace
+ t1 = "{\"type\":\"fixed\",\"name\":\"a\","
+ +"\"size\": 5}";
+ t2 = "{\"type\":\"fixed\",\"name\":\"b\",\"aliases\":[\"a\"],"
+ +"\"size\": 5}";
+ s1 = Schema.parse(t1);
+ s2 = Schema.parse(t2);
+ s3 = Schema.applyAliases(s1,s2);
+ assertFalse(s2 == s3);
+ assertEquals(s2, s3);
+ }
+
private static void check(String schemaJson, String defaultJson,
Object defaultValue) throws Exception {
check(schemaJson, defaultJson, defaultValue, true);