You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by th...@apache.org on 2019/05/01 19:11:18 UTC
[avro] 08/14: Changes for what is needed for arrays and maps.
This is an automated email from the ASF dual-hosted git repository.
thiru pushed a commit to branch fast-decoder-thiru
in repository https://gitbox.apache.org/repos/asf/avro.git
commit 32999b8bd7872aaf5488a52966b5605cdf98d82d
Author: rstata <rs...@yahoo.com>
AuthorDate: Tue Apr 30 11:05:49 2019 -0700
Changes for what is needed for arrays and maps.
---
.../java/org/apache/avro/generic/Advancer.java | 88 +++++++++++++++-------
1 file changed, 60 insertions(+), 28 deletions(-)
diff --git a/lang/java/avro/src/main/java/org/apache/avro/generic/Advancer.java b/lang/java/avro/src/main/java/org/apache/avro/generic/Advancer.java
index d75e9ee..4fe3d7b 100644
--- a/lang/java/avro/src/main/java/org/apache/avro/generic/Advancer.java
+++ b/lang/java/avro/src/main/java/org/apache/avro/generic/Advancer.java
@@ -46,10 +46,9 @@ import org.apache.avro.util.Utf8;
*
* When traversing an Array or Map in the reader schema, the decoder
* should call {@link getElementAdvancer} to retrieve the advancer
- * object for the contained element-schema, value-schema, or non-null
- * schema respectively. ({@link next} cannot be called on {@link
- * Advancer.Record} objects -- decoders must decode them field by
- * field.)
+ * object for the contained element-schema or value-schema. See the
+ * JavaDoc for {@link getElementAdvancer} for instructions on how to
+ * decode these types.
*
* For unions, the decoder should call {@link nextIndex} to fetch the
* branch and then {@link getBranchAdvancer} to get the advancer of
@@ -57,11 +56,11 @@ import org.apache.avro.util.Utf8;
* index, pick the right advancer based on the index, and then read
* and return the actual value.)
*
- * Traversing an record is more involved. The decoder should call
- * {@link getRecordAdvancer} and proceed as described in the
- * documentation for {@link Advancer.Record}. ({@link next} cannot
- * be called on {@link Advancer.Record} objects -- decoders must
- * decode them field by field.)
+ * Traversing records, arrays, and maps is more involved. In the
+ * case of an array or map, call {@link getContainerAdvancer} and
+ * proceed as described in the documentation for {@link
+ * Advancer.Container}. For records, best to just look at the
+ * implementation of {@link GenericDatumReader2}.
**/
abstract class Advancer {
protected Exception exception() {
@@ -100,12 +99,6 @@ abstract class Advancer {
return nextFixed(in, bytes, 0, bytes.length);
}
- /** Access to contained advancer (for Array and Map types). */
- public Advancer getElementAdvancer(Decoder in) throws IOException {
- exception();
- return null;
- }
-
/** Get index for a union. */
public int nextIndex(Decoder in) throws IOException { exception(); return 0; }
@@ -116,7 +109,12 @@ abstract class Advancer {
return null;
}
- /** Access to contained advancer (for Array, Map, and Union types). */
+ /** Access to advancer for array or map type. */
+ public Container getContainerAdvancer(Decoder in) throws IOException {
+ exception();
+ return null;
+ }
+ /** Access to advancer for record type. */
public Record getRecordAdvancer(Decoder in) throws IOException {
exception();
return null;
@@ -165,7 +163,9 @@ abstract class Advancer {
else return new EnumWithAdjustments(e.adjustments);
case CONTAINER:
- return new Container(Advancer.from(((Resolver.Container)a).elementAction));
+ Advancer ea = Advancer.from(((Resolver.Container)a).elementAction);
+ if (a.writer.getType() == Schema.Type.ARRAY) return new ArrayContainer(ea);
+ else return new MapContainer(ea);
case RECORD:
return Advancer.Record.from((Resolver.RecordAdjust)a);
@@ -219,14 +219,46 @@ abstract class Advancer {
}
}
- /** Used for Array, Map, and Union. In case of Union, since we only
- * support "nullable" unions (ie, two-branch unions in which one
- * branch is null), the element advancer is for the non-null branch
- * of the union. */
- private static class Container extends Advancer {
+ /** Used for Array and Map. The following fragment illustrates how
+ * to use to read an array of int:
+ *
+ * <pre>
+ * Advancer.Container c = advancer.getContainerAdvancer(in);
+ * Advancer.Container ec = c.getElementAdvancer(in);
+ * for(long i = c.firstChunk(in); i != 0; i = c.nextChunk(in)) {
+ * for (long j = 0; j < i; j++) {
+ * int element = c.readInt(in);
+ * // .. do something with this element
+ * }
+ * }
+ * </pre>
+ * See the implementation of {@link GenericDatumReader2} for more
+ * illustrations. */
+ public abstract static class Container extends Advancer {
private final Advancer elementAdvancer;
public Container(Advancer elementAdvancer) { this.elementAdvancer = elementAdvancer; }
+ public Container getContainerAdvancer(Decoder in) { return this; }
public Advancer getElementAdvancer(Decoder in) { return elementAdvancer; }
+ public abstract long firstChunk(Decoder in) throws IOException;
+ public abstract long nextChunk(Decoder in) throws IOException;
+ }
+
+ private static class ArrayContainer extends Container {
+ private final Advancer elementAdvancer;
+ public ArrayContainer(Advancer elementAdvancer) { super(elementAdvancer); }
+ public long firstChunk(Decoder in) throws IOException
+ { return in.readArrayStart(); }
+ public long nextChunk(Decoder in) throws IOException
+ { return in.arrayNext(); }
+ }
+
+ private static class MapContainer extends Container {
+ private final Advancer elementAdvancer;
+ public MapContainer(Advancer elementAdvancer) { super(elementAdvancer); }
+ public long firstChunk(Decoder in) throws IOException
+ { return in.readMapStart(); }
+ public long nextChunk(Decoder in) throws IOException
+ { return in.mapNext(); }
}
//// The following set of subclasses are for when there is no
@@ -449,13 +481,13 @@ abstract class Advancer {
public byte[] nextFixed(Decoder in, byte[] bytes, int start, int length) throws IOException
{ return b(in).nextFixed(in, bytes, start, length); }
- public Advancer getElementAdvancer(Decoder in) throws IOException
- { return b(in).getElementAdvancer(in); }
-
public int nextIndex(Decoder in) throws IOException { return b(in).nextIndex(in); }
public Advancer getBranchAdvancer(Decoder in, int branch) throws IOException
{ return b(in).getBranchAdvancer(in, branch); }
+ public Container getContainerAdvancer(Decoder in) throws IOException
+ { return b(in).getContainerAdvancer(in); }
+
public Record getRecordAdvancer(Decoder in) throws IOException
{ return b(in).getRecordAdvancer(in); }
}
@@ -630,15 +662,15 @@ abstract class Advancer {
public byte[] nextFixed(Decoder in, byte[] bytes, int start, int len) throws IOException
{ ignore(toSkip, in); return field.nextFixed(in, bytes, start, len); }
- public Advancer getElementAdvancer(Decoder in) throws IOException
- { ignore(toSkip, in); return field.getElementAdvancer(in); }
-
public int nextIndex(Decoder in) throws IOException
{ ignore(toSkip, in); return field.nextIndex(in); }
public Advancer getBranchAdvancer(Decoder in, int branch) throws IOException
{ ignore(toSkip, in); return field.getBranchAdvancer(in, branch); }
+ public Container getContainerAdvancer(Decoder in) throws IOException
+ { ignore(toSkip, in); return field.getContainerAdvancer(in); }
+
public Record getRecordAdvancer(Decoder in) throws IOException
{ ignore(toSkip, in); return field.getRecordAdvancer(in); }
}