You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@arrow.apache.org by "Rock Wang (JIRA)" <ji...@apache.org> on 2017/02/01 21:41:52 UTC

[jira] [Commented] (ARROW-522) VectorLoader throws exception data schema contains list of maps.

    [ https://issues.apache.org/jira/browse/ARROW-522?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15848971#comment-15848971 ] 

Rock Wang commented on ARROW-522:
---------------------------------

It seems fixed the exception by adding the following to /org/apache/arrow/vector/complex/ListVector.java
{code:java}
    if (!field.getChildren().isEmpty()) {
      addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
    }
{code}

as in 

{code:java}
  /**
   * Creates this list's single child vector from the given schema fields and, when that child
   * field declares children of its own, initializes those on the newly created child vector.
   *
   * @param children the child fields declared for this list; must contain exactly one field
   * @throws IllegalArgumentException if {@code children} does not hold exactly one field, or if
   *     the child vector already existed
   */
  @Override
  public void initializeChildrenFromFields(List<Field> children) {
    if (children.size() != 1) {
      throw new IllegalArgumentException("Lists have only one child. Found: " + children);
    }
    Field field = children.get(0);
    MinorType minorType = Types.getMinorTypeForArrowType(field.getType());
    AddOrGetResult<FieldVector> addOrGetVector = addOrGetVector(minorType);
    if (!addOrGetVector.isCreated()) {
      throw new IllegalArgumentException("Child vector already existed: " + addOrGetVector.getVector());
    }
    // Proposed fix for ARROW-522: pass the child's own children down as well, so that a nested
    // child (e.g. a map inside this list) gets its children created instead of being left empty.
    if (!field.getChildren().isEmpty()) {
      addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren());
    }
  }
{code}

> VectorLoader throws exception data schema contains list of maps.
> ----------------------------------------------------------------
>
>                 Key: ARROW-522
>                 URL: https://issues.apache.org/jira/browse/ARROW-522
>             Project: Apache Arrow
>          Issue Type: Bug
>          Components: Java - Vectors
>    Affects Versions: 0.1.0
>            Reporter: Rock Wang
>            Priority: Critical
>
> I encountered this exception
> {code:java}
> Exception in thread "main" java.lang.IllegalArgumentException: should have as many children as in the schema: found 0 expected 2
>     at com.google.common.base.Preconditions.checkArgument(Preconditions.java:122)
>     at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:91)
>     at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:95)
>     at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:69)
> {code}
> The test code is
> {code:java}
> public class ArrowTest {
>     /**
>      * Read-only {@link SeekableByteChannel} over an in-memory byte array.
>      * Writing and truncating are unsupported; {@link #close()} releases the array.
>      */
>     public static class ByteArrayReadableSeekableByteChannel implements SeekableByteChannel {
>         private byte[] byteArray;
>         private int position = 0;
>         /**
>          * @param byteArray the bytes served by this channel
>          * @throws NullPointerException if {@code byteArray} is null
>          */
>         public ByteArrayReadableSeekableByteChannel(byte[] byteArray) {
>             if (byteArray == null) {
>                 throw new NullPointerException();
>             }
>             this.byteArray = byteArray;
>         }
>         /** Throws {@code ClosedChannelException} once {@link #close()} has dropped the array. */
>         private void ensureOpen() throws IOException {
>             if (byteArray == null) {
>                 throw new java.nio.channels.ClosedChannelException();
>             }
>         }
>         @Override
>         public boolean isOpen() {
>             return byteArray != null;
>         }
>         @Override
>         public void close() throws IOException {
>             byteArray = null;
>         }
>         /**
>          * Copies as many bytes as fit into {@code dst}, starting at the current position.
>          *
>          * @param dst the buffer to fill
>          * @return the number of bytes copied, or -1 if the position is at or past the end
>          * @throws IOException a {@code ClosedChannelException} if the channel has been closed
>          */
>         @Override
>         public int read(final ByteBuffer dst) throws IOException {
>             ensureOpen();
>             int remainingInBuf = byteArray.length - this.position;
>             if (remainingInBuf <= 0) {
>                 // ReadableByteChannel requires -1 at end-of-stream. Returning 0 here would make
>                 // callers that loop until their buffer is full spin forever, and a position past
>                 // the end would otherwise hand a negative length to ByteBuffer.put.
>                 return -1;
>             }
>             int length = Math.min(dst.remaining(), remainingInBuf);
>             dst.put(this.byteArray, this.position, length);
>             this.position += length;
>             return length;
>         }
>         @Override
>         public long position() throws IOException {
>             return this.position;
>         }
>         /**
>          * Moves the read position.
>          *
>          * @param newPosition the new position, between 0 and {@code Integer.MAX_VALUE} inclusive
>          * @return this channel
>          * @throws IllegalArgumentException if {@code newPosition} is outside that range
>          */
>         @Override
>         public SeekableByteChannel position(final long newPosition) throws IOException {
>             if (newPosition < 0 || newPosition > Integer.MAX_VALUE) {
>                 // A bare (int) cast would silently wrap to an unrelated offset.
>                 throw new IllegalArgumentException("Unsupported position: " + newPosition);
>             }
>             this.position = (int) newPosition;
>             return this;
>         }
>         /**
>          * @return the length of the backing array
>          * @throws IOException a {@code ClosedChannelException} if the channel has been closed
>          */
>         @Override
>         public long size() throws IOException {
>             ensureOpen();
>             return this.byteArray.length;
>         }
>         @Override
>         public int write(final ByteBuffer src) throws IOException {
>             throw new UnsupportedOperationException("Read only");
>         }
>         @Override
>         public SeekableByteChannel truncate(final long size) throws IOException {
>             throw new UnsupportedOperationException("Read only");
>         }
>     }
>     /**
>      * Reproduces the reported failure: writes ten rows through {@code write} into memory, then
>      * loads every record batch back with a {@code VectorLoader} and prints the rows.
>      *
>      * @param argv unused
>      * @throws Exception if writing, reading, or closing any resource fails
>      */
>     public static void main(String[] argv) throws Exception {
>         final int rowCount = 10;
>         ByteArrayOutputStream sink = new ByteArrayOutputStream();
>         // Write phase: fill the vectors, then serialize the "root" child into the sink.
>         try (BufferAllocator writeRoot = new RootAllocator(Integer.MAX_VALUE);
>                 BufferAllocator sourceAllocator =
>                         writeRoot.newChildAllocator("child allocator", 1024, Integer.MAX_VALUE);
>                 MapVector source = new MapVector("parent", sourceAllocator, null)
>         ) {
>             writeData(rowCount, source);
>             write(source.getChild("root"), Channels.newChannel(sink));
>         }
>         byte[] serialized = sink.toByteArray();
>         // Read phase: rebuild the vectors from the serialized bytes, batch by batch.
>         try (BufferAllocator readRoot = new RootAllocator(Integer.MAX_VALUE);
>                 BufferAllocator readerAllocator = readRoot.newChildAllocator("reader", 0, Integer.MAX_VALUE);
>                 ArrowReader reader = new ArrowReader(
>                         new ByteArrayReadableSeekableByteChannel(serialized), readerAllocator);
>                 BufferAllocator targetAllocator = readRoot.newChildAllocator("final vectors", 0, Integer.MAX_VALUE);
>                 MapVector target = new MapVector("parent", targetAllocator, null)
>         ) {
>             ArrowFooter fileFooter = reader.readFooter();
>             Schema fileSchema = fileFooter.getSchema();
>             NullableMapVector targetRoot = target.addOrGet("root", Types.MinorType.MAP, NullableMapVector.class);
>             VectorLoader loader = new VectorLoader(fileSchema, targetRoot);
>             for (ArrowBlock block : fileFooter.getRecordBatches()) {
>                 try (ArrowRecordBatch batch = reader.readRecordBatch(block)) {
>                     // Per the stack trace above, this is the call that throws.
>                     loader.load(batch);
>                 }
>                 readData(rowCount, target);
>             }
>         }
>     }
>     /**
>      * Writes {@code count} rows under the "root" map of {@code parent}. Each row holds an int
>      * "id" and a "list" of two maps, each map carrying an int "type" and a bigint "id".
>      *
>      * @param count number of rows to write
>      * @param parent vector that receives the "root" map
>      */
>     private static void writeData(int count, MapVector parent) throws Exception {
>         BaseWriter.ComplexWriter complexWriter = new ComplexWriterImpl("root", parent, true);
>         BaseWriter.MapWriter rowWriter = complexWriter.rootAsMap();
>         IntWriter idWriter = rowWriter.integer("id");
>         BaseWriter.ListWriter entriesWriter = rowWriter.list("list");
>         BaseWriter.MapWriter entryWriter = entriesWriter.map();
>         for (int row = 0; row < count; row++) {
>             rowWriter.start();
>             idWriter.setPosition(row);
>             idWriter.writeInt(row);
>             entriesWriter.setPosition(row);
>             entriesWriter.startList();
>             // Two map entries per list: (type=0, id=0) and (type=1, id=1000).
>             for (int entry = 0; entry < 2; entry++) {
>                 entryWriter.start();
>                 entryWriter.integer("type").writeInt(entry);
>                 entryWriter.bigInt("id").writeBigInt(entry * 1000L);
>                 entryWriter.end();
>             }
>             entriesWriter.endList();
>             rowWriter.end();
>         }
>         complexWriter.setValueCount(count);
>     }
>     /**
>      * Prints the "id" and "type" of every map found in the "list" of each of the first
>      * {@code count} rows under the "root" map of {@code parent}.
>      *
>      * @param count number of rows to visit
>      * @param parent vector holding the "root" map
>      */
>     private static void readData(int count, MapVector parent) {
>         BaseReader.MapReader rowReader = new SingleMapReaderImpl(parent).reader("root");
>         FieldReader entries = rowReader.reader("list");
>         for (int row = 0; row < count; row++) {
>             entries.setPosition(row);
>             // Advance through the list entries of this row.
>             while (entries.next()) {
>                 System.out.println(row + " id " + entries.reader().reader("id").readLong());
>                 System.out.println(row + " type " + entries.reader().reader("type").readInteger());
>             }
>         }
>     }
>     /**
>      * Unloads {@code parent} into a single record batch and writes it, using the unloader's
>      * schema, to {@code out}.
>      *
>      * @param parent vector to serialize
>      * @param out channel that receives the output
>      * @throws IOException if writing or closing the writer fails
>      */
>     private static void write(FieldVector parent, WritableByteChannel out) throws IOException {
>         VectorUnloader unloader = new VectorUnloader(parent);
>         try (
>                 ArrowWriter fileWriter = new ArrowWriter(out, unloader.getSchema());
>                 ArrowRecordBatch batch = unloader.getRecordBatch();
>         ) {
>             fileWriter.writeRecordBatch(batch);
>         }
>     }
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)