You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@arrow.apache.org by ALBERTO Bocchinfuso <al...@hotmail.it> on 2018/10/04 09:05:53 UTC

[Scala/Java] Writing a RecordBatch on a file using scala

Hi guys,

I’m trying to write a RecordBatch to a file using Scala. To do so, I use the Java API.
Aside from all the imports, my code is:

imports …
/**
 * Minimal reproduction from the question: builds an Arrow schema with two columns,
 * hand-assembles an ArrowRecordBatch from raw buffers, loads it into a
 * VectorSchemaRoot through a VectorLoader, and writes it to the file "TEST"
 * with an ArrowFileWriter.
 *
 * NOTE(review): the stack trace posted with this code reports a
 * NoSuchElementException thrown from VectorLoader.loadBuffers, reached through
 * the loader.load(btch) call in main.
 */
object Test {
             // Entry point; `args` is not used.
             def main(args: Array[String]) {
                         // Root allocator constructed with 8192 — presumably the allocation limit in bytes; TODO confirm.
                         val allocator = new RootAllocator(8192);

                         // Schema: one Utf8 field and one Int(64, false) field.
                         // NOTE(review): both fields are given the name "names", while the vectors
                         // below are created as "names" and "numbers" — the second field name looks
                         // like a copy/paste slip; confirm.
                         val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));

                         // One vector per field, created from the field's FieldType.
                         val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));

                         // NOTE(review): both calls target fieldVectors(0); the second one passes the
                         // children of fields(1), so fieldVectors(1) is never initialised here —
                         // presumably fieldVectors(1) was intended on the second line.
                         fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
                         fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());

// Sample data: eight names and eight numbers.
val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
                         val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
                         // One-byte buffer holding 255 (all eight bits set). It is passed twice in the
                         // buffer list further down — presumably as the validity bitmap of each column.
                         val bitmap: ArrowBuf = allocator.buffer(1);
                          bitmap.writeByte(255);

                         // Output stream for the file "TEST".
                         // NOTE(review): fos is never reassigned (so `var` is unnecessary) and never closed.
                         var fos: FileOutputStream = new FileOutputStream("TEST");
          // Root built from the two fields, the two vectors and 8 (presumably the row count).
          val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
                         // Writer over vsr and the stream's channel; the second argument is null
                         // (presumably "no dictionary provider" — TODO confirm).
                         val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());

                         // Concatenates the raw bytes of every name into a single buffer.
                         // getBytes() with no argument uses the platform default charset.
                         // NOTE(review): no offsets are written anywhere; a variable-length Utf8
                         // column presumably needs an offsets buffer in addition to this data buffer.
                         val namesBuf: ArrowBuf = allocator.buffer(2048);
                         for(i <- names){
                                      namesBuf.writeBytes(i.getBytes());
                         }

                         // Writes each number with writeInt.
                         // NOTE(review): the field is declared Int(64, false); writeInt presumably
                         // writes 32-bit values — confirm whether writeLong is needed. numbersBuf is
                         // never reassigned, so `var` is unnecessary.
                         var numbersBuf: ArrowBuf = allocator.buffer(2048);
                         for(i <- numbers){
numbersBuf.writeInt(i);
}

                         // Batch made of two field nodes (8, 0) and four buffers:
                         // bitmap, namesBuf, bitmap, numbersBuf.
                         // NOTE(review): the first constructor argument is 8192 whereas the nodes and
                         // vsr use 8 — confirm what it denotes. Only four buffers are supplied; see the
                         // offsets note on namesBuf above.
                         val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);

                         val loader: VectorLoader = new VectorLoader (vsr);

                         // Start the writer, load the batch into vsr, write it, then finish the file.
                         // loader.load(btch) is the call that appears in the posted stack trace.
                         afw.start();
                         loader.load(btch);
                         afw.writeBatch();
                         afw.end();
             }
}

After many debugging messages I get:

[error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
[error] java.util.NoSuchElementException: next on empty iterator
[error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
[error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
[error]             at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
[error]             at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
[error]             at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
[error]             at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
[error]             at Test$.main(RecordBatchWriteTest.scala:53)
[error]             at Test.main(RecordBatchWriteTest.scala)
[error]             at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[error]             at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
[error]             at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[error]             at java.lang.reflect.Method.invoke(Method.java:498)
[error]             at sbt.Run.invokeMain(Run.scala:93)
[error]             at sbt.Run.run0(Run.scala:87)
[error]             at sbt.Run.execute$1(Run.scala:65)
[error]             at sbt.Run.$anonfun$run$4(Run.scala:77)
[error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error]             at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
[error]             at sbt.TrapExit$App.run(TrapExit.scala:252)
[error]             at java.lang.Thread.run(Thread.java:745)
[error] java.lang.RuntimeException: Nonzero exit code: 1
[error]             at sbt.Run$.executeTrapExit(Run.scala:124)
[error]             at sbt.Run.run(Run.scala:77)
[error]             at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
[error]             at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
[error]             at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
[error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error]             at scala.util.Try$.apply(Try.scala:209)
[error]             at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
[error]             at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[error]             at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[error]             at java.lang.Thread.run(Thread.java:745)
[error] (Compile / run) Nonzero exit code: 1

To write the code I tried to follow step by step the procedure here:
https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java

Can someone help me?

Thanks in advance,
Alberto


Re: I: [Scala/Java] Writing a RecordBatch on a file using scala

Posted by Wes McKinney <we...@gmail.com>.
hi Alberto,

in

val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0),
new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap,
numbersBuf).asJava);

this doesn't look right. With a string/utf8 field and an int64 field,
there should be 5 buffers. You're missing a buffer containing offsets
for the string values.

- Wes
On Sat, Oct 6, 2018 at 4:09 PM ALBERTO Bocchinfuso
<al...@hotmail.it> wrote:
>
> Hi guys,
>
> I’m trying to write a RecordBatch on a file using scala. To do so, I use the java API.
> Aside from all the imports, my code is:
>
> imports …
> object Test {
>              def main(args: Array[String]) {
>                          val allocator = new RootAllocator(8192);
>
>                          val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));
>
>                          val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));
>
>                          fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
>                          fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());
>
> val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
>                          val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
>                          val bitmap: ArrowBuf = allocator.buffer(1);
>                           bitmap.writeByte(255);
>
>                          var fos: FileOutputStream = new FileOutputStream("TEST");
>           val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
>                          val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());
>
>                          val namesBuf: ArrowBuf = allocator.buffer(2048);
>                          for(i <- names){
>                                       namesBuf.writeBytes(i.getBytes());
>                          }
>
>                          var numbersBuf: ArrowBuf = allocator.buffer(2048);
>                          for(i <- numbers){
> numbersBuf.writeInt(i);
> }
>
>                          val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);
>
>                          val loader: VectorLoader = new VectorLoader (vsr);
>
>                          afw.start();
>                          loader.load(btch);
>                          afw.writeBatch();
>                          afw.end();
>              }
> }
>
> After many debugging messages I get:
>
> [error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
> [error] java.util.NoSuchElementException: next on empty iterator
> [error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
> [error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
> [error]             at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
> [error]             at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
> [error]             at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
> [error]             at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
> [error]             at Test$.main(RecordBatchWriteTest.scala:53)
> [error]             at Test.main(RecordBatchWriteTest.scala)
> [error]             at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> [error]             at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> [error]             at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> [error]             at java.lang.reflect.Method.invoke(Method.java:498)
> [error]             at sbt.Run.invokeMain(Run.scala:93)
> [error]             at sbt.Run.run0(Run.scala:87)
> [error]             at sbt.Run.execute$1(Run.scala:65)
> [error]             at sbt.Run.$anonfun$run$4(Run.scala:77)
> [error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
> [error]             at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
> [error]             at sbt.TrapExit$App.run(TrapExit.scala:252)
> [error]             at java.lang.Thread.run(Thread.java:745)
> [error] java.lang.RuntimeException: Nonzero exit code: 1
> [error]             at sbt.Run$.executeTrapExit(Run.scala:124)
> [error]             at sbt.Run.run(Run.scala:77)
> [error]             at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
> [error]             at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
> [error]             at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
> [error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
> [error]             at scala.util.Try$.apply(Try.scala:209)
> [error]             at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
> [error]             at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> [error]             at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> [error]             at java.lang.Thread.run(Thread.java:745)
> [error] (Compile / run) Nonzero exit code: 1
>
> To write the code I tried to follow step by step the procedure here:
> https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
>
> Can someone help me?
>
> Thanks in advance,
> Alberto

I: [Scala/Java] Writing a RecordBatch on a file using scala

Posted by ALBERTO Bocchinfuso <al...@hotmail.it>.
Hi guys,

I’m trying to write a RecordBatch to a file using Scala. To do so, I use the Java API.
Aside from all the imports, my code is:

imports …
/**
 * Minimal reproduction from the question: builds an Arrow schema with two columns,
 * hand-assembles an ArrowRecordBatch from raw buffers, loads it into a
 * VectorSchemaRoot through a VectorLoader, and writes it to the file "TEST"
 * with an ArrowFileWriter.
 *
 * NOTE(review): the stack trace posted with this code reports a
 * NoSuchElementException thrown from VectorLoader.loadBuffers, reached through
 * the loader.load(btch) call in main.
 */
object Test {
             // Entry point; `args` is not used.
             def main(args: Array[String]) {
                         // Root allocator constructed with 8192 — presumably the allocation limit in bytes; TODO confirm.
                         val allocator = new RootAllocator(8192);

                         // Schema: one Utf8 field and one Int(64, false) field.
                         // NOTE(review): both fields are given the name "names", while the vectors
                         // below are created as "names" and "numbers" — the second field name looks
                         // like a copy/paste slip; confirm.
                         val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));

                         // One vector per field, created from the field's FieldType.
                         val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));

                         // NOTE(review): both calls target fieldVectors(0); the second one passes the
                         // children of fields(1), so fieldVectors(1) is never initialised here —
                         // presumably fieldVectors(1) was intended on the second line.
                         fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
                         fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());

// Sample data: eight names and eight numbers.
val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
                         val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
                         // One-byte buffer holding 255 (all eight bits set). It is passed twice in the
                         // buffer list further down — presumably as the validity bitmap of each column.
                         val bitmap: ArrowBuf = allocator.buffer(1);
                          bitmap.writeByte(255);

                         // Output stream for the file "TEST".
                         // NOTE(review): fos is never reassigned (so `var` is unnecessary) and never closed.
                         var fos: FileOutputStream = new FileOutputStream("TEST");
          // Root built from the two fields, the two vectors and 8 (presumably the row count).
          val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
                         // Writer over vsr and the stream's channel; the second argument is null
                         // (presumably "no dictionary provider" — TODO confirm).
                         val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());

                         // Concatenates the raw bytes of every name into a single buffer.
                         // getBytes() with no argument uses the platform default charset.
                         // NOTE(review): no offsets are written anywhere; a variable-length Utf8
                         // column presumably needs an offsets buffer in addition to this data buffer.
                         val namesBuf: ArrowBuf = allocator.buffer(2048);
                         for(i <- names){
                                      namesBuf.writeBytes(i.getBytes());
                         }

                         // Writes each number with writeInt.
                         // NOTE(review): the field is declared Int(64, false); writeInt presumably
                         // writes 32-bit values — confirm whether writeLong is needed. numbersBuf is
                         // never reassigned, so `var` is unnecessary.
                         var numbersBuf: ArrowBuf = allocator.buffer(2048);
                         for(i <- numbers){
numbersBuf.writeInt(i);
}

                         // Batch made of two field nodes (8, 0) and four buffers:
                         // bitmap, namesBuf, bitmap, numbersBuf.
                         // NOTE(review): the first constructor argument is 8192 whereas the nodes and
                         // vsr use 8 — confirm what it denotes. Only four buffers are supplied; see the
                         // offsets note on namesBuf above.
                         val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);

                         val loader: VectorLoader = new VectorLoader (vsr);

                         // Start the writer, load the batch into vsr, write it, then finish the file.
                         // loader.load(btch) is the call that appears in the posted stack trace.
                         afw.start();
                         loader.load(btch);
                         afw.writeBatch();
                         afw.end();
             }
}

After many debugging messages I get:

[error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
[error] java.util.NoSuchElementException: next on empty iterator
[error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
[error]             at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
[error]             at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
[error]             at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
[error]             at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
[error]             at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
[error]             at Test$.main(RecordBatchWriteTest.scala:53)
[error]             at Test.main(RecordBatchWriteTest.scala)
[error]             at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[error]             at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
[error]             at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[error]             at java.lang.reflect.Method.invoke(Method.java:498)
[error]             at sbt.Run.invokeMain(Run.scala:93)
[error]             at sbt.Run.run0(Run.scala:87)
[error]             at sbt.Run.execute$1(Run.scala:65)
[error]             at sbt.Run.$anonfun$run$4(Run.scala:77)
[error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error]             at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
[error]             at sbt.TrapExit$App.run(TrapExit.scala:252)
[error]             at java.lang.Thread.run(Thread.java:745)
[error] java.lang.RuntimeException: Nonzero exit code: 1
[error]             at sbt.Run$.executeTrapExit(Run.scala:124)
[error]             at sbt.Run.run(Run.scala:77)
[error]             at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
[error]             at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
[error]             at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
[error]             at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error]             at scala.util.Try$.apply(Try.scala:209)
[error]             at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
[error]             at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[error]             at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[error]             at java.lang.Thread.run(Thread.java:745)
[error] (Compile / run) Nonzero exit code: 1

To write the code I tried to follow step by step the procedure here:
https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java

Can someone help me?

Thanks in advance,
Alberto