You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@arrow.apache.org by ALBERTO Bocchinfuso <al...@hotmail.it> on 2018/10/04 09:05:53 UTC
[Scala/Java] Writing a RecordBatch on a file using scala
Hi guys,
I’m trying to write a RecordBatch to a file using Scala. To do so, I use the Java API.
Aside from all the imports, my code is:
imports …
// Minimal reproduction posted to the list: builds a two-column record batch by
// hand from raw buffers, loads it into a VectorSchemaRoot and writes it to the
// file "TEST" through an ArrowFileWriter. As posted, this fails inside
// loader.load(btch) with "NoSuchElementException: next on empty iterator"
// (see the stack trace that follows in this message).
object Test {
def main(args: Array[String]) {
val allocator = new RootAllocator(8192);
// Schema: one non-nullable Utf8 field and one non-nullable 64-bit Int field.
// NOTE(review): both fields are declared with the name "names", while the
// second vector below is created as "numbers" -- presumably the second field
// was meant to be called "numbers".
val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));
val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));
// NOTE(review): both calls below target fieldVectors(0); the second one passes
// the children of fields(1), so fieldVectors(1) is never initialised here.
// Looks like a copy/paste slip -- confirm.
fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());
val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
// One byte with all eight bits set; passed below as the first buffer of each
// column (presumably a validity bitmap marking all 8 rows as present -- confirm).
val bitmap: ArrowBuf = allocator.buffer(1);
bitmap.writeByte(255);
// NOTE(review): fos, the allocator and the buffers are never closed in this snippet.
var fos: FileOutputStream = new FileOutputStream("TEST");
val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());
// Appends the raw bytes of every name back to back. getBytes() is called
// without a charset, so the platform default encoding is used.
val namesBuf: ArrowBuf = allocator.buffer(2048);
for(i <- names){
namesBuf.writeBytes(i.getBytes());
}
// NOTE(review): each number is written with writeInt although the field is
// declared as ArrowType.Int(64, false) -- presumably writeLong is needed for a
// 64-bit column; confirm.
var numbersBuf: ArrowBuf = allocator.buffer(2048);
for(i <- numbers){
numbersBuf.writeInt(i);
}
// Two field nodes (8, 0) and four buffers: bitmap + data for each column.
// NOTE(review): per the reply in this thread, a utf8 column plus an int64
// column needs 5 buffers; the buffer holding the offsets of the string values
// is missing, which is consistent with the loader running out of buffers.
// NOTE(review): the first argument is 8192 while every other count here is 8
// -- presumably this should be the row count; confirm against the constructor.
val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);
val loader: VectorLoader = new VectorLoader (vsr);
// Start the file, load the batch into vsr, write it out, then finish the file.
// The reported stack trace originates from loader.load(btch).
afw.start();
loader.load(btch);
afw.writeBatch();
afw.end();
}
}
After many debugging messages I get:
[error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
[error] java.util.NoSuchElementException: next on empty iterator
[error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
[error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
[error] at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
[error] at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
[error] at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
[error] at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
[error] at Test$.main(RecordBatchWriteTest.scala:53)
[error] at Test.main(RecordBatchWriteTest.scala)
[error] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[error] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
[error] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[error] at java.lang.reflect.Method.invoke(Method.java:498)
[error] at sbt.Run.invokeMain(Run.scala:93)
[error] at sbt.Run.run0(Run.scala:87)
[error] at sbt.Run.execute$1(Run.scala:65)
[error] at sbt.Run.$anonfun$run$4(Run.scala:77)
[error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error] at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
[error] at sbt.TrapExit$App.run(TrapExit.scala:252)
[error] at java.lang.Thread.run(Thread.java:745)
[error] java.lang.RuntimeException: Nonzero exit code: 1
[error] at sbt.Run$.executeTrapExit(Run.scala:124)
[error] at sbt.Run.run(Run.scala:77)
[error] at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
[error] at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
[error] at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
[error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error] at scala.util.Try$.apply(Try.scala:209)
[error] at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
[error] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[error] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[error] at java.lang.Thread.run(Thread.java:745)
[error] (Compile / run) Nonzero exit code: 1
To write the code I tried to follow step by step the procedure here:
https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
Can someone help me?
Thanks in advance,
Alberto
Re: I: [Scala/Java] Writing a RecordBatch on a file using scala
Posted by Wes McKinney <we...@gmail.com>.
hi Alberto,
in
val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0),
new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap,
numbersBuf).asJava);
this doesn't look right. With a string/utf8 field and an int64 field,
there should be 5 buffers (validity, offsets and data for the strings;
validity and data for the int64 values). You're missing a buffer
containing offsets for the string values.
- Wes
On Sat, Oct 6, 2018 at 4:09 PM ALBERTO Bocchinfuso
<al...@hotmail.it> wrote:
>
> Hi guys,
>
> I’m trying to write a RecordBatch on a file using scala. To do so, I use the java API.
> Aside from all the imports, my code is:
>
> imports …
> object Test {
> def main(args: Array[String]) {
> val allocator = new RootAllocator(8192);
>
> val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));
>
> val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));
>
> fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
> fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());
>
> val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
> val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
> val bitmap: ArrowBuf = allocator.buffer(1);
> bitmap.writeByte(255);
>
> var fos: FileOutputStream = new FileOutputStream("TEST");
> val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
> val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());
>
> val namesBuf: ArrowBuf = allocator.buffer(2048);
> for(i <- names){
> namesBuf.writeBytes(i.getBytes());
> }
>
> var numbersBuf: ArrowBuf = allocator.buffer(2048);
> for(i <- numbers){
> numbersBuf.writeInt(i);
> }
>
> val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);
>
> val loader: VectorLoader = new VectorLoader (vsr);
>
> afw.start();
> loader.load(btch);
> afw.writeBatch();
> afw.end();
> }
> }
>
> After many debugging messages I get:
>
> [error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
> [error] java.util.NoSuchElementException: next on empty iterator
> [error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
> [error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
> [error] at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
> [error] at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
> [error] at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
> [error] at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
> [error] at Test$.main(RecordBatchWriteTest.scala:53)
> [error] at Test.main(RecordBatchWriteTest.scala)
> [error] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
> [error] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
> [error] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
> [error] at java.lang.reflect.Method.invoke(Method.java:498)
> [error] at sbt.Run.invokeMain(Run.scala:93)
> [error] at sbt.Run.run0(Run.scala:87)
> [error] at sbt.Run.execute$1(Run.scala:65)
> [error] at sbt.Run.$anonfun$run$4(Run.scala:77)
> [error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
> [error] at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
> [error] at sbt.TrapExit$App.run(TrapExit.scala:252)
> [error] at java.lang.Thread.run(Thread.java:745)
> [error] java.lang.RuntimeException: Nonzero exit code: 1
> [error] at sbt.Run$.executeTrapExit(Run.scala:124)
> [error] at sbt.Run.run(Run.scala:77)
> [error] at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
> [error] at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
> [error] at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
> [error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
> [error] at scala.util.Try$.apply(Try.scala:209)
> [error] at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
> [error] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
> [error] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
> [error] at java.lang.Thread.run(Thread.java:745)
> [error] (Compile / run) Nonzero exit code: 1
>
> To write the code I tried to follow step by step the procedure here:
> https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
>
> Can someone help me?
>
> Thanks in advance,
> Alberto
I: [Scala/Java] Writing a RecordBatch on a file using scala
Posted by ALBERTO Bocchinfuso <al...@hotmail.it>.
Hi guys,
I’m trying to write a RecordBatch to a file using Scala. To do so, I use the Java API.
Aside from all the imports, my code is:
imports …
// Minimal reproduction posted to the list: builds a two-column record batch by
// hand from raw buffers, loads it into a VectorSchemaRoot and writes it to the
// file "TEST" through an ArrowFileWriter. As posted, this fails inside
// loader.load(btch) with "NoSuchElementException: next on empty iterator"
// (see the stack trace that follows in this message).
object Test {
def main(args: Array[String]) {
val allocator = new RootAllocator(8192);
// Schema: one non-nullable Utf8 field and one non-nullable 64-bit Int field.
// NOTE(review): both fields are declared with the name "names", while the
// second vector below is created as "numbers" -- presumably the second field
// was meant to be called "numbers".
val fields = List[Field](new Field("names", new FieldType(false, ArrowType.Utf8.INSTANCE, null), null), new Field("names", new FieldType(false, new ArrowType.Int(64, false), null), null));
val fieldVectors = List[FieldVector](fields(0).getFieldType().createNewSingleVector("names", allocator, null), fields(1).getFieldType().createNewSingleVector("numbers", allocator, null));
// NOTE(review): both calls below target fieldVectors(0); the second one passes
// the children of fields(1), so fieldVectors(1) is never initialised here.
// Looks like a copy/paste slip -- confirm.
fieldVectors(0).initializeChildrenFromFields(fields(0).getChildren());
fieldVectors(0).initializeChildrenFromFields(fields(1).getChildren());
val names = List[String]("Name1", "Name2", "Name3", "Name4", "Name5", "Name6", "Name7", "Name8");
val numbers = List[Int](1, 2, 3, 4, 5, 6, 7, 8);
// One byte with all eight bits set; passed below as the first buffer of each
// column (presumably a validity bitmap marking all 8 rows as present -- confirm).
val bitmap: ArrowBuf = allocator.buffer(1);
bitmap.writeByte(255);
// NOTE(review): fos, the allocator and the buffers are never closed in this snippet.
var fos: FileOutputStream = new FileOutputStream("TEST");
val vsr: VectorSchemaRoot = new VectorSchemaRoot(fields.asJava, fieldVectors.asJava, 8);
val afw: ArrowFileWriter = new ArrowFileWriter(vsr, null , fos.getChannel());
// Appends the raw bytes of every name back to back. getBytes() is called
// without a charset, so the platform default encoding is used.
val namesBuf: ArrowBuf = allocator.buffer(2048);
for(i <- names){
namesBuf.writeBytes(i.getBytes());
}
// NOTE(review): each number is written with writeInt although the field is
// declared as ArrowType.Int(64, false) -- presumably writeLong is needed for a
// 64-bit column; confirm.
var numbersBuf: ArrowBuf = allocator.buffer(2048);
for(i <- numbers){
numbersBuf.writeInt(i);
}
// Two field nodes (8, 0) and four buffers: bitmap + data for each column.
// NOTE(review): per the reply in this thread, a utf8 column plus an int64
// column needs 5 buffers; the buffer holding the offsets of the string values
// is missing, which is consistent with the loader running out of buffers.
// NOTE(review): the first argument is 8192 while every other count here is 8
// -- presumably this should be the row count; confirm against the constructor.
val btch = new ArrowRecordBatch(8192, List(new ArrowFieldNode(8, 0), new ArrowFieldNode(8, 0)).asJava, List(bitmap, namesBuf, bitmap, numbersBuf).asJava);
val loader: VectorLoader = new VectorLoader (vsr);
// Start the file, load the batch into vsr, write it out, then finish the file.
// The reported stack trace originates from loader.load(btch).
afw.start();
loader.load(btch);
afw.writeBatch();
afw.end();
}
}
After many debugging messages I get:
[error] (run-main-e) java.util.NoSuchElementException: next on empty iterator
[error] java.util.NoSuchElementException: next on empty iterator
[error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:38)
[error] at scala.collection.Iterator$$anon$2.next(Iterator.scala:36)
[error] at scala.collection.LinearSeqLike$$anon$1.next(LinearSeqLike.scala:47)
[error] at scala.collection.convert.Wrappers$IteratorWrapper.next(Wrappers.scala:28)
[error] at org.apache.arrow.vector.VectorLoader.loadBuffers(VectorLoader.java:76)
[error] at org.apache.arrow.vector.VectorLoader.load(VectorLoader.java:61)
[error] at Test$.main(RecordBatchWriteTest.scala:53)
[error] at Test.main(RecordBatchWriteTest.scala)
[error] at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
[error] at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
[error] at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
[error] at java.lang.reflect.Method.invoke(Method.java:498)
[error] at sbt.Run.invokeMain(Run.scala:93)
[error] at sbt.Run.run0(Run.scala:87)
[error] at sbt.Run.execute$1(Run.scala:65)
[error] at sbt.Run.$anonfun$run$4(Run.scala:77)
[error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error] at sbt.util.InterfaceUtil$$anon$1.get(InterfaceUtil.scala:10)
[error] at sbt.TrapExit$App.run(TrapExit.scala:252)
[error] at java.lang.Thread.run(Thread.java:745)
[error] java.lang.RuntimeException: Nonzero exit code: 1
[error] at sbt.Run$.executeTrapExit(Run.scala:124)
[error] at sbt.Run.run(Run.scala:77)
[error] at sbt.Defaults$.$anonfun$bgRunTask$5(Defaults.scala:1185)
[error] at sbt.Defaults$.$anonfun$bgRunTask$5$adapted(Defaults.scala:1180)
[error] at sbt.internal.BackgroundThreadPool.$anonfun$run$1(DefaultBackgroundJobService.scala:366)
[error] at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:12)
[error] at scala.util.Try$.apply(Try.scala:209)
[error] at sbt.internal.BackgroundThreadPool$BackgroundRunnable.run(DefaultBackgroundJobService.scala:289)
[error] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
[error] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
[error] at java.lang.Thread.run(Thread.java:745)
[error] (Compile / run) Nonzero exit code: 1
To write the code I tried to follow step by step the procedure here:
https://github.com/apache/arrow/blob/master/java/vector/src/test/java/org/apache/arrow/vector/ipc/TestArrowReaderWriter.java
Can someone help me?
Thanks in advance,
Alberto