You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2020/03/27 20:47:48 UTC

[orc] branch branch-1.6 updated (23f0c1a -> 23314e5)

This is an automated email from the ASF dual-hosted git repository.

omalley pushed a change to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git.


    from 23f0c1a  ORC-610: Updated Copyright year in the NOTICE file
     new 3b96847  Minor fix for docker script.
     new 23314e5  ORC-613: Fix OrcMapredRecordReader when dealing with union of multiple structs with different schema

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 docker/run-all.sh                                  |  2 +-
 .../apache/orc/mapred/OrcMapredRecordReader.java   | 17 ++++-
 .../test/org/apache/orc/mapred/TestOrcStruct.java  | 89 +++++++++++++++++++++-
 3 files changed, 103 insertions(+), 5 deletions(-)


[orc] 02/02: ORC-613: Fix OrcMapredRecordReader when dealing with union of multiple structs with different schema

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 23314e5b159108329627cadf2769b0b2b800d502
Author: Lei Sun <le...@apache.org>
AuthorDate: Tue Mar 17 16:20:39 2020 -0700

    ORC-613: Fix OrcMapredRecordReader when dealing with union of multiple
    structs with different schema
    
    Fixes #498
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 .../apache/orc/mapred/OrcMapredRecordReader.java   | 17 ++++-
 .../test/org/apache/orc/mapred/TestOrcStruct.java  | 89 +++++++++++++++++++++-
 2 files changed, 102 insertions(+), 4 deletions(-)

diff --git a/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordReader.java b/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordReader.java
index ea49788..0a58774 100644
--- a/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordReader.java
+++ b/java/mapreduce/src/java/org/apache/orc/mapred/OrcMapredRecordReader.java
@@ -394,10 +394,10 @@ public class OrcMapredRecordReader<V extends WritableComparable>
       OrcStruct result;
       List<TypeDescription> childrenTypes = schema.getChildren();
       int numChildren = childrenTypes.size();
-      if (previous == null || previous.getClass() != OrcStruct.class) {
-        result = new OrcStruct(schema);
-      } else {
+      if (isReusable(previous, schema)) {
         result = (OrcStruct) previous;
+      } else {
+        result = new OrcStruct(schema);
       }
       StructColumnVector struct = (StructColumnVector) vector;
       for(int f=0; f < numChildren; ++f) {
@@ -410,6 +410,17 @@ public class OrcMapredRecordReader<V extends WritableComparable>
     }
   }
 
+  /**
+   * Determine if an OrcStruct object is reusable.
+   */
+  private static boolean isReusable(Object previous, TypeDescription schema) {
+    if (previous == null || previous.getClass() != OrcStruct.class) {
+      return false;
+    }
+
+    return ((OrcStruct) previous).getSchema().equals(schema);
+  }
+
   static OrcUnion nextUnion(ColumnVector vector,
                             int row,
                             TypeDescription schema,
diff --git a/java/mapreduce/src/test/org/apache/orc/mapred/TestOrcStruct.java b/java/mapreduce/src/test/org/apache/orc/mapred/TestOrcStruct.java
index 82699ed..b579d9a 100644
--- a/java/mapreduce/src/test/org/apache/orc/mapred/TestOrcStruct.java
+++ b/java/mapreduce/src/test/org/apache/orc/mapred/TestOrcStruct.java
@@ -18,20 +18,39 @@
 
 package org.apache.orc.mapred;
 
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.FileMetadataCache;
+import org.apache.hadoop.io.BooleanWritable;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.orc.CompressionKind;
+import org.apache.orc.FileMetadata;
+import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
+import org.apache.orc.Reader;
+import org.apache.orc.StripeInformation;
 import org.apache.orc.TypeDescription;
+import org.apache.orc.Writer;
+import org.junit.Assert;
 import org.junit.Rule;
 import org.junit.Test;
 import org.junit.rules.ExpectedException;
 
-import java.io.IOException;
+import com.google.common.io.Files;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertNotEquals;
 
+
 public class TestOrcStruct {
 
   @Rule
@@ -67,6 +86,74 @@ public class TestOrcStruct {
   }
 
   @Test
+  public void testMapredRead() throws Exception {
+    TypeDescription internalStruct_0 = TypeDescription.createStruct()
+        .addField("field0", TypeDescription.createString())
+        .addField("field1", TypeDescription.createBoolean());
+    TypeDescription internalStruct_1 = TypeDescription.createStruct();
+    TypeDescription internalStruct_2 = TypeDescription.createStruct().addField("f0", TypeDescription.createInt());
+
+    TypeDescription unionWithMultipleStruct = TypeDescription.createUnion()
+        .addUnionChild(internalStruct_0)
+        .addUnionChild(internalStruct_1)
+        .addUnionChild(internalStruct_2);
+
+    OrcStruct o1 = new OrcStruct(internalStruct_0);
+    o1.setFieldValue("field0", new Text("key"));
+    o1.setFieldValue("field1", new BooleanWritable(true));
+
+    OrcStruct o2 = new OrcStruct(internalStruct_0);
+    o2.setFieldValue("field0", new Text("key_1"));
+    o2.setFieldValue("field1", new BooleanWritable(false));
+
+    OrcStruct o3 = new OrcStruct(TypeDescription.createStruct());
+
+    OrcStruct o4 = new OrcStruct(internalStruct_2);
+    o4.setFieldValue("f0", new IntWritable(1));
+
+    OrcUnion u1 = new OrcUnion(unionWithMultipleStruct);
+    u1.set(0, o1);
+    OrcUnion u2 = new OrcUnion(unionWithMultipleStruct);
+    u2.set(0, o2);
+    OrcUnion u3 = new OrcUnion(unionWithMultipleStruct);
+    u3.set(1, o3);
+    OrcUnion u4 = new OrcUnion(unionWithMultipleStruct);
+    u4.set(2, o4);
+
+    File testFolder = Files.createTempDir();
+    testFolder.deleteOnExit();
+    Path testFilePath = new Path(testFolder.getAbsolutePath(), "testFile");
+    Configuration conf = new Configuration();
+
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(unionWithMultipleStruct)
+            .stripeSize(100000).bufferSize(10000)
+            .version(OrcFile.Version.CURRENT));
+
+    OrcMapredRecordWriter<OrcUnion> recordWriter =
+        new OrcMapredRecordWriter<>(writer);
+    recordWriter.write(NullWritable.get(), u1);
+    recordWriter.write(NullWritable.get(), u2);
+    recordWriter.write(NullWritable.get(), u3);
+    recordWriter.write(NullWritable.get(), u4);
+    recordWriter.close(null);
+
+    Reader reader = OrcFile.createReader(testFilePath, OrcFile.readerOptions(conf).filesystem(FileSystem.getLocal(conf)));
+    Reader.Options options = reader.options().schema(unionWithMultipleStruct);
+
+    OrcMapredRecordReader<OrcUnion> recordReader = new OrcMapredRecordReader<>(reader,options);
+    OrcUnion result = recordReader.createValue();
+    recordReader.next(recordReader.createKey(), result);
+    Assert.assertEquals(result, u1);
+    recordReader.next(recordReader.createKey(), result);
+    Assert.assertEquals(result, u2);
+    recordReader.next(recordReader.createKey(), result);
+    Assert.assertEquals(result, u3);
+    recordReader.next(recordReader.createKey(), result);
+    Assert.assertEquals(result, u4);
+  }
+
+  @Test
   public void testFieldAccess() {
     OrcStruct struct = new OrcStruct(TypeDescription.fromString
         ("struct<i:int,j:double,k:string>"));


[orc] 01/02: Minor fix for docker script.

Posted by om...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

omalley pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git

commit 3b968478d8e80d84a4b9f799a7457d91a71a9966
Author: Owen O'Malley <om...@apache.org>
AuthorDate: Fri Mar 27 12:51:30 2020 -0700

    Minor fix for docker script.
    
    Signed-off-by: Owen O'Malley <om...@apache.org>
---
 docker/run-all.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docker/run-all.sh b/docker/run-all.sh
index 8f1436b..81876f4 100755
--- a/docker/run-all.sh
+++ b/docker/run-all.sh
@@ -29,6 +29,7 @@ function failure {
     grep -h "FAILED " logs/*-test.log
     exit 1
 }
+rm -f logs/pids.txt logs/*.log
 
 start=`date`
 for os in `cat os-list.txt`; do
@@ -36,7 +37,6 @@ for os in `cat os-list.txt`; do
   ( cd $os && docker build -t "orc-$os" . ) > logs/$os-build.log 2>&1 || exit 1
 done
 testStart=`date`
-rm -f logs/pids.txt
 
 for os in `cat os-list.txt`; do
     ./run-one.sh $1 $2 $os > logs/$os-test.log 2>&1 &