You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2016/12/06 23:23:27 UTC
orc git commit: ORC-117 ConvertTreeReaderFactory gets an
ArrayIndexOutOfBoundsException when columns are added and types changed.
Repository: orc
Updated Branches:
refs/heads/master 04f479c98 -> ebae3044e
ORC-117 ConvertTreeReaderFactory gets an
ArrayIndexOutOfBoundsException when columns are added and types changed.
Fixes #70
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/ebae3044
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/ebae3044
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/ebae3044
Branch: refs/heads/master
Commit: ebae3044e90570026139a1c2437a0db3eb53c1fb
Parents: 04f479c
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Dec 6 12:43:27 2016 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Dec 6 15:22:37 2016 -0800
----------------------------------------------------------------------
.../orc/impl/ConvertTreeReaderFactory.java | 4 +-
.../apache/orc/impl/TestSchemaEvolution.java | 77 ++++++++++++++++++++
2 files changed, 79 insertions(+), 2 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/ebae3044/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
index ee9e68e..aabd54a 100644
--- a/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java
@@ -2693,8 +2693,8 @@ public class ConvertTreeReaderFactory extends TreeReaderFactory {
Context context) throws IOException {
final SchemaEvolution evolution = context.getSchemaEvolution();
- int columnId = readerType.getId();
- TypeDescription fileType = evolution.getFileType(readerType);
+ TypeDescription fileType = evolution.getFileType(readerType.getId());
+ int columnId = fileType.getId();
switch (fileType.getCategory()) {
http://git-wip-us.apache.org/repos/asf/orc/blob/ebae3044/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
index f654fbe..c542711 100644
--- a/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
+++ b/java/core/src/test/org/apache/orc/impl/TestSchemaEvolution.java
@@ -24,14 +24,23 @@ import static org.junit.Assert.assertTrue;
import java.io.File;
import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.io.DiskRange;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.orc.OrcFile;
+import org.apache.orc.OrcProto;
import org.apache.orc.Reader;
import org.apache.orc.RecordReader;
import org.apache.orc.TypeDescription;
@@ -996,4 +1005,72 @@ public class TestSchemaEvolution {
assertTrue(fileInclude[2]);
assertFalse(fileInclude[3]);
}
+
+ static void createStream(Map<StreamName, InStream> streams,
+ int id,
+ OrcProto.Stream.Kind kind,
+ int... values) throws IOException {
+ StreamName name = new StreamName(id, kind);
+ List<DiskRange> ranges = new ArrayList<>();
+ byte[] buffer = new byte[values.length];
+ for(int i=0; i < values.length; ++i) {
+ buffer[i] = (byte) values[i];
+ }
+ ranges.add(new BufferChunk(ByteBuffer.wrap(buffer), 0));
+ streams.put(name, InStream.create(name.toString(), ranges, values.length, null,
+ values.length));
+ }
+
+ @Test
+ public void testTypeConversion() throws IOException {
+ TypeDescription fileType = TypeDescription.fromString("struct<x:int,y:string>");
+ TypeDescription readType = TypeDescription.fromString("struct<z:int,y:string,x:bigint>");
+ SchemaEvolution evo = new SchemaEvolution(fileType, readType, options);
+
+ // check to make sure the fields are mapped correctly
+ assertEquals(null, evo.getFileType(1));
+ assertEquals(2, evo.getFileType(2).getId());
+ assertEquals(1, evo.getFileType(3).getId());
+
+ TreeReaderFactory.Context treeContext =
+ new TreeReaderFactory.ReaderContext().setSchemaEvolution(evo);
+ TreeReaderFactory.TreeReader reader =
+ TreeReaderFactory.createTreeReader(readType, treeContext);
+
+ // check to make sure the tree reader is built right
+ assertEquals(TreeReaderFactory.StructTreeReader.class, reader.getClass());
+ TreeReaderFactory.TreeReader[] children =
+ ((TreeReaderFactory.StructTreeReader) reader).getChildReaders();
+ assertEquals(3, children.length);
+ assertEquals(TreeReaderFactory.NullTreeReader.class, children[0].getClass());
+ assertEquals(TreeReaderFactory.StringTreeReader.class, children[1].getClass());
+ assertEquals(ConvertTreeReaderFactory.AnyIntegerFromAnyIntegerTreeReader.class,
+ children[2].getClass());
+
+ // check to make sure the data is read correctly
+ OrcProto.StripeFooter.Builder footer = OrcProto.StripeFooter.newBuilder();
+ OrcProto.ColumnEncoding DIRECT =
+ OrcProto.ColumnEncoding.newBuilder()
+ .setKind(OrcProto.ColumnEncoding.Kind.DIRECT).build();
+ footer.addColumns(DIRECT);
+ footer.addColumns(DIRECT);
+ footer.addColumns(DIRECT);
+ Map<StreamName, InStream> streams = new HashMap<>();
+ createStream(streams, 1, OrcProto.Stream.Kind.DATA, 7, 1, 0);
+ createStream(streams, 2, OrcProto.Stream.Kind.DATA,
+ 65, 66, 67, 68, 69, 70, 71, 72, 73, 74);
+ createStream(streams, 2, OrcProto.Stream.Kind.LENGTH, 7, 0, 1);
+ reader.startStripe(streams, footer.build());
+ VectorizedRowBatch batch = readType.createRowBatch();
+ reader.nextBatch(batch, 10);
+ final String EXPECTED = "ABCDEFGHIJ";
+ assertEquals(true, batch.cols[0].isRepeating);
+ assertEquals(true, batch.cols[0].isNull[0]);
+ for(int r=0; r < 10; ++r) {
+ assertEquals("col1." + r, EXPECTED.substring(r, r+1),
+ ((BytesColumnVector) batch.cols[1]).toString(r));
+ assertEquals("col2." + r, r,
+ ((LongColumnVector) batch.cols[2]).vector[r]);
+ }
+ }
}