You are viewing a plain text version of this content. The canonical link for it is not included in this plain text version.
Posted to commits@orc.apache.org by om...@apache.org on 2018/02/06 21:09:31 UTC

[1/4] orc git commit: ORC-285. Empty vector batches of floats or doubles get java.io.EOFException

Repository: orc
Updated Branches:
  refs/heads/branch-1.4 daefe685e -> e8c21fd38


ORC-285. Empty vector batches of floats or doubles get java.io.EOFException

Fixes #205

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/9d3434fe
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/9d3434fe
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/9d3434fe

Branch: refs/heads/branch-1.4
Commit: 9d3434fe841531483497bf9721d3fc00e958fc8a
Parents: daefe68
Author: Owen O'Malley <om...@apache.org>
Authored: Wed Dec 27 09:13:50 2017 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Jan 23 15:13:55 2018 -0800

----------------------------------------------------------------------
 .../org/apache/orc/impl/TreeReaderFactory.java  | 125 ++++++++++---------
 .../test/org/apache/orc/TestVectorOrcFile.java  |  35 ++++++
 2 files changed, 99 insertions(+), 61 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
index 4b369af..9649be9 100644
--- a/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
+++ b/java/core/src/java/org/apache/orc/impl/TreeReaderFactory.java
@@ -640,40 +640,42 @@ public class TreeReaderFactory {
       final boolean hasNulls = !result.noNulls;
       boolean allNulls = hasNulls;
 
-      if (hasNulls) {
-        // conditions to ensure bounds checks skips
-        for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
+      if (batchSize > 0) {
+        if (hasNulls) {
           // conditions to ensure bounds checks skips
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readFloat(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
+          for (int i = 0; batchSize <= result.isNull.length && i < batchSize; i++) {
+            allNulls = allNulls & result.isNull[i];
+          }
+          if (allNulls) {
+            result.vector[0] = Double.NaN;
+            result.isRepeating = true;
+          } else {
+            // some nulls
+            result.isRepeating = false;
+            // conditions to ensure bounds checks skips
+            for (int i = 0; batchSize <= result.isNull.length
+                && batchSize <= result.vector.length && i < batchSize; i++) {
+              if (!result.isNull[i]) {
+                result.vector[i] = utils.readFloat(stream);
+              } else {
+                // If the value is not present then set NaN
+                result.vector[i] = Double.NaN;
+              }
             }
           }
+        } else {
+          // no nulls & > 1 row (check repeating)
+          boolean repeating = (batchSize > 1);
+          final float f1 = utils.readFloat(stream);
+          result.vector[0] = f1;
+          // conditions to ensure bounds checks skips
+          for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+            final float f2 = utils.readFloat(stream);
+            repeating = repeating && (f1 == f2);
+            result.vector[i] = f2;
+          }
+          result.isRepeating = repeating;
         }
-      } else {
-        // no nulls & > 1 row (check repeating)
-        boolean repeating = (batchSize > 1);
-        final float f1 = utils.readFloat(stream);
-        result.vector[0] = f1;
-        // conditions to ensure bounds checks skips
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final float f2 = utils.readFloat(stream);
-          repeating = repeating && (f1 == f2);
-          result.vector[i] = f2;
-        }
-        result.isRepeating = repeating;
       }
     }
 
@@ -733,41 +735,42 @@ public class TreeReaderFactory {
 
       final boolean hasNulls = !result.noNulls;
       boolean allNulls = hasNulls;
-
-      if (hasNulls) {
-        // conditions to ensure bounds checks skips
-        for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
-          allNulls = allNulls & result.isNull[i];
-        }
-        if (allNulls) {
-          result.vector[0] = Double.NaN;
-          result.isRepeating = true;
-        } else {
-          // some nulls
-          result.isRepeating = false;
+      if (batchSize != 0) {
+        if (hasNulls) {
           // conditions to ensure bounds checks skips
-          for (int i = 0; batchSize <= result.isNull.length
-              && batchSize <= result.vector.length && i < batchSize; i++) {
-            if (!result.isNull[i]) {
-              result.vector[i] = utils.readDouble(stream);
-            } else {
-              // If the value is not present then set NaN
-              result.vector[i] = Double.NaN;
+          for (int i = 0; i < batchSize && batchSize <= result.isNull.length; i++) {
+            allNulls = allNulls & result.isNull[i];
+          }
+          if (allNulls) {
+            result.vector[0] = Double.NaN;
+            result.isRepeating = true;
+          } else {
+            // some nulls
+            result.isRepeating = false;
+            // conditions to ensure bounds checks skips
+            for (int i = 0; batchSize <= result.isNull.length
+                && batchSize <= result.vector.length && i < batchSize; i++) {
+              if (!result.isNull[i]) {
+                result.vector[i] = utils.readDouble(stream);
+              } else {
+                // If the value is not present then set NaN
+                result.vector[i] = Double.NaN;
+              }
             }
           }
+        } else {
+          // no nulls
+          boolean repeating = (batchSize > 1);
+          final double d1 = utils.readDouble(stream);
+          result.vector[0] = d1;
+          // conditions to ensure bounds checks skips
+          for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
+            final double d2 = utils.readDouble(stream);
+            repeating = repeating && (d1 == d2);
+            result.vector[i] = d2;
+          }
+          result.isRepeating = repeating;
         }
-      } else {
-        // no nulls
-        boolean repeating = (batchSize > 1);
-        final double d1 = utils.readDouble(stream);
-        result.vector[0] = d1;
-        // conditions to ensure bounds checks skips
-        for (int i = 1; i < batchSize && batchSize <= result.vector.length; i++) {
-          final double d2 = utils.readDouble(stream);
-          repeating = repeating && (d1 == d2);
-          result.vector[i] = d2;
-        }
-        result.isRepeating = repeating;
       }
     }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/9d3434fe/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index bb4e3a9..4ca4a40 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3248,4 +3248,39 @@ public class TestVectorOrcFile {
       assertEquals(OrcProto.CompressionKind.NONE, ps.getCompression());
     }
   }
+
+  @Test
+  public void testEmptyDoubleStream() throws Exception {
+    TypeDescription schema =
+        TypeDescription.fromString("struct<list1:array<double>," +
+            "list2:array<float>>");
+    Writer writer = OrcFile.createWriter(testFilePath,
+        OrcFile.writerOptions(conf).setSchema(schema));
+    VectorizedRowBatch batch = schema.createRowBatch();
+    batch.size = 2;
+    ListColumnVector list1 = (ListColumnVector) batch.cols[0];
+    ListColumnVector list2 = (ListColumnVector) batch.cols[1];
+    for(int r=0; r < batch.size; ++r) {
+      list1.offsets[r] = 0;
+      list1.lengths[r] = 0;
+      list2.offsets[r] = 0;
+      list2.lengths[r] = 0;
+    }
+    writer.addRowBatch(batch);
+    writer.close();
+    Reader reader = OrcFile.createReader(testFilePath,
+        OrcFile.readerOptions(conf));
+    RecordReader rows = reader.rows();
+    batch = reader.getSchema().createRowBatch();
+    assertTrue(rows.nextBatch(batch));
+    assertEquals(2, batch.size);
+    list1 = (ListColumnVector) batch.cols[0];
+    list2 = (ListColumnVector) batch.cols[1];
+    for(int r=0; r < batch.size; ++r) {
+      assertEquals(0, list1.lengths[r]);
+      assertEquals(0, list2.lengths[r]);
+    }
+    assertFalse(rows.nextBatch(batch));
+    rows.close();
+  }
 }


[3/4] orc git commit: ORC-281. Take only the travis-ci changes to use the clang 4.0 compiler.

Posted by om...@apache.org.
ORC-281. Take only the travis-ci changes to use the clang 4.0 compiler.


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/7cb5001b
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/7cb5001b
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/7cb5001b

Branch: refs/heads/branch-1.4
Commit: 7cb5001b314bdc02d4e9247b6d5a2c1a5dc65933
Parents: b5a30bb
Author: Owen O'Malley <om...@apache.org>
Authored: Tue Feb 6 12:52:10 2018 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Feb 6 12:52:10 2018 -0800

----------------------------------------------------------------------
 .travis.yml | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/7cb5001b/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 36570b7..77d48a7 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,17 +9,33 @@ matrix:
     os: linux
   - compiler: clang
     os: linux
+    addons:
+      apt:
+         sources:
+           - llvm-toolchain-trusty-4.0
+         packages:
+           - clang-4.0
+    before_script:
+      - export CC=clang-4.0
+      - export CXX=clang++-4.0
   - compiler: clang
     os: osx
     osx_image: xcode6.4
+  - compiler: clang
+    os: osx
+    osx_image: xcode9.2
+    script:
+    - mkdir build
+    - cd build
+    - cmake -DOPENSSL_ROOT_DIR=`brew --prefix openssl` ..
+    - make package test-out
 
 jdk:
   - openjdk7
-before_script:
 env:
   - MAVEN_OPTS=-Xmx2g MAVEN_SKIP_RC=true
 script:
   - mkdir build
   - cd build
-  - cmake ..
-  - make package test-out
+  - cmake -DANALYZE_JAVA=ON ..
+  - make package test-out
\ No newline at end of file


[2/4] orc git commit: ORC-296 : work around HADOOP-15171; also fix stream contract (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)

Posted by om...@apache.org.
ORC-296 : work around HADOOP-15171; also fix stream contract (Sergey Shelukhin, reviewed by Gopal Vijayaraghavan)


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/b5a30bbb
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/b5a30bbb
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/b5a30bbb

Branch: refs/heads/branch-1.4
Commit: b5a30bbbd443d537e5c48427a0e042f82fbc26e0
Parents: 9d3434f
Author: sergey <se...@apache.org>
Authored: Fri Feb 2 16:23:28 2018 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Feb 6 12:48:33 2018 -0800

----------------------------------------------------------------------
 .../org/apache/orc/impl/HadoopShimsCurrent.java | 12 ++++++++
 .../src/java/org/apache/orc/impl/InStream.java  | 31 ++++++++++----------
 2 files changed, 28 insertions(+), 15 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/b5a30bbb/java/core/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/HadoopShimsCurrent.java b/java/core/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
index 9f40272..3d4875c 100644
--- a/java/core/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
+++ b/java/core/src/java/org/apache/orc/impl/HadoopShimsCurrent.java
@@ -36,12 +36,18 @@ public class HadoopShimsCurrent implements HadoopShims {
 
   private static class SnappyDirectDecompressWrapper implements DirectDecompressor {
     private final SnappyDirectDecompressor root;
+    private boolean isFirstCall = true;
 
     SnappyDirectDecompressWrapper(SnappyDirectDecompressor root) {
       this.root = root;
     }
 
     public void decompress(ByteBuffer input, ByteBuffer output) throws IOException {
+      if (!isFirstCall) {
+        root.reset();
+      } else {
+        isFirstCall = false;
+      }
       root.decompress(input, output);
     }
 
@@ -58,12 +64,18 @@ public class HadoopShimsCurrent implements HadoopShims {
 
   private static class ZlibDirectDecompressWrapper implements DirectDecompressor {
     private final ZlibDecompressor.ZlibDirectDecompressor root;
+    private boolean isFirstCall = true;
 
     ZlibDirectDecompressWrapper(ZlibDecompressor.ZlibDirectDecompressor root) {
       this.root = root;
     }
 
     public void decompress(ByteBuffer input, ByteBuffer output) throws IOException {
+      if (!isFirstCall) {
+        root.reset();
+      } else {
+        isFirstCall = false;
+      }
       root.decompress(input, output);
     }
 

http://git-wip-us.apache.org/repos/asf/orc/blob/b5a30bbb/java/core/src/java/org/apache/orc/impl/InStream.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/InStream.java b/java/core/src/java/org/apache/orc/impl/InStream.java
index c85aee5..94e9232 100644
--- a/java/core/src/java/org/apache/orc/impl/InStream.java
+++ b/java/core/src/java/org/apache/orc/impl/InStream.java
@@ -239,36 +239,37 @@ public abstract class InStream extends InputStream {
 
     @Override
     public int read() throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == length) {
-          return -1;
-        }
-        readHeader();
+      if (!ensureUncompressed()) {
+        return -1;
       }
       return 0xff & uncompressed.get();
     }
 
     @Override
     public int read(byte[] data, int offset, int length) throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == this.length) {
-          return -1;
-        }
-        readHeader();
+      if (!ensureUncompressed()) {
+        return -1;
       }
       int actualLength = Math.min(length, uncompressed.remaining());
       uncompressed.get(data, offset, actualLength);
       return actualLength;
     }
 
-    @Override
-    public int available() throws IOException {
-      if (uncompressed == null || uncompressed.remaining() == 0) {
-        if (currentOffset == length) {
-          return 0;
+    private boolean ensureUncompressed() throws IOException {
+      while (uncompressed == null || uncompressed.remaining() == 0) {
+        if (currentOffset == this.length) {
+          return false;
         }
         readHeader();
       }
+      return true;
+    }
+
+    @Override
+    public int available() throws IOException {
+      if (!ensureUncompressed()) {
+        return 0;
+      }
       return uncompressed.remaining();
     }
 


[4/4] orc git commit: Add cache for $HOME/.m2 to travis.

Posted by om...@apache.org.
Add cache for $HOME/.m2 to travis.

Signed-off-by: Owen O'Malley <om...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/e8c21fd3
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/e8c21fd3
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/e8c21fd3

Branch: refs/heads/branch-1.4
Commit: e8c21fd3814e8c3f2ff672f7d5bab6f0870417a4
Parents: 7cb5001
Author: Owen O'Malley <om...@apache.org>
Authored: Sat Jan 6 07:58:18 2018 -0800
Committer: Owen O'Malley <om...@apache.org>
Committed: Tue Feb 6 12:55:07 2018 -0800

----------------------------------------------------------------------
 .travis.yml | 6 ++++++
 1 file changed, 6 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/orc/blob/e8c21fd3/.travis.yml
----------------------------------------------------------------------
diff --git a/.travis.yml b/.travis.yml
index 77d48a7..73537de 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,8 +32,14 @@ matrix:
 
 jdk:
   - openjdk7
+
 env:
   - MAVEN_OPTS=-Xmx2g MAVEN_SKIP_RC=true
+
+cache:
+  directories:
+  - $HOME/.m2
+
 script:
   - mkdir build
   - cd build