You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by omalley@apache.org on 2013/04/20 00:46:00 UTC
svn commit: r1470082 - in /hive/branches/branch-0.11: ./ data/files/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/test/queries/clientpositive/ ql/src/test/results/clientpositive/
Author: omalley
Date: Fri Apr 19 22:45:59 2013
New Revision: 1470082
URL: http://svn.apache.org/r1470082
Log:
HIVE-4189 : ORC fails with String column that ends in lots of nulls (Kevin
Wilfong)
Added:
hive/branches/branch-0.11/data/files/nulls.txt
- copied unchanged from r1470080, hive/trunk/data/files/nulls.txt
hive/branches/branch-0.11/ql/src/test/queries/clientpositive/orc_ends_with_nulls.q
- copied unchanged from r1470080, hive/trunk/ql/src/test/queries/clientpositive/orc_ends_with_nulls.q
hive/branches/branch-0.11/ql/src/test/results/clientpositive/orc_ends_with_nulls.q.out
- copied unchanged from r1470080, hive/trunk/ql/src/test/results/clientpositive/orc_ends_with_nulls.q.out
Modified:
hive/branches/branch-0.11/ (props changed)
hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
Propchange: hive/branches/branch-0.11/
------------------------------------------------------------------------------
Merged /hive/trunk:r1470080
Modified: hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1470082&r1=1470081&r2=1470082&view=diff
==============================================================================
--- hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/branches/branch-0.11/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Fri Apr 19 22:45:59 2013
@@ -18,8 +18,15 @@
package org.apache.hadoop.hive.ql.io.orc;
-import com.google.protobuf.ByteString;
-import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.sql.Timestamp;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -44,14 +51,8 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector;
import org.apache.hadoop.io.BytesWritable;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.nio.ByteBuffer;
-import java.sql.Timestamp;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.TreeMap;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedOutputStream;
/**
* An ORC file writer. The file is divided into stripes, which is the natural
@@ -734,19 +735,8 @@ class WriterImpl implements Writer {
int length = rows.size();
int rowIndexEntry = 0;
OrcProto.RowIndex.Builder rowIndex = getRowIndex();
- // need to build the first index entry out here, to handle the case of
- // not having any values.
- if (buildIndex) {
- while (0 == rowIndexValueCount.get(rowIndexEntry) &&
- rowIndexEntry < savedRowIndex.size()) {
- OrcProto.RowIndexEntry.Builder base =
- savedRowIndex.get(rowIndexEntry++).toBuilder();
- rowOutput.getPosition(new RowIndexPositionRecorder(base));
- rowIndex.addEntry(base.build());
- }
- }
// write the values translated into the dump order.
- for(int i = 0; i < length; ++i) {
+ for(int i = 0; i <= length; ++i) {
// now that we are writing out the row values, we can finalize the
// row index
if (buildIndex) {
@@ -758,7 +748,9 @@ class WriterImpl implements Writer {
rowIndex.addEntry(base.build());
}
}
- rowOutput.write(dumpOrder[rows.get(i)]);
+ if (i != length) {
+ rowOutput.write(dumpOrder[rows.get(i)]);
+ }
}
// we need to build the rowindex before calling super, since it
// writes it out.