Posted to commits@hive.apache.org by om...@apache.org on 2013/07/20 20:30:47 UTC

svn commit: r1505187 - in /hive/trunk: ./ ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/ ql/src/java/org/apache/hadoop/hive/ql/io/orc/ ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/

Author: omalley
Date: Sat Jul 20 18:30:47 2013
New Revision: 1505187

URL: http://svn.apache.org/r1505187
Log:
HIVE-4724 Better detection of non-ORC files in the ORC reader (omalley)
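
In outline: the commit adds two fields to the ORC PostScript (a packed
writer version and a trailing "ORC" magic string) and teaches the reader to
verify the magic before parsing anything. The tail layout this implies,
inferred from the WriterImpl and ReaderImpl diffs below:

    | stripes ... | footer | postscript (ends with "ORC") | psLen (1 byte) |
                                                            last byte of file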

Modified:
    hive/trunk/   (props changed)
    hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
    hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto

Propchange: hive/trunk/
------------------------------------------------------------------------------
  Merged /hive/branches/branch-0.11:r1505184

Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Sat Jul 20 18:30:47 2013
@@ -9791,6 +9791,15 @@ public final class OrcProto {
     // optional uint64 compressionBlockSize = 3;
     boolean hasCompressionBlockSize();
     long getCompressionBlockSize();
+    
+    // repeated uint32 version = 4 [packed = true];
+    java.util.List<java.lang.Integer> getVersionList();
+    int getVersionCount();
+    int getVersion(int index);
+    
+    // optional string magic = 8000;
+    boolean hasMagic();
+    String getMagic();
   }
   public static final class PostScript extends
       com.google.protobuf.GeneratedMessage
@@ -9851,10 +9860,59 @@ public final class OrcProto {
       return compressionBlockSize_;
     }
     
+    // repeated uint32 version = 4 [packed = true];
+    public static final int VERSION_FIELD_NUMBER = 4;
+    private java.util.List<java.lang.Integer> version_;
+    public java.util.List<java.lang.Integer>
+        getVersionList() {
+      return version_;
+    }
+    public int getVersionCount() {
+      return version_.size();
+    }
+    public int getVersion(int index) {
+      return version_.get(index);
+    }
+    private int versionMemoizedSerializedSize = -1;
+    
+    // optional string magic = 8000;
+    public static final int MAGIC_FIELD_NUMBER = 8000;
+    private java.lang.Object magic_;
+    public boolean hasMagic() {
+      return ((bitField0_ & 0x00000008) == 0x00000008);
+    }
+    public String getMagic() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        return (String) ref;
+      } else {
+        com.google.protobuf.ByteString bs = 
+            (com.google.protobuf.ByteString) ref;
+        String s = bs.toStringUtf8();
+        if (com.google.protobuf.Internal.isValidUtf8(bs)) {
+          magic_ = s;
+        }
+        return s;
+      }
+    }
+    private com.google.protobuf.ByteString getMagicBytes() {
+      java.lang.Object ref = magic_;
+      if (ref instanceof String) {
+        com.google.protobuf.ByteString b = 
+            com.google.protobuf.ByteString.copyFromUtf8((String) ref);
+        magic_ = b;
+        return b;
+      } else {
+        return (com.google.protobuf.ByteString) ref;
+      }
+    }
+    
     private void initFields() {
       footerLength_ = 0L;
       compression_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind.NONE;
       compressionBlockSize_ = 0L;
+      version_ = java.util.Collections.emptyList();;
+      magic_ = "";
     }
     private byte memoizedIsInitialized = -1;
     public final boolean isInitialized() {
@@ -9877,6 +9935,16 @@ public final class OrcProto {
       if (((bitField0_ & 0x00000004) == 0x00000004)) {
         output.writeUInt64(3, compressionBlockSize_);
       }
+      if (getVersionList().size() > 0) {
+        output.writeRawVarint32(34);
+        output.writeRawVarint32(versionMemoizedSerializedSize);
+      }
+      for (int i = 0; i < version_.size(); i++) {
+        output.writeUInt32NoTag(version_.get(i));
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        output.writeBytes(8000, getMagicBytes());
+      }
       getUnknownFields().writeTo(output);
     }
     
@@ -9898,6 +9966,24 @@ public final class OrcProto {
         size += com.google.protobuf.CodedOutputStream
           .computeUInt64Size(3, compressionBlockSize_);
       }
+      {
+        int dataSize = 0;
+        for (int i = 0; i < version_.size(); i++) {
+          dataSize += com.google.protobuf.CodedOutputStream
+            .computeUInt32SizeNoTag(version_.get(i));
+        }
+        size += dataSize;
+        if (!getVersionList().isEmpty()) {
+          size += 1;
+          size += com.google.protobuf.CodedOutputStream
+              .computeInt32SizeNoTag(dataSize);
+        }
+        versionMemoizedSerializedSize = dataSize;
+      }
+      if (((bitField0_ & 0x00000008) == 0x00000008)) {
+        size += com.google.protobuf.CodedOutputStream
+          .computeBytesSize(8000, getMagicBytes());
+      }
       size += getUnknownFields().getSerializedSize();
       memoizedSerializedSize = size;
       return size;
@@ -10028,6 +10114,10 @@ public final class OrcProto {
         bitField0_ = (bitField0_ & ~0x00000002);
         compressionBlockSize_ = 0L;
         bitField0_ = (bitField0_ & ~0x00000004);
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        magic_ = "";
+        bitField0_ = (bitField0_ & ~0x00000010);
         return this;
       }
       
@@ -10078,6 +10168,15 @@ public final class OrcProto {
           to_bitField0_ |= 0x00000004;
         }
         result.compressionBlockSize_ = compressionBlockSize_;
+        if (((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = java.util.Collections.unmodifiableList(version_);
+          bitField0_ = (bitField0_ & ~0x00000008);
+        }
+        result.version_ = version_;
+        if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+          to_bitField0_ |= 0x00000008;
+        }
+        result.magic_ = magic_;
         result.bitField0_ = to_bitField0_;
         onBuilt();
         return result;
@@ -10103,6 +10202,19 @@ public final class OrcProto {
         if (other.hasCompressionBlockSize()) {
           setCompressionBlockSize(other.getCompressionBlockSize());
         }
+        if (!other.version_.isEmpty()) {
+          if (version_.isEmpty()) {
+            version_ = other.version_;
+            bitField0_ = (bitField0_ & ~0x00000008);
+          } else {
+            ensureVersionIsMutable();
+            version_.addAll(other.version_);
+          }
+          onChanged();
+        }
+        if (other.hasMagic()) {
+          setMagic(other.getMagic());
+        }
         this.mergeUnknownFields(other.getUnknownFields());
         return this;
       }
@@ -10155,6 +10267,25 @@ public final class OrcProto {
               compressionBlockSize_ = input.readUInt64();
               break;
             }
+            case 32: {
+              ensureVersionIsMutable();
+              version_.add(input.readUInt32());
+              break;
+            }
+            case 34: {
+              int length = input.readRawVarint32();
+              int limit = input.pushLimit(length);
+              while (input.getBytesUntilLimit() > 0) {
+                addVersion(input.readUInt32());
+              }
+              input.popLimit(limit);
+              break;
+            }
+            case 64002: {
+              bitField0_ |= 0x00000010;
+              magic_ = input.readBytes();
+              break;
+            }
           }
         }
       }
@@ -10227,6 +10358,87 @@ public final class OrcProto {
         return this;
       }
       
+      // repeated uint32 version = 4 [packed = true];
+      private java.util.List<java.lang.Integer> version_ = java.util.Collections.emptyList();;
+      private void ensureVersionIsMutable() {
+        if (!((bitField0_ & 0x00000008) == 0x00000008)) {
+          version_ = new java.util.ArrayList<java.lang.Integer>(version_);
+          bitField0_ |= 0x00000008;
+         }
+      }
+      public java.util.List<java.lang.Integer>
+          getVersionList() {
+        return java.util.Collections.unmodifiableList(version_);
+      }
+      public int getVersionCount() {
+        return version_.size();
+      }
+      public int getVersion(int index) {
+        return version_.get(index);
+      }
+      public Builder setVersion(
+          int index, int value) {
+        ensureVersionIsMutable();
+        version_.set(index, value);
+        onChanged();
+        return this;
+      }
+      public Builder addVersion(int value) {
+        ensureVersionIsMutable();
+        version_.add(value);
+        onChanged();
+        return this;
+      }
+      public Builder addAllVersion(
+          java.lang.Iterable<? extends java.lang.Integer> values) {
+        ensureVersionIsMutable();
+        super.addAll(values, version_);
+        onChanged();
+        return this;
+      }
+      public Builder clearVersion() {
+        version_ = java.util.Collections.emptyList();;
+        bitField0_ = (bitField0_ & ~0x00000008);
+        onChanged();
+        return this;
+      }
+      
+      // optional string magic = 8000;
+      private java.lang.Object magic_ = "";
+      public boolean hasMagic() {
+        return ((bitField0_ & 0x00000010) == 0x00000010);
+      }
+      public String getMagic() {
+        java.lang.Object ref = magic_;
+        if (!(ref instanceof String)) {
+          String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
+          magic_ = s;
+          return s;
+        } else {
+          return (String) ref;
+        }
+      }
+      public Builder setMagic(String value) {
+        if (value == null) {
+    throw new NullPointerException();
+  }
+  bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+        return this;
+      }
+      public Builder clearMagic() {
+        bitField0_ = (bitField0_ & ~0x00000010);
+        magic_ = getDefaultInstance().getMagic();
+        onChanged();
+        return this;
+      }
+      void setMagic(com.google.protobuf.ByteString value) {
+        bitField0_ |= 0x00000010;
+        magic_ = value;
+        onChanged();
+      }
+      
       // @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.PostScript)
     }
     
@@ -10384,12 +10596,13 @@ public final class OrcProto {
       ".hive.ql.io.orc.UserMetadataItem\022\024\n\014numb" +
       "erOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org" +
       ".apache.hadoop.hive.ql.io.orc.ColumnStat" +
-      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostS" +
+      "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nPostS" +
       "cript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressi" +
       "on\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.",
       "orc.CompressionKind\022\034\n\024compressionBlockS" +
-      "ize\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" +
-      "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+      "ize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005magic\030" +
+      "\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004Z" +
+      "LIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -10521,7 +10734,7 @@ public final class OrcProto {
           internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor,
-              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", },
+              new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "Magic", },
               org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.class,
               org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder.class);
           return null;

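A note on the new case labels in the generated parser above: protobuf wire
tags are (fieldNumber << 3) | wireType, with wire type 0 for varints and 2
for length-delimited data, which is why the new fields dispatch on 32, 34
and 64002. A minimal sketch (field numbers taken from the diff; the class
name is illustrative):

    public class TagMath {
      public static void main(String[] args) {
        // Protobuf wire tags: (fieldNumber << 3) | wireType.
        System.out.println((4 << 3) | 0);     // 32:    one unpacked "version" value
        System.out.println((4 << 3) | 2);     // 34:    the packed "version" list
        System.out.println((8000 << 3) | 2);  // 64002: the length-delimited "magic"
      }
    }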
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jul 20 18:30:47 2013
@@ -31,6 +31,26 @@ import java.io.IOException;
 public final class OrcFile {
 
   public static final String MAGIC = "ORC";
+
+  /**
+   * Create a version number for the ORC file format, so that we can add
+   * non-forward compatible changes in the future. To make it easier for users
+   * to understand the version numbers, we use the Hive release number that
+   * first wrote that version of ORC files.
+   *
+   * Thus, if you add new encodings or other non-forward compatible changes
+   * to ORC files, which prevent the old reader from reading the new format,
+   * you should change these variables to reflect the next Hive release number.
+   * Non-forward compatible changes should never be added in patch releases.
+   *
+   * Do not make any changes that break backwards compatibility, which would
+   * prevent the new reader from reading ORC files generated by any released
+   * version of Hive.
+   */
+  public static final int MAJOR_VERSION = 0;
+  public static final int MINOR_VERSION = 11;
+
+  // the table properties that control ORC files
   public static final String COMPRESSION = "orc.compress";
   static final String DEFAULT_COMPRESSION = "ZLIB";
   public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jul 20 18:30:47 2013
@@ -19,10 +19,13 @@
 package org.apache.hadoop.hive.ql.io.orc;
 
 import com.google.protobuf.CodedInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.FSDataInputStream;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Text;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -33,6 +36,8 @@ import java.util.List;
 
 final class ReaderImpl implements Reader {
 
+  private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
+
   private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
 
   private final FileSystem fileSystem;
@@ -176,6 +181,81 @@ final class ReaderImpl implements Reader
     return result;
   }
 
+  /**
+   * Ensure this is an ORC file to prevent users from trying to read text
+   * files or RC files as ORC files.
+   * @param in the file being read
+   * @param path the filename for error messages
+   * @param psLen the postscript length
+   * @param buffer the tail of the file
+   * @throws IOException
+   */
+  static void ensureOrcFooter(FSDataInputStream in,
+                                      Path path,
+                                      int psLen,
+                                      ByteBuffer buffer) throws IOException {
+    int len = OrcFile.MAGIC.length();
+    if (psLen < len + 1) {
+      throw new IOException("Malformed ORC file " + path +
+          ". Invalid postscript length " + psLen);
+    }
+    int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1
+        - len;
+    byte[] array = buffer.array();
+    // now look for the magic string at the end of the postscript.
+    if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+      // If it isn't there, this may be the 0.11.0 version of ORC.
+      // Read the first 3 bytes of the file to check for the header
+      in.seek(0);
+      byte[] header = new byte[len];
+      in.readFully(header, 0, len);
+      // if it isn't there, this isn't an ORC file
+      if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
+        throw new IOException("Malformed ORC file " + path +
+            ". Invalid postscript.");
+      }
+    }
+  }
+
+  /**
+   * Build a version string out of a list of version numbers.
+   * @param version the version number as a list
+   * @return the human-readable form of the version string
+   */
+  private static String versionString(List<Integer> version) {
+    StringBuilder buffer = new StringBuilder();
+    for(int i=0; i < version.size(); ++i) {
+      if (i != 0) {
+        buffer.append('.');
+      }
+      buffer.append(version.get(i));
+    }
+    return buffer.toString();
+  }
+
+  /**
+   * Check to see if this ORC file is from a future version and if so,
+   * warn the user that we may not be able to read all of the column encodings.
+   * @param log the logger to write any error message to
+   * @param path the filename for error messages
+   * @param version the version of Hive that wrote the file.
+   */
+  static void checkOrcVersion(Log log, Path path, List<Integer> version) {
+    if (version.size() >= 1) {
+      int major = version.get(0);
+      int minor = 0;
+      if (version.size() >= 2) {
+        minor = version.get(1);
+      }
+      if (major > OrcFile.MAJOR_VERSION ||
+          (major == OrcFile.MAJOR_VERSION && minor > OrcFile.MINOR_VERSION)) {
+        log.warn("ORC file " + path + " was written by a future Hive version " +
+            versionString(version) + ". This file may not be readable by " +
+            "this version of Hive.");
+      }
+    }
+  }
+
   ReaderImpl(FileSystem fs, Path path) throws IOException {
     this.fileSystem = fs;
     this.path = path;
@@ -187,10 +267,12 @@ final class ReaderImpl implements Reader
     file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
       buffer.remaining());
     int psLen = buffer.get(readSize - 1);
+    ensureOrcFooter(file, path, psLen, buffer);
     int psOffset = readSize - 1 - psLen;
     CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
       buffer.arrayOffset() + psOffset, psLen);
     OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+    checkOrcVersion(LOG, path, ps.getVersionList());
     int footerSize = (int) ps.getFooterLength();
     bufferSize = (int) ps.getCompressionBlockSize();
     switch (ps.getCompression()) {

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jul 20 18:30:47 2013
@@ -87,7 +87,7 @@ class RecordReaderImpl implements Record
     }
     firstRow = skippedRows;
     totalRowCount = rows;
-    reader = createTreeReader(0, types, included);
+    reader = createTreeReader(path, 0, types, included);
     indexes = new OrcProto.RowIndex[types.size()];
     rowIndexStride = strideRate;
     if (this.stripes.size() > 0) {
@@ -110,17 +110,27 @@ class RecordReaderImpl implements Record
   }
 
   private abstract static class TreeReader {
+    protected final Path path;
     protected final int columnId;
     private BitFieldReader present = null;
     protected boolean valuePresent = false;
 
-    TreeReader(int columnId) {
+    TreeReader(Path path, int columnId) {
+      this.path = path;
       this.columnId = columnId;
     }
 
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
+    }
+
     void startStripe(Map<StreamName, InStream> streams,
                      List<OrcProto.ColumnEncoding> encoding
                     ) throws IOException {
+      checkEncoding(encoding.get(columnId));
       InStream in = streams.get(new StreamName(columnId,
           OrcProto.Stream.Kind.PRESENT));
       if (in == null) {
@@ -169,8 +179,8 @@ class RecordReaderImpl implements Record
   private static class BooleanTreeReader extends TreeReader{
     private BitFieldReader reader = null;
 
-    BooleanTreeReader(int columnId) {
-      super(columnId);
+    BooleanTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -212,8 +222,8 @@ class RecordReaderImpl implements Record
   private static class ByteTreeReader extends TreeReader{
     private RunLengthByteReader reader = null;
 
-    ByteTreeReader(int columnId) {
-      super(columnId);
+    ByteTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -255,8 +265,8 @@ class RecordReaderImpl implements Record
   private static class ShortTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    ShortTreeReader(int columnId) {
-      super(columnId);
+    ShortTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -299,8 +309,8 @@ class RecordReaderImpl implements Record
   private static class IntTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    IntTreeReader(int columnId) {
-      super(columnId);
+    IntTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -343,8 +353,8 @@ class RecordReaderImpl implements Record
   private static class LongTreeReader extends TreeReader{
     private RunLengthIntegerReader reader = null;
 
-    LongTreeReader(int columnId) {
-      super(columnId);
+    LongTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -387,8 +397,8 @@ class RecordReaderImpl implements Record
   private static class FloatTreeReader extends TreeReader{
     private InStream stream;
 
-    FloatTreeReader(int columnId) {
-      super(columnId);
+    FloatTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -434,8 +444,8 @@ class RecordReaderImpl implements Record
   private static class DoubleTreeReader extends TreeReader{
     private InStream stream;
 
-    DoubleTreeReader(int columnId) {
-      super(columnId);
+    DoubleTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -481,8 +491,8 @@ class RecordReaderImpl implements Record
     private InStream stream;
     private RunLengthIntegerReader lengths;
 
-    BinaryTreeReader(int columnId) {
-      super(columnId);
+    BinaryTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -545,8 +555,8 @@ class RecordReaderImpl implements Record
     private RunLengthIntegerReader data;
     private RunLengthIntegerReader nanos;
 
-    TimestampTreeReader(int columnId) {
-      super(columnId);
+    TimestampTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -615,8 +625,8 @@ class RecordReaderImpl implements Record
     private InStream valueStream;
     private RunLengthIntegerReader scaleStream;
 
-    DecimalTreeReader(int columnId) {
-      super(columnId);
+    DecimalTreeReader(Path path, int columnId) {
+      super(path, columnId);
     }
 
     @Override
@@ -663,8 +673,15 @@ class RecordReaderImpl implements Record
     private int[] dictionaryOffsets;
     private RunLengthIntegerReader reader;
 
-    StringTreeReader(int columnId) {
-      super(columnId);
+    StringTreeReader(Path path, int columnId) {
+      super(path, columnId);
+    }
+
+    void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+      if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+        throw new IOException("Unknown encoding " + encoding + " in column " +
+            columnId + " of " + path);
+      }
     }
 
     @Override
@@ -755,10 +772,10 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private final String[] fieldNames;
 
-    StructTreeReader(int columnId,
+    StructTreeReader(Path path, int columnId,
                      List<OrcProto.Type> types,
                      boolean[] included) throws IOException {
-      super(columnId);
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getFieldNamesCount();
       this.fields = new TreeReader[fieldCount];
@@ -766,7 +783,7 @@ class RecordReaderImpl implements Record
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
         this.fieldNames[i] = type.getFieldNames(i);
       }
@@ -831,17 +848,17 @@ class RecordReaderImpl implements Record
     private final TreeReader[] fields;
     private RunLengthByteReader tags;
 
-    UnionTreeReader(int columnId,
-                     List<OrcProto.Type> types,
-                     boolean[] included) throws IOException {
-      super(columnId);
+    UnionTreeReader(Path path, int columnId,
+                    List<OrcProto.Type> types,
+                    boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int fieldCount = type.getSubtypesCount();
       this.fields = new TreeReader[fieldCount];
       for(int i=0; i < fieldCount; ++i) {
         int subtype = type.getSubtypes(i);
         if (included == null || included[subtype]) {
-          this.fields[i] = createTreeReader(subtype, types, included);
+          this.fields[i] = createTreeReader(path, subtype, types, included);
         }
       }
     }
@@ -904,12 +921,13 @@ class RecordReaderImpl implements Record
     private final TreeReader elementReader;
     private RunLengthIntegerReader lengths;
 
-    ListTreeReader(int columnId,
-                    List<OrcProto.Type> types,
-                    boolean[] included) throws IOException {
-      super(columnId);
+    ListTreeReader(Path path, int columnId,
+                   List<OrcProto.Type> types,
+                   boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
-      elementReader = createTreeReader(type.getSubtypes(0), types, included);
+      elementReader = createTreeReader(path, type.getSubtypes(0), types,
+          included);
     }
 
     @Override
@@ -977,20 +995,21 @@ class RecordReaderImpl implements Record
     private final TreeReader valueReader;
     private RunLengthIntegerReader lengths;
 
-    MapTreeReader(int columnId,
-                   List<OrcProto.Type> types,
-                   boolean[] included) throws IOException {
-      super(columnId);
+    MapTreeReader(Path path,
+                  int columnId,
+                  List<OrcProto.Type> types,
+                  boolean[] included) throws IOException {
+      super(path, columnId);
       OrcProto.Type type = types.get(columnId);
       int keyColumn = type.getSubtypes(0);
       int valueColumn = type.getSubtypes(1);
       if (included == null || included[keyColumn]) {
-        keyReader = createTreeReader(keyColumn, types, included);
+        keyReader = createTreeReader(path, keyColumn, types, included);
       } else {
         keyReader = null;
       }
       if (included == null || included[valueColumn]) {
-        valueReader = createTreeReader(valueColumn, types, included);
+        valueReader = createTreeReader(path, valueColumn, types, included);
       } else {
         valueReader = null;
       }
@@ -1053,42 +1072,43 @@ class RecordReaderImpl implements Record
     }
   }
 
-  private static TreeReader createTreeReader(int columnId,
+  private static TreeReader createTreeReader(Path path,
+                                             int columnId,
                                              List<OrcProto.Type> types,
                                              boolean[] included
                                             ) throws IOException {
     OrcProto.Type type = types.get(columnId);
     switch (type.getKind()) {
       case BOOLEAN:
-        return new BooleanTreeReader(columnId);
+        return new BooleanTreeReader(path, columnId);
       case BYTE:
-        return new ByteTreeReader(columnId);
+        return new ByteTreeReader(path, columnId);
       case DOUBLE:
-        return new DoubleTreeReader(columnId);
+        return new DoubleTreeReader(path, columnId);
       case FLOAT:
-        return new FloatTreeReader(columnId);
+        return new FloatTreeReader(path, columnId);
       case SHORT:
-        return new ShortTreeReader(columnId);
+        return new ShortTreeReader(path, columnId);
       case INT:
-        return new IntTreeReader(columnId);
+        return new IntTreeReader(path, columnId);
       case LONG:
-        return new LongTreeReader(columnId);
+        return new LongTreeReader(path, columnId);
       case STRING:
-        return new StringTreeReader(columnId);
+        return new StringTreeReader(path, columnId);
       case BINARY:
-        return new BinaryTreeReader(columnId);
+        return new BinaryTreeReader(path, columnId);
       case TIMESTAMP:
-        return new TimestampTreeReader(columnId);
+        return new TimestampTreeReader(path, columnId);
       case DECIMAL:
-        return new DecimalTreeReader(columnId);
+        return new DecimalTreeReader(path, columnId);
       case STRUCT:
-        return new StructTreeReader(columnId, types, included);
+        return new StructTreeReader(path, columnId, types, included);
       case LIST:
-        return new ListTreeReader(columnId, types, included);
+        return new ListTreeReader(path, columnId, types, included);
       case MAP:
-        return new MapTreeReader(columnId, types, included);
+        return new MapTreeReader(path, columnId, types, included);
       case UNION:
-        return new UnionTreeReader(columnId, types, included);
+        return new UnionTreeReader(path, columnId, types, included);
       default:
         throw new IllegalArgumentException("Unsupported type " +
           type.getKind());

Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Jul 20 18:30:47 2013
@@ -1484,7 +1484,10 @@ class WriterImpl implements Writer, Memo
     OrcProto.PostScript.Builder builder =
       OrcProto.PostScript.newBuilder()
         .setCompression(writeCompressionKind(compress))
-        .setFooterLength(footerLength);
+        .setFooterLength(footerLength)
+        .setMagic(OrcFile.MAGIC)
+        .addVersion(OrcFile.MAJOR_VERSION)
+        .addVersion(OrcFile.MINOR_VERSION);
     if (compress != CompressionKind.NONE) {
       builder.setCompressionBlockSize(bufferSize);
     }

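With this in place, the postscript of a file written by this version would
render in protobuf text format roughly as follows (the footerLength and
compressionBlockSize values are illustrative):

    footerLength: 280
    compression: ZLIB
    compressionBlockSize: 262144
    version: 0
    version: 11
    magic: "ORC"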
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Sat Jul 20 18:30:47 2013
@@ -135,4 +135,7 @@ message PostScript {
   optional uint64 footerLength = 1;
   optional CompressionKind compression = 2;
   optional uint64 compressionBlockSize = 3;
+  repeated uint32 version = 4 [packed = true];
+  // Leave this last in the record
+  optional string magic = 8000;
 }
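
Since the generated writeTo emits fields in ascending field-number order
(visible in the OrcProto.java diff above, where the magic write comes last),
the high field number 8000 pins the magic string to the very end of the
serialized postscript. Assuming no field is ever added behind it, the
postscript always ends with these seven bytes:

    82 F4 03    varint tag: (8000 << 3) | 2 = 64002
    03          length of "ORC"
    4F 52 43    "ORC"

followed only by the one-byte postscript length, which is exactly the range
ensureOrcFooter inspects.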