Posted to commits@hive.apache.org by om...@apache.org on 2013/07/20 20:30:47 UTC
svn commit: r1505187 - in /hive/trunk: ./
ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/
ql/src/java/org/apache/hadoop/hive/ql/io/orc/
ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/
Author: omalley
Date: Sat Jul 20 18:30:47 2013
New Revision: 1505187
URL: http://svn.apache.org/r1505187
Log:
HIVE-4724 Better detection of non-ORC files in the ORC reader (omalley)
Modified:
hive/trunk/ (props changed)
hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
Propchange: hive/trunk/
------------------------------------------------------------------------------
Merged /hive/branches/branch-0.11:r1505184
Modified: hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java (original)
+++ hive/trunk/ql/src/gen/protobuf/gen-java/org/apache/hadoop/hive/ql/io/orc/OrcProto.java Sat Jul 20 18:30:47 2013
@@ -9791,6 +9791,15 @@ public final class OrcProto {
// optional uint64 compressionBlockSize = 3;
boolean hasCompressionBlockSize();
long getCompressionBlockSize();
+
+ // repeated uint32 version = 4 [packed = true];
+ java.util.List<java.lang.Integer> getVersionList();
+ int getVersionCount();
+ int getVersion(int index);
+
+ // optional string magic = 8000;
+ boolean hasMagic();
+ String getMagic();
}
public static final class PostScript extends
com.google.protobuf.GeneratedMessage
@@ -9851,10 +9860,59 @@ public final class OrcProto {
return compressionBlockSize_;
}
+ // repeated uint32 version = 4 [packed = true];
+ public static final int VERSION_FIELD_NUMBER = 4;
+ private java.util.List<java.lang.Integer> version_;
+ public java.util.List<java.lang.Integer>
+ getVersionList() {
+ return version_;
+ }
+ public int getVersionCount() {
+ return version_.size();
+ }
+ public int getVersion(int index) {
+ return version_.get(index);
+ }
+ private int versionMemoizedSerializedSize = -1;
+
+ // optional string magic = 8000;
+ public static final int MAGIC_FIELD_NUMBER = 8000;
+ private java.lang.Object magic_;
+ public boolean hasMagic() {
+ return ((bitField0_ & 0x00000008) == 0x00000008);
+ }
+ public String getMagic() {
+ java.lang.Object ref = magic_;
+ if (ref instanceof String) {
+ return (String) ref;
+ } else {
+ com.google.protobuf.ByteString bs =
+ (com.google.protobuf.ByteString) ref;
+ String s = bs.toStringUtf8();
+ if (com.google.protobuf.Internal.isValidUtf8(bs)) {
+ magic_ = s;
+ }
+ return s;
+ }
+ }
+ private com.google.protobuf.ByteString getMagicBytes() {
+ java.lang.Object ref = magic_;
+ if (ref instanceof String) {
+ com.google.protobuf.ByteString b =
+ com.google.protobuf.ByteString.copyFromUtf8((String) ref);
+ magic_ = b;
+ return b;
+ } else {
+ return (com.google.protobuf.ByteString) ref;
+ }
+ }
+
private void initFields() {
footerLength_ = 0L;
compression_ = org.apache.hadoop.hive.ql.io.orc.OrcProto.CompressionKind.NONE;
compressionBlockSize_ = 0L;
+ version_ = java.util.Collections.emptyList();;
+ magic_ = "";
}
private byte memoizedIsInitialized = -1;
public final boolean isInitialized() {
@@ -9877,6 +9935,16 @@ public final class OrcProto {
if (((bitField0_ & 0x00000004) == 0x00000004)) {
output.writeUInt64(3, compressionBlockSize_);
}
+ if (getVersionList().size() > 0) {
+ output.writeRawVarint32(34);
+ output.writeRawVarint32(versionMemoizedSerializedSize);
+ }
+ for (int i = 0; i < version_.size(); i++) {
+ output.writeUInt32NoTag(version_.get(i));
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ output.writeBytes(8000, getMagicBytes());
+ }
getUnknownFields().writeTo(output);
}
@@ -9898,6 +9966,24 @@ public final class OrcProto {
size += com.google.protobuf.CodedOutputStream
.computeUInt64Size(3, compressionBlockSize_);
}
+ {
+ int dataSize = 0;
+ for (int i = 0; i < version_.size(); i++) {
+ dataSize += com.google.protobuf.CodedOutputStream
+ .computeUInt32SizeNoTag(version_.get(i));
+ }
+ size += dataSize;
+ if (!getVersionList().isEmpty()) {
+ size += 1;
+ size += com.google.protobuf.CodedOutputStream
+ .computeInt32SizeNoTag(dataSize);
+ }
+ versionMemoizedSerializedSize = dataSize;
+ }
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ size += com.google.protobuf.CodedOutputStream
+ .computeBytesSize(8000, getMagicBytes());
+ }
size += getUnknownFields().getSerializedSize();
memoizedSerializedSize = size;
return size;
@@ -10028,6 +10114,10 @@ public final class OrcProto {
bitField0_ = (bitField0_ & ~0x00000002);
compressionBlockSize_ = 0L;
bitField0_ = (bitField0_ & ~0x00000004);
+ version_ = java.util.Collections.emptyList();;
+ bitField0_ = (bitField0_ & ~0x00000008);
+ magic_ = "";
+ bitField0_ = (bitField0_ & ~0x00000010);
return this;
}
@@ -10078,6 +10168,15 @@ public final class OrcProto {
to_bitField0_ |= 0x00000004;
}
result.compressionBlockSize_ = compressionBlockSize_;
+ if (((bitField0_ & 0x00000008) == 0x00000008)) {
+ version_ = java.util.Collections.unmodifiableList(version_);
+ bitField0_ = (bitField0_ & ~0x00000008);
+ }
+ result.version_ = version_;
+ if (((from_bitField0_ & 0x00000010) == 0x00000010)) {
+ to_bitField0_ |= 0x00000008;
+ }
+ result.magic_ = magic_;
result.bitField0_ = to_bitField0_;
onBuilt();
return result;
@@ -10103,6 +10202,19 @@ public final class OrcProto {
if (other.hasCompressionBlockSize()) {
setCompressionBlockSize(other.getCompressionBlockSize());
}
+ if (!other.version_.isEmpty()) {
+ if (version_.isEmpty()) {
+ version_ = other.version_;
+ bitField0_ = (bitField0_ & ~0x00000008);
+ } else {
+ ensureVersionIsMutable();
+ version_.addAll(other.version_);
+ }
+ onChanged();
+ }
+ if (other.hasMagic()) {
+ setMagic(other.getMagic());
+ }
this.mergeUnknownFields(other.getUnknownFields());
return this;
}
@@ -10155,6 +10267,25 @@ public final class OrcProto {
compressionBlockSize_ = input.readUInt64();
break;
}
+ case 32: {
+ ensureVersionIsMutable();
+ version_.add(input.readUInt32());
+ break;
+ }
+ case 34: {
+ int length = input.readRawVarint32();
+ int limit = input.pushLimit(length);
+ while (input.getBytesUntilLimit() > 0) {
+ addVersion(input.readUInt32());
+ }
+ input.popLimit(limit);
+ break;
+ }
+ case 64002: {
+ bitField0_ |= 0x00000010;
+ magic_ = input.readBytes();
+ break;
+ }
}
}
}
@@ -10227,6 +10358,87 @@ public final class OrcProto {
return this;
}
+ // repeated uint32 version = 4 [packed = true];
+ private java.util.List<java.lang.Integer> version_ = java.util.Collections.emptyList();;
+ private void ensureVersionIsMutable() {
+ if (!((bitField0_ & 0x00000008) == 0x00000008)) {
+ version_ = new java.util.ArrayList<java.lang.Integer>(version_);
+ bitField0_ |= 0x00000008;
+ }
+ }
+ public java.util.List<java.lang.Integer>
+ getVersionList() {
+ return java.util.Collections.unmodifiableList(version_);
+ }
+ public int getVersionCount() {
+ return version_.size();
+ }
+ public int getVersion(int index) {
+ return version_.get(index);
+ }
+ public Builder setVersion(
+ int index, int value) {
+ ensureVersionIsMutable();
+ version_.set(index, value);
+ onChanged();
+ return this;
+ }
+ public Builder addVersion(int value) {
+ ensureVersionIsMutable();
+ version_.add(value);
+ onChanged();
+ return this;
+ }
+ public Builder addAllVersion(
+ java.lang.Iterable<? extends java.lang.Integer> values) {
+ ensureVersionIsMutable();
+ super.addAll(values, version_);
+ onChanged();
+ return this;
+ }
+ public Builder clearVersion() {
+ version_ = java.util.Collections.emptyList();;
+ bitField0_ = (bitField0_ & ~0x00000008);
+ onChanged();
+ return this;
+ }
+
+ // optional string magic = 8000;
+ private java.lang.Object magic_ = "";
+ public boolean hasMagic() {
+ return ((bitField0_ & 0x00000010) == 0x00000010);
+ }
+ public String getMagic() {
+ java.lang.Object ref = magic_;
+ if (!(ref instanceof String)) {
+ String s = ((com.google.protobuf.ByteString) ref).toStringUtf8();
+ magic_ = s;
+ return s;
+ } else {
+ return (String) ref;
+ }
+ }
+ public Builder setMagic(String value) {
+ if (value == null) {
+ throw new NullPointerException();
+ }
+ bitField0_ |= 0x00000010;
+ magic_ = value;
+ onChanged();
+ return this;
+ }
+ public Builder clearMagic() {
+ bitField0_ = (bitField0_ & ~0x00000010);
+ magic_ = getDefaultInstance().getMagic();
+ onChanged();
+ return this;
+ }
+ void setMagic(com.google.protobuf.ByteString value) {
+ bitField0_ |= 0x00000010;
+ magic_ = value;
+ onChanged();
+ }
+
// @@protoc_insertion_point(builder_scope:org.apache.hadoop.hive.ql.io.orc.PostScript)
}
@@ -10384,12 +10596,13 @@ public final class OrcProto {
".hive.ql.io.orc.UserMetadataItem\022\024\n\014numb" +
"erOfRows\030\006 \001(\004\022F\n\nstatistics\030\007 \003(\01322.org" +
".apache.hadoop.hive.ql.io.orc.ColumnStat" +
- "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\210\001\n\nPostS" +
+ "istics\022\026\n\016rowIndexStride\030\010 \001(\r\"\255\001\n\nPostS" +
"cript\022\024\n\014footerLength\030\001 \001(\004\022F\n\013compressi" +
"on\030\002 \001(\01621.org.apache.hadoop.hive.ql.io.",
"orc.CompressionKind\022\034\n\024compressionBlockS" +
- "ize\030\003 \001(\004*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010" +
- "\n\004ZLIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
+ "ize\030\003 \001(\004\022\023\n\007version\030\004 \003(\rB\002\020\001\022\016\n\005magic\030" +
+ "\300> \001(\t*:\n\017CompressionKind\022\010\n\004NONE\020\000\022\010\n\004Z" +
+ "LIB\020\001\022\n\n\006SNAPPY\020\002\022\007\n\003LZO\020\003"
};
com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -10521,7 +10734,7 @@ public final class OrcProto {
internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_fieldAccessorTable = new
com.google.protobuf.GeneratedMessage.FieldAccessorTable(
internal_static_org_apache_hadoop_hive_ql_io_orc_PostScript_descriptor,
- new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", },
+ new java.lang.String[] { "FooterLength", "Compression", "CompressionBlockSize", "Version", "Magic", },
org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.class,
org.apache.hadoop.hive.ql.io.orc.OrcProto.PostScript.Builder.class);
return null;
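
The constants 34 and 64002 that appear in the generated code above are protobuf field tags, computed as (fieldNumber << 3) | wireType, with wire type 2 (length-delimited) covering both the packed version list and the magic string. A minimal sketch verifying the arithmetic (the TagCheck class and makeTag helper are illustrative, not part of the generated API):

    // Protobuf encodes each field as a varint tag: (fieldNumber << 3) | wireType.
    // Wire type 2 marks a length-delimited value, which covers packed repeated
    // fields as well as strings and bytes.
    public class TagCheck {
      static final int WIRETYPE_LENGTH_DELIMITED = 2;

      static int makeTag(int fieldNumber, int wireType) {
        return (fieldNumber << 3) | wireType;
      }

      public static void main(String[] args) {
        // repeated uint32 version = 4 [packed = true] -> tag 34, matching
        // output.writeRawVarint32(34) in writeTo above.
        System.out.println(makeTag(4, WIRETYPE_LENGTH_DELIMITED));    // 34
        // optional string magic = 8000 -> tag 64002, matching "case 64002:"
        // in the parsing loop above.
        System.out.println(makeTag(8000, WIRETYPE_LENGTH_DELIMITED)); // 64002
      }
    }
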
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/OrcFile.java Sat Jul 20 18:30:47 2013
@@ -31,6 +31,26 @@ import java.io.IOException;
public final class OrcFile {
public static final String MAGIC = "ORC";
+
+ /**
+ * Create a version number for the ORC file format, so that we can add
+ * non-forward compatible changes in the future. To make it easier for users
+ * to understand the version numbers, we use the Hive release number that
+ * first wrote that version of ORC files.
+ *
+ * Thus, if you add new encodings or other non-forward compatible changes
+ * to ORC files, which prevent the old reader from reading the new format,
+ * you should change these variables to reflect the next Hive release number.
+ * Non-forward compatible changes should never be added in patch releases.
+ *
+ * Do not make any changes that break backwards compatibility, which would
+ * prevent the new reader from reading ORC files generated by any released
+ * version of Hive.
+ */
+ public static final int MAJOR_VERSION = 0;
+ public static final int MINOR_VERSION = 11;
+
+ // the table properties that control ORC files
public static final String COMPRESSION = "orc.compress";
static final String DEFAULT_COMPRESSION = "ZLIB";
public static final String COMPRESSION_BLOCK_SIZE = "orc.compress.size";
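
The new MAJOR_VERSION/MINOR_VERSION pair is compared lexicographically by the reader (see checkOrcVersion in ReaderImpl.java below): a file comes from a newer writer if its major version is larger, or equal with a larger minor version. A hedged sketch of that comparison (the method name isFutureVersion is illustrative):

    // Sketch of the (major, minor) comparison the reader performs; assumes
    // the same semantics as ReaderImpl.checkOrcVersion in this commit.
    static boolean isFutureVersion(int fileMajor, int fileMinor,
                                   int readerMajor, int readerMinor) {
      return fileMajor > readerMajor ||
          (fileMajor == readerMajor && fileMinor > readerMinor);
    }

    // e.g. isFutureVersion(0, 12, 0, 11) == true: a file written by Hive 0.12
    // triggers the warning when read by this 0.11-level reader.
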
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Sat Jul 20 18:30:47 2013
@@ -19,10 +19,13 @@
package org.apache.hadoop.hive.ql.io.orc;
import com.google.protobuf.CodedInputStream;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Text;
import java.io.IOException;
import java.io.InputStream;
@@ -33,6 +36,8 @@ import java.util.List;
final class ReaderImpl implements Reader {
+ private static final Log LOG = LogFactory.getLog(ReaderImpl.class);
+
private static final int DIRECTORY_SIZE_GUESS = 16 * 1024;
private final FileSystem fileSystem;
@@ -176,6 +181,81 @@ final class ReaderImpl implements Reader
return result;
}
+ /**
+ * Ensure this is an ORC file to prevent users from trying to read text
+ * files or RC files as ORC files.
+ * @param in the file being read
+ * @param path the filename for error messages
+ * @param psLen the postscript length
+ * @param buffer the tail of the file
+ * @throws IOException
+ */
+ static void ensureOrcFooter(FSDataInputStream in,
+ Path path,
+ int psLen,
+ ByteBuffer buffer) throws IOException {
+ int len = OrcFile.MAGIC.length();
+ if (psLen < len + 1) {
+ throw new IOException("Malformed ORC file " + path +
+ ". Invalid postscript length " + psLen);
+ }
+ int offset = buffer.arrayOffset() + buffer.position() + buffer.limit() - 1
+ - len;
+ byte[] array = buffer.array();
+ // now look for the magic string at the end of the postscript.
+ if (!Text.decode(array, offset, len).equals(OrcFile.MAGIC)) {
+ // If it isn't there, this may be the 0.11.0 version of ORC.
+ // Read the first 3 bytes of the file to check for the header
+ in.seek(0);
+ byte[] header = new byte[len];
+ in.readFully(header, 0, len);
+ // if it isn't there, this isn't an ORC file
+ if (!Text.decode(header, 0, len).equals(OrcFile.MAGIC)) {
+ throw new IOException("Malformed ORC file " + path +
+ ". Invalid postscript.");
+ }
+ }
+ }
+
+ /**
+ * Build a version string out of an array.
+ * @param version the version number as a list
+ * @return the human readable form of the version string
+ */
+ private static String versionString(List<Integer> version) {
+ StringBuilder buffer = new StringBuilder();
+ for(int i=0; i < version.size(); ++i) {
+ if (i != 0) {
+ buffer.append('.');
+ }
+ buffer.append(version.get(i));
+ }
+ return buffer.toString();
+ }
+
+ /**
+ * Check to see if this ORC file is from a future version and if so,
+ * warn the user that we may not be able to read all of the column encodings.
+ * @param log the logger to write any error message to
+ * @param path the filename for error messages
+ * @param version the version of Hive that wrote the file.
+ */
+ static void checkOrcVersion(Log log, Path path, List<Integer> version) {
+ if (version.size() >= 1) {
+ int major = version.get(0);
+ int minor = 0;
+ if (version.size() >= 2) {
+ minor = version.get(1);
+ }
+ if (major > OrcFile.MAJOR_VERSION ||
+ (major == OrcFile.MAJOR_VERSION && minor > OrcFile.MINOR_VERSION)) {
+ log.warn("ORC file " + path + " was written by a future Hive version " +
+ versionString(version) + ". This file may not be readable by " +
+ "this version of Hive.");
+ }
+ }
+ }
+
ReaderImpl(FileSystem fs, Path path) throws IOException {
this.fileSystem = fs;
this.path = path;
@@ -187,10 +267,12 @@ final class ReaderImpl implements Reader
file.readFully(buffer.array(), buffer.arrayOffset() + buffer.position(),
buffer.remaining());
int psLen = buffer.get(readSize - 1);
+ ensureOrcFooter(file, path, psLen, buffer);
int psOffset = readSize - 1 - psLen;
CodedInputStream in = CodedInputStream.newInstance(buffer.array(),
buffer.arrayOffset() + psOffset, psLen);
OrcProto.PostScript ps = OrcProto.PostScript.parseFrom(in);
+ checkOrcVersion(LOG, path, ps.getVersionList());
int footerSize = (int) ps.getFooterLength();
bufferSize = (int) ps.getCompressionBlockSize();
switch (ps.getCompression()) {
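
The tail of an ORC file therefore ends with the postscript, whose last bytes are the magic string "ORC", followed by a single byte holding the postscript length. A simplified, self-contained version of the check in ensureOrcFooter, using java.io instead of the Hadoop FileSystem API and omitting the fallback that re-reads the first three bytes for files written before this change:

    import java.io.IOException;
    import java.io.RandomAccessFile;

    public class OrcTailCheck {
      private static final String MAGIC = "ORC";  // OrcFile.MAGIC

      static boolean looksLikeOrc(String filename) throws IOException {
        RandomAccessFile file = new RandomAccessFile(filename, "r");
        try {
          long fileLen = file.length();
          int magicLen = MAGIC.length();
          if (fileLen < magicLen + 2) {
            return false;  // too small for a postscript plus its length byte
          }
          // the last byte of the file is the postscript length
          file.seek(fileLen - 1);
          int psLen = file.read();
          if (psLen < magicLen + 1) {
            return false;  // postscript too short to hold the magic
          }
          // the magic string sits immediately before the length byte
          byte[] tail = new byte[magicLen];
          file.seek(fileLen - 1 - magicLen);
          file.readFully(tail);
          return MAGIC.equals(new String(tail, "UTF-8"));
        } finally {
          file.close();
        }
      }
    }
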
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sat Jul 20 18:30:47 2013
@@ -87,7 +87,7 @@ class RecordReaderImpl implements Record
}
firstRow = skippedRows;
totalRowCount = rows;
- reader = createTreeReader(0, types, included);
+ reader = createTreeReader(path, 0, types, included);
indexes = new OrcProto.RowIndex[types.size()];
rowIndexStride = strideRate;
if (this.stripes.size() > 0) {
@@ -110,17 +110,27 @@ class RecordReaderImpl implements Record
}
private abstract static class TreeReader {
+ protected final Path path;
protected final int columnId;
private BitFieldReader present = null;
protected boolean valuePresent = false;
- TreeReader(int columnId) {
+ TreeReader(Path path, int columnId) {
+ this.path = path;
this.columnId = columnId;
}
+ void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+ if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DIRECT) {
+ throw new IOException("Unknown encoding " + encoding + " in column " +
+ columnId + " of " + path);
+ }
+ }
+
void startStripe(Map<StreamName, InStream> streams,
List<OrcProto.ColumnEncoding> encoding
) throws IOException {
+ checkEncoding(encoding.get(columnId));
InStream in = streams.get(new StreamName(columnId,
OrcProto.Stream.Kind.PRESENT));
if (in == null) {
@@ -169,8 +179,8 @@ class RecordReaderImpl implements Record
private static class BooleanTreeReader extends TreeReader{
private BitFieldReader reader = null;
- BooleanTreeReader(int columnId) {
- super(columnId);
+ BooleanTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -212,8 +222,8 @@ class RecordReaderImpl implements Record
private static class ByteTreeReader extends TreeReader{
private RunLengthByteReader reader = null;
- ByteTreeReader(int columnId) {
- super(columnId);
+ ByteTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -255,8 +265,8 @@ class RecordReaderImpl implements Record
private static class ShortTreeReader extends TreeReader{
private RunLengthIntegerReader reader = null;
- ShortTreeReader(int columnId) {
- super(columnId);
+ ShortTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -299,8 +309,8 @@ class RecordReaderImpl implements Record
private static class IntTreeReader extends TreeReader{
private RunLengthIntegerReader reader = null;
- IntTreeReader(int columnId) {
- super(columnId);
+ IntTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -343,8 +353,8 @@ class RecordReaderImpl implements Record
private static class LongTreeReader extends TreeReader{
private RunLengthIntegerReader reader = null;
- LongTreeReader(int columnId) {
- super(columnId);
+ LongTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -387,8 +397,8 @@ class RecordReaderImpl implements Record
private static class FloatTreeReader extends TreeReader{
private InStream stream;
- FloatTreeReader(int columnId) {
- super(columnId);
+ FloatTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -434,8 +444,8 @@ class RecordReaderImpl implements Record
private static class DoubleTreeReader extends TreeReader{
private InStream stream;
- DoubleTreeReader(int columnId) {
- super(columnId);
+ DoubleTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -481,8 +491,8 @@ class RecordReaderImpl implements Record
private InStream stream;
private RunLengthIntegerReader lengths;
- BinaryTreeReader(int columnId) {
- super(columnId);
+ BinaryTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -545,8 +555,8 @@ class RecordReaderImpl implements Record
private RunLengthIntegerReader data;
private RunLengthIntegerReader nanos;
- TimestampTreeReader(int columnId) {
- super(columnId);
+ TimestampTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -615,8 +625,8 @@ class RecordReaderImpl implements Record
private InStream valueStream;
private RunLengthIntegerReader scaleStream;
- DecimalTreeReader(int columnId) {
- super(columnId);
+ DecimalTreeReader(Path path, int columnId) {
+ super(path, columnId);
}
@Override
@@ -663,8 +673,15 @@ class RecordReaderImpl implements Record
private int[] dictionaryOffsets;
private RunLengthIntegerReader reader;
- StringTreeReader(int columnId) {
- super(columnId);
+ StringTreeReader(Path path, int columnId) {
+ super(path, columnId);
+ }
+
+ void checkEncoding(OrcProto.ColumnEncoding encoding) throws IOException {
+ if (encoding.getKind() != OrcProto.ColumnEncoding.Kind.DICTIONARY) {
+ throw new IOException("Unknown encoding " + encoding + " in column " +
+ columnId + " of " + path);
+ }
}
@Override
@@ -755,10 +772,10 @@ class RecordReaderImpl implements Record
private final TreeReader[] fields;
private final String[] fieldNames;
- StructTreeReader(int columnId,
+ StructTreeReader(Path path, int columnId,
List<OrcProto.Type> types,
boolean[] included) throws IOException {
- super(columnId);
+ super(path, columnId);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getFieldNamesCount();
this.fields = new TreeReader[fieldCount];
@@ -766,7 +783,7 @@ class RecordReaderImpl implements Record
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included);
}
this.fieldNames[i] = type.getFieldNames(i);
}
@@ -831,17 +848,17 @@ class RecordReaderImpl implements Record
private final TreeReader[] fields;
private RunLengthByteReader tags;
- UnionTreeReader(int columnId,
- List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(columnId);
+ UnionTreeReader(Path path, int columnId,
+ List<OrcProto.Type> types,
+ boolean[] included) throws IOException {
+ super(path, columnId);
OrcProto.Type type = types.get(columnId);
int fieldCount = type.getSubtypesCount();
this.fields = new TreeReader[fieldCount];
for(int i=0; i < fieldCount; ++i) {
int subtype = type.getSubtypes(i);
if (included == null || included[subtype]) {
- this.fields[i] = createTreeReader(subtype, types, included);
+ this.fields[i] = createTreeReader(path, subtype, types, included);
}
}
}
@@ -904,12 +921,13 @@ class RecordReaderImpl implements Record
private final TreeReader elementReader;
private RunLengthIntegerReader lengths;
- ListTreeReader(int columnId,
- List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(columnId);
+ ListTreeReader(Path path, int columnId,
+ List<OrcProto.Type> types,
+ boolean[] included) throws IOException {
+ super(path, columnId);
OrcProto.Type type = types.get(columnId);
- elementReader = createTreeReader(type.getSubtypes(0), types, included);
+ elementReader = createTreeReader(path, type.getSubtypes(0), types,
+ included);
}
@Override
@@ -977,20 +995,21 @@ class RecordReaderImpl implements Record
private final TreeReader valueReader;
private RunLengthIntegerReader lengths;
- MapTreeReader(int columnId,
- List<OrcProto.Type> types,
- boolean[] included) throws IOException {
- super(columnId);
+ MapTreeReader(Path path,
+ int columnId,
+ List<OrcProto.Type> types,
+ boolean[] included) throws IOException {
+ super(path, columnId);
OrcProto.Type type = types.get(columnId);
int keyColumn = type.getSubtypes(0);
int valueColumn = type.getSubtypes(1);
if (included == null || included[keyColumn]) {
- keyReader = createTreeReader(keyColumn, types, included);
+ keyReader = createTreeReader(path, keyColumn, types, included);
} else {
keyReader = null;
}
if (included == null || included[valueColumn]) {
- valueReader = createTreeReader(valueColumn, types, included);
+ valueReader = createTreeReader(path, valueColumn, types, included);
} else {
valueReader = null;
}
@@ -1053,42 +1072,43 @@ class RecordReaderImpl implements Record
}
}
- private static TreeReader createTreeReader(int columnId,
+ private static TreeReader createTreeReader(Path path,
+ int columnId,
List<OrcProto.Type> types,
boolean[] included
) throws IOException {
OrcProto.Type type = types.get(columnId);
switch (type.getKind()) {
case BOOLEAN:
- return new BooleanTreeReader(columnId);
+ return new BooleanTreeReader(path, columnId);
case BYTE:
- return new ByteTreeReader(columnId);
+ return new ByteTreeReader(path, columnId);
case DOUBLE:
- return new DoubleTreeReader(columnId);
+ return new DoubleTreeReader(path, columnId);
case FLOAT:
- return new FloatTreeReader(columnId);
+ return new FloatTreeReader(path, columnId);
case SHORT:
- return new ShortTreeReader(columnId);
+ return new ShortTreeReader(path, columnId);
case INT:
- return new IntTreeReader(columnId);
+ return new IntTreeReader(path, columnId);
case LONG:
- return new LongTreeReader(columnId);
+ return new LongTreeReader(path, columnId);
case STRING:
- return new StringTreeReader(columnId);
+ return new StringTreeReader(path, columnId);
case BINARY:
- return new BinaryTreeReader(columnId);
+ return new BinaryTreeReader(path, columnId);
case TIMESTAMP:
- return new TimestampTreeReader(columnId);
+ return new TimestampTreeReader(path, columnId);
case DECIMAL:
- return new DecimalTreeReader(columnId);
+ return new DecimalTreeReader(path, columnId);
case STRUCT:
- return new StructTreeReader(columnId, types, included);
+ return new StructTreeReader(path, columnId, types, included);
case LIST:
- return new ListTreeReader(columnId, types, included);
+ return new ListTreeReader(path, columnId, types, included);
case MAP:
- return new MapTreeReader(columnId, types, included);
+ return new MapTreeReader(path, columnId, types, included);
case UNION:
- return new UnionTreeReader(columnId, types, included);
+ return new UnionTreeReader(path, columnId, types, included);
default:
throw new IllegalArgumentException("Unsupported type " +
type.getKind());
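
Threading the file path into every TreeReader exists purely to make these errors actionable; the companion change is the checkEncoding hook, which the base class restricts to DIRECT and StringTreeReader overrides to accept only DICTIONARY, so a file using a future encoding fails fast with the path and column id. A pared-down sketch of the pattern (class and enum names mirror the diff but are simplified to stand alone):

    import java.io.IOException;

    class EncodingCheckSketch {
      enum Kind { DIRECT, DICTIONARY }

      static class TreeReader {
        protected final String path;
        protected final int columnId;

        TreeReader(String path, int columnId) {
          this.path = path;
          this.columnId = columnId;
        }

        // most column types only understand the DIRECT encoding
        void checkEncoding(Kind encoding) throws IOException {
          if (encoding != Kind.DIRECT) {
            throw new IOException("Unknown encoding " + encoding +
                " in column " + columnId + " of " + path);
          }
        }
      }

      static class StringTreeReader extends TreeReader {
        StringTreeReader(String path, int columnId) {
          super(path, columnId);
        }

        // strings are dictionary-encoded in this version of ORC, so the
        // accepted encoding is DICTIONARY rather than DIRECT
        @Override
        void checkEncoding(Kind encoding) throws IOException {
          if (encoding != Kind.DICTIONARY) {
            throw new IOException("Unknown encoding " + encoding +
                " in column " + columnId + " of " + path);
          }
        }
      }
    }
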
Modified: hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java (original)
+++ hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/io/orc/WriterImpl.java Sat Jul 20 18:30:47 2013
@@ -1484,7 +1484,10 @@ class WriterImpl implements Writer, Memo
OrcProto.PostScript.Builder builder =
OrcProto.PostScript.newBuilder()
.setCompression(writeCompressionKind(compress))
- .setFooterLength(footerLength);
+ .setFooterLength(footerLength)
+ .setMagic(OrcFile.MAGIC)
+ .addVersion(OrcFile.MAJOR_VERSION)
+ .addVersion(OrcFile.MINOR_VERSION);
if (compress != CompressionKind.NONE) {
builder.setCompressionBlockSize(bufferSize);
}
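
With this change every postscript records the magic and the writer's (major, minor) version. A round-trip sketch using the generated API from this commit (the serialize/parse calls are standard protobuf, shown only to illustrate what now gets recorded):

    OrcProto.PostScript ps = OrcProto.PostScript.newBuilder()
        .setFooterLength(0)                 // placeholder value
        .setMagic(OrcFile.MAGIC)            // "ORC"
        .addVersion(OrcFile.MAJOR_VERSION)  // 0
        .addVersion(OrcFile.MINOR_VERSION)  // 11
        .build();
    byte[] bytes = ps.toByteArray();
    OrcProto.PostScript parsed = OrcProto.PostScript.parseFrom(bytes);
    assert OrcFile.MAGIC.equals(parsed.getMagic());
    assert parsed.getVersionList().size() == 2;
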
Modified: hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto
URL: http://svn.apache.org/viewvc/hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto?rev=1505187&r1=1505186&r2=1505187&view=diff
==============================================================================
--- hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto (original)
+++ hive/trunk/ql/src/protobuf/org/apache/hadoop/hive/ql/io/orc/orc_proto.proto Sat Jul 20 18:30:47 2013
@@ -135,4 +135,7 @@ message PostScript {
optional uint64 footerLength = 1;
optional CompressionKind compression = 2;
optional uint64 compressionBlockSize = 3;
+ repeated uint32 version = 4 [packed = true];
+ // Leave this last in the record
+ optional string magic = 8000;
}
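
The "Leave this last in the record" comment is what makes ensureOrcFooter work: the generated writeTo serializes fields in field-number order, so giving magic the large field number 8000 keeps the string at the very end of the serialized postscript, which itself sits at the end of the file. The resulting bytes, worked out by hand from standard protobuf varint encoding (not taken from the commit):

    // Field 8000, wire type 2 (length-delimited):
    //   tag    = (8000 << 3) | 2 = 64002 -> varint bytes 0x82 0xF4 0x03
    //   length = 3                       -> byte 0x03
    //   value  = "ORC"                   -> bytes 0x4F 0x52 0x43
    byte[] magicField = {
        (byte) 0x82, (byte) 0xF4, 0x03,   // tag varint for field 8000
        0x03,                             // string length
        'O', 'R', 'C'                     // the magic itself, ending the record
    };
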