You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by om...@apache.org on 2017/06/18 02:44:11 UTC
orc git commit: ORC-202. Add writer implementation enum to record
which software wrote the file.
Repository: orc
Updated Branches:
refs/heads/master ded204a4a -> 7dd7dafa8
ORC-202. Add writer implementation enum to record which software wrote the
file.
Fixes #132
Signed-off-by: Owen O'Malley <om...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/orc/repo
Commit: http://git-wip-us.apache.org/repos/asf/orc/commit/7dd7dafa
Tree: http://git-wip-us.apache.org/repos/asf/orc/tree/7dd7dafa
Diff: http://git-wip-us.apache.org/repos/asf/orc/diff/7dd7dafa
Branch: refs/heads/master
Commit: 7dd7dafa8085109b71c43e3092f3b0eda8046991
Parents: ded204a
Author: Owen O'Malley <om...@apache.org>
Authored: Fri Jun 16 08:50:53 2017 -0700
Committer: Owen O'Malley <om...@apache.org>
Committed: Sat Jun 17 19:43:43 2017 -0700
----------------------------------------------------------------------
.../src/java/org/apache/orc/FileMetadata.java | 2 +
java/core/src/java/org/apache/orc/OrcFile.java | 118 ++++++++++++++-----
.../src/java/org/apache/orc/impl/OrcTail.java | 6 +-
.../java/org/apache/orc/impl/ReaderImpl.java | 4 +-
.../java/org/apache/orc/impl/WriterImpl.java | 1 +
.../test/org/apache/orc/TestVectorOrcFile.java | 47 +++++++-
.../resources/orc-file-dump-bloomfilter.out | 2 +-
.../resources/orc-file-dump-bloomfilter2.out | 2 +-
.../orc-file-dump-dictionary-threshold.out | 2 +-
.../tools/src/test/resources/orc-file-dump.json | 2 +-
java/tools/src/test/resources/orc-file-dump.out | 2 +-
.../src/test/resources/orc-file-has-null.out | 2 +-
proto/orc_proto.proto | 40 +++++--
13 files changed, 180 insertions(+), 50 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/FileMetadata.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/FileMetadata.java b/java/core/src/java/org/apache/orc/FileMetadata.java
index 807e696..347035b 100644
--- a/java/core/src/java/org/apache/orc/FileMetadata.java
+++ b/java/core/src/java/org/apache/orc/FileMetadata.java
@@ -50,6 +50,8 @@ public interface FileMetadata {
int getMetadataSize();
+ int getWriterImplementation();
+
int getWriterVersionNum();
List<OrcProto.Type> getTypes();
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/OrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/OrcFile.java b/java/core/src/java/org/apache/orc/OrcFile.java
index 11281cb..d1fa3fb 100644
--- a/java/core/src/java/org/apache/orc/OrcFile.java
+++ b/java/core/src/java/org/apache/orc/OrcFile.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.orc.MemoryManager;
import org.apache.orc.impl.MemoryManagerImpl;
import org.apache.orc.impl.OrcTail;
import org.apache.orc.impl.ReaderImpl;
@@ -108,47 +107,91 @@ public class OrcFile {
}
}
+ public enum WriterImplementation {
+ ORC_JAVA(0), // ORC Java writer
+ ORC_CPP(1), // ORC C++ writer
+ PRESTO(2), // Presto writer
+ UNKNOWN(Integer.MAX_VALUE); // fallback returned by from() for any unrecognized id
+
+ private final int id;
+
+ WriterImplementation(int id) {
+ this.id = id;
+ }
+
+ public int getId() {
+ return id;
+ }
+
+ public static WriterImplementation from(int id) {
+ WriterImplementation[] values = values();
+ if (id >= 0 && id < values.length - 1) { // length - 1 excludes the trailing UNKNOWN constant
+ return values[id]; // indexed by declaration position, so ids must match declaration order
+ }
+ return UNKNOWN;
+ }
+ }
+
/**
* Records the version of the writer in terms of which bugs have been fixed.
- * For bugs in the writer, but the old readers already read the new data
- * correctly, bump this version instead of the Version.
+ * When you fix bugs in the writer (or make substantial changes) that don't
+ * change the file format, add a new version here instead of Version.
+ *
+ * The ids are assigned sequentially from 6 for each WriterImplementation so that
+ * readers that predate ORC-202 treat the other writers correctly.
*/
public enum WriterVersion {
- ORIGINAL(0),
- HIVE_8732(1), // corrupted stripe/file maximum column statistics
- HIVE_4243(2), // use real column names from Hive tables
- HIVE_12055(3), // vectorized writer
- HIVE_13083(4), // decimal writer updating present stream wrongly
- ORC_101(5), // bloom filters use utf8
- ORC_135(6), // timestamp stats use utc
+ // Java ORC Writer
+ ORIGINAL(WriterImplementation.ORC_JAVA, 0),
+ HIVE_8732(WriterImplementation.ORC_JAVA, 1), // fixed stripe/file maximum
+ // statistics & string statistics
+ // use utf8 for min/max
+ HIVE_4243(WriterImplementation.ORC_JAVA, 2), // use real column names from
+ // Hive tables
+ HIVE_12055(WriterImplementation.ORC_JAVA, 3), // vectorized writer
+ HIVE_13083(WriterImplementation.ORC_JAVA, 4), // decimals write present stream correctly
+ ORC_101(WriterImplementation.ORC_JAVA, 5), // bloom filters use utf8
+ ORC_135(WriterImplementation.ORC_JAVA, 6), // timestamp stats use utc
+
+ // C++ ORC Writer
+ ORC_CPP_ORIGINAL(WriterImplementation.ORC_CPP, 6),
+
+ // Presto Writer
+ PRESTO_ORIGINAL(WriterImplementation.PRESTO, 6),
// Don't use any magic numbers here except for the below:
- FUTURE(Integer.MAX_VALUE); // a version from a future writer
+ FUTURE(WriterImplementation.UNKNOWN, Integer.MAX_VALUE); // a version from a future writer
private final int id;
+ private final WriterImplementation writer;
+
+ public WriterImplementation getWriterImplementation() {
+ return writer;
+ }
public int getId() {
return id;
}
- WriterVersion(int id) {
+ WriterVersion(WriterImplementation writer, int id) {
+ this.writer = writer;
this.id = id;
}
- private static final WriterVersion[] values;
+ private static final WriterVersion[][] values =
+ new WriterVersion[WriterImplementation.values().length][];
+
static {
- // Assumes few non-negative values close to zero.
- int max = Integer.MIN_VALUE;
- for (WriterVersion v : WriterVersion.values()) {
- if (v.id < 0) throw new AssertionError();
- if (v.id > max && FUTURE.id != v.id) {
- max = v.id;
- }
- }
- values = new WriterVersion[max + 1];
- for (WriterVersion v : WriterVersion.values()) {
- if (v.id < values.length) {
- values[v.id] = v;
+ for(WriterVersion v: WriterVersion.values()) {
+ WriterImplementation writer = v.writer;
+ if (writer != WriterImplementation.UNKNOWN) { // skips FUTURE, whose writer is UNKNOWN
+ if (values[writer.id] == null) {
+ values[writer.id] = new WriterVersion[WriterVersion.values().length]; // sized by version count, so every id must stay below it
+ }
+ if (values[writer.id][v.id] != null) { // two versions of one writer may not share an id
+ throw new IllegalArgumentException("Duplicate WriterVersion id " + v);
+ }
+ values[writer.id][v.id] = v;
}
}
}
@@ -156,18 +199,33 @@ public class OrcFile {
/**
* Convert the integer from OrcProto.PostScript.writerVersion
* to the enumeration with unknown versions being mapped to FUTURE.
+ * @param writer the writer implementation
* @param val the serialized writer version
* @return the corresponding enumeration value
*/
- public static WriterVersion from(int val) {
- if (val >= values.length) {
+ public static WriterVersion from(WriterImplementation writer, int val) {
+ if (writer == WriterImplementation.UNKNOWN) {
+ return FUTURE;
+ }
+ if (writer != WriterImplementation.ORC_JAVA && val < 6) { // ids below 6 are reserved for the ORC Java writer
+ throw new IllegalArgumentException("ORC File with illegal version " +
+ val + " for writer " + writer);
+ }
+ WriterVersion[] versions = values[writer.id];
+ if (val < 0 || versions.length <= val) { // val == length is out of range too; map it to FUTURE instead of overrunning the array
return FUTURE;
}
- return values[val];
+ WriterVersion result = versions[val]; // unassigned slots are null and also mean "unknown version"
+ return result == null ? FUTURE : result;
}
- public boolean includes(WriterVersion other) {
- return id >= other.id;
+ /**
+ * Does this file include the given fix or come from a different writer?
+ * @param fix the required fix
+ * @return true if the required fix is present
+ */
+ public boolean includes(WriterVersion fix) {
+ return writer != fix.writer || id >= fix.id; // ids are only compared within one writer; a different writer always yields true
}
}
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/OrcTail.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/OrcTail.java b/java/core/src/java/org/apache/orc/impl/OrcTail.java
index f2f80a5..3c78874 100644
--- a/java/core/src/java/org/apache/orc/impl/OrcTail.java
+++ b/java/core/src/java/org/apache/orc/impl/OrcTail.java
@@ -70,8 +70,10 @@ public final class OrcTail {
public OrcFile.WriterVersion getWriterVersion() {
OrcProto.PostScript ps = fileTail.getPostscript();
- return (ps.hasWriterVersion()
- ? OrcFile.WriterVersion.from(ps.getWriterVersion()) : OrcFile.WriterVersion.ORIGINAL);
+ OrcProto.Footer footer = fileTail.getFooter();
+ OrcFile.WriterImplementation writer =
+ OrcFile.WriterImplementation.from(footer.getWriter());
+ return OrcFile.WriterVersion.from(writer, ps.getWriterVersion()); // NOTE(review): hasWriterVersion() is no longer checked; presumably an absent field reads as 0 - confirm
}
public List<StripeInformation> getStripes() {
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index ad1bc1e..7702ac6 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -361,8 +361,10 @@ public class ReaderImpl implements Reader {
this.metadataSize = fileMetadata.getMetadataSize();
this.stripeStats = fileMetadata.getStripeStats();
this.versionList = fileMetadata.getVersionList();
+ OrcFile.WriterImplementation writer =
+ OrcFile.WriterImplementation.from(fileMetadata.getWriterImplementation());
this.writerVersion =
- OrcFile.WriterVersion.from(fileMetadata.getWriterVersionNum());
+ OrcFile.WriterVersion.from(writer, fileMetadata.getWriterVersionNum());
this.types = fileMetadata.getTypes();
this.rowIndexStride = fileMetadata.getRowIndexStride();
this.contentLength = fileMetadata.getContentLength();
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/java/org/apache/orc/impl/WriterImpl.java
----------------------------------------------------------------------
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index a5d65dd..90eaf4f 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -492,6 +492,7 @@ public class WriterImpl implements Writer, MemoryManager.Callback {
builder.addMetadata(OrcProto.UserMetadataItem.newBuilder()
.setName(entry.getKey()).setValue(entry.getValue()));
}
+ builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
physicalWriter.writeFileFooter(builder);
return writePostScript();
}
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
----------------------------------------------------------------------
diff --git a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
index f975b73..9373216 100644
--- a/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
+++ b/java/core/src/test/org/apache/orc/TestVectorOrcFile.java
@@ -3039,9 +3039,50 @@ public class TestVectorOrcFile {
@Test
public void testWriterVersion() throws Exception {
- assertEquals(OrcFile.WriterVersion.FUTURE, OrcFile.WriterVersion.from(99));
- assertEquals(OrcFile.WriterVersion.ORIGINAL, OrcFile.WriterVersion.from(0));
- assertEquals(OrcFile.WriterVersion.HIVE_4243, OrcFile.WriterVersion.from(2));
+ // test writer implementation serialization
+ assertEquals(OrcFile.WriterImplementation.ORC_JAVA,
+ OrcFile.WriterImplementation.from(0));
+ assertEquals(OrcFile.WriterImplementation.ORC_CPP,
+ OrcFile.WriterImplementation.from(1));
+ assertEquals(OrcFile.WriterImplementation.PRESTO,
+ OrcFile.WriterImplementation.from(2));
+ assertEquals(OrcFile.WriterImplementation.UNKNOWN,
+ OrcFile.WriterImplementation.from(99));
+
+ // test writer version serialization
+ assertEquals(OrcFile.WriterVersion.FUTURE,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 99));
+ assertEquals(OrcFile.WriterVersion.ORIGINAL,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 0));
+ assertEquals(OrcFile.WriterVersion.HIVE_4243,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_JAVA, 2));
+ assertEquals(OrcFile.WriterVersion.FUTURE,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_CPP, 99));
+ assertEquals(OrcFile.WriterVersion.ORC_CPP_ORIGINAL,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.ORC_CPP, 6));
+ assertEquals(OrcFile.WriterVersion.PRESTO_ORIGINAL,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 6));
+ assertEquals(OrcFile.WriterVersion.FUTURE,
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.UNKNOWN, 0)); // an UNKNOWN writer maps to FUTURE whatever the id
+
+ // test compatibility
+ assertTrue(OrcFile.WriterVersion.FUTURE.includes(
+ OrcFile.WriterVersion.ORC_CPP_ORIGINAL));
+ assertTrue(OrcFile.WriterVersion.FUTURE.includes(
+ OrcFile.WriterVersion.HIVE_8732));
+ assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+ OrcFile.WriterVersion.HIVE_4243));
+ assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+ OrcFile.WriterVersion.HIVE_12055));
+ assertTrue(!OrcFile.WriterVersion.HIVE_4243.includes(
+ OrcFile.WriterVersion.HIVE_12055)); // an older version of the same writer lacks the later fix
+ assertTrue(OrcFile.WriterVersion.HIVE_12055.includes(
+ OrcFile.WriterVersion.PRESTO_ORIGINAL)); // a fix from a different writer is always reported as included
+ }
+
+ @Test(expected=IllegalArgumentException.class)
+ public void testBadPrestoVersion() {
+ OrcFile.WriterVersion.from(OrcFile.WriterImplementation.PRESTO, 0); // ids below 6 are rejected for every writer except ORC_JAVA
}
/**
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
index dcf29f7..5775500 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter.out
@@ -172,7 +172,7 @@ Stripes:
Entry 0: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
Stripe level merge: numHashFunctions: 4 bitCount: 6272 popCount: 138 loadFactor: 0.022 expectedFpp: 2.343647E-7
-File length: 272450 bytes
+File length: 272452 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
index 4ea04b5..8afddae 100644
--- a/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
+++ b/java/tools/src/test/resources/orc-file-dump-bloomfilter2.out
@@ -187,7 +187,7 @@ Stripes:
Entry 0: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
Stripe level merge: numHashFunctions: 7 bitCount: 9600 popCount: 4948 loadFactor: 0.5154 expectedFpp: 0.00966294
-File length: 332511 bytes
+File length: 332513 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
index 78e0258..5989250 100644
--- a/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
+++ b/java/tools/src/test/resources/orc-file-dump-dictionary-threshold.out
@@ -183,7 +183,7 @@ Stripes:
Row group indices for column 3:
Entry 0: count: 1000 hasNull: false min: Darkness,-230-368-488-586-862-930-1686-2044-2636-2652-2872-3108-3162-3192-3404-3442-3508-3542-3550-3712-3980-4146-4204-4336-4390-4418-4424-4490-4512-4650-4768-4924-4950-5210-5524-5630-5678-5710-5758-5952-6238-6252-6300-6366-6668-6712-6926-6942-7100-7194-7802-8030-8452-8608-8640-8862-8868-9134-9234-9412-9602-9608-9642-9678-9740-9780-10426-10510-10514-10706-10814-10870-10942-11028-11244-11326-11462-11496-11656-11830-12022-12178-12418-12832-13304-13448-13590-13618-13908-14188-14246-14340-14364-14394-14762-14850-14964-15048-15494-15674-15726-16006-16056-16180-16304-16332-16452-16598-16730-16810-16994-17210-17268-17786-17962-18214-18444-18446-18724-18912-18952-19164-19348-19400-19546-19776-19896-20084 max: worst-54-290-346-648-908-996-1038-1080-1560-1584-1620-1744-1770-1798-1852-1966-2162-2244-2286-2296-2534-2660-3114-3676-3788-4068-4150-4706-4744-5350-5420-5582-5696-5726-6006-6020-6024-6098-6184-6568-6636-6802-6994-7004-7318-7498-7758-7780-
7798-7920-7952-7960-7988-8232-8256-8390-8416-8478-8620-8840-8984-9038-9128-9236-9248-9344-9594-9650-9714-9928-9938-10178-10368-10414-10502-10732-10876-11008-11158-11410-11722-11836-11964-12054-12096-12126-12136-12202-12246-12298-12616-12774-12782-12790-12802-12976-13216-13246-13502-13766-14454-14974-15004-15124-15252-15294-15356-15530-15610-16316-16936-17024-17122-17214-17310-17528-17682-17742-17870-17878-18010-18410-18524-18788-19204-19254-19518-19596-19786-19874-19904-20390-20752-20936 sum: 670762 positions: 0,0,0,0,0
-File length: 2217611 bytes
+File length: 2217614 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump.json
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 3914f82..81c96df 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -1348,7 +1348,7 @@
}]
}
],
- "fileLength": 272434,
+ "fileLength": 272436,
"paddingLength": 0,
"paddingRatio": 0,
"status": "OK"
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-dump.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-dump.out b/java/tools/src/test/resources/orc-file-dump.out
index 51105f0..c8cf7ad 100644
--- a/java/tools/src/test/resources/orc-file-dump.out
+++ b/java/tools/src/test/resources/orc-file-dump.out
@@ -188,7 +188,7 @@ Stripes:
Row group indices for column 3:
Entry 0: count: 1000 hasNull: false min: Darkness, max: worst sum: 3866 positions: 0,0,0
-File length: 270923 bytes
+File length: 270925 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/java/tools/src/test/resources/orc-file-has-null.out
----------------------------------------------------------------------
diff --git a/java/tools/src/test/resources/orc-file-has-null.out b/java/tools/src/test/resources/orc-file-has-null.out
index a42a62d..d7e78f7 100644
--- a/java/tools/src/test/resources/orc-file-has-null.out
+++ b/java/tools/src/test/resources/orc-file-has-null.out
@@ -105,7 +105,7 @@ Stripes:
Entry 3: count: 0 hasNull: true positions: 0,4,115,0,0,0,0
Entry 4: count: 0 hasNull: true positions: 0,6,110,0,0,0,0
-File length: 1823 bytes
+File length: 1825 bytes
Padding length: 0 bytes
Padding ratio: 0%
________________________________________________________________________________________________________________________
http://git-wip-us.apache.org/repos/asf/orc/blob/7dd7dafa/proto/orc_proto.proto
----------------------------------------------------------------------
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index ee2ca45..fef207c 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -200,6 +200,12 @@ message Footer {
optional uint64 numberOfRows = 6;
repeated ColumnStatistics statistics = 7;
optional uint32 rowIndexStride = 8;
+
+ // Each implementation that writes ORC files should register for a code
+ // 0 = ORC Java
+ // 1 = ORC C++
+ // 2 = Presto
+ optional uint32 writer = 9;
}
enum CompressionKind {
@@ -221,15 +227,33 @@ message PostScript {
// [0, 12] = Hive 0.12
repeated uint32 version = 4 [packed = true];
optional uint64 metadataLength = 5;
- // Version of the writer:
- // 0 (or missing) = original
- // 1 = HIVE-8732 fixed
- // 2 = HIVE-4243 fixed
- // 3 = HIVE-12055 fixed
- // 4 = HIVE-13083 fixed
- // 5 = ORC-101 fixed
- // 6 = ORC-135 fixed
+
+ // The version of the writer that wrote the file. This number is
+ // updated when we make fixes or large changes to the writer so that
+ // readers can detect whether a given bug is present in the data.
+ //
+ // Only the Java ORC writer may use values under 6 (or missing) so that
+ // readers that predate ORC-202 treat the new writers correctly. Each
+ // writer should assign their own sequence of versions starting from 6.
+ //
+ // Version of the ORC Java writer:
+ // 0 = original
+ // 1 = HIVE-8732 fixed (fixed stripe/file maximum statistics &
+ // string statistics use utf8 for min/max)
+ // 2 = HIVE-4243 fixed (use real column names from Hive tables)
+ // 3 = HIVE-12055 fixed (vectorized writer implementation)
+ // 4 = HIVE-13083 fixed (decimals write present stream correctly)
+ // 5 = ORC-101 fixed (bloom filters use utf8 consistently)
+ // 6 = ORC-135 fixed (timestamp statistics use utc)
+ //
+ // Version of the ORC C++ writer:
+ // 6 = original
+ //
+ // Version of the Presto writer:
+ // 6 = original
+ //
optional uint32 writerVersion = 6;
+
// Leave this last in the record
optional string magic = 8000;
}