You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/09/12 19:17:17 UTC

[orc] branch branch-1.6 updated: ORC-984: Save the software version that wrote each ORC file. (#904)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.6
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.6 by this push:
     new 2aa9b06  ORC-984: Save the software version that wrote each ORC file. (#904)
2aa9b06 is described below

commit 2aa9b0651c08825287ffbc49325712ab0ef5327a
Author: Owen O'Malley <oo...@linkedin.com>
AuthorDate: Sun Sep 12 11:45:51 2021 -0700

    ORC-984: Save the software version that wrote each ORC file. (#904)
    
    I added a string to the file footer that records the version of the software that wrote the file. We already recorded the implementation that wrote the file as Footer.writer. I added a method in the reader that combines these two fields to produce a user-facing string that describes the software version.
    
    I also added a field to the metadata tool to show the software version that wrote the file.
    
    Because of that change, and because the file size varies depending on whether the ORC version is a snapshot, I had to extend the tests for TestFileDump to allow some slop in the file size and to ignore the file version.
    
    It passes the unit tests after I updated the tools tests.
    
    (cherry picked from commit cf720d7b2333618c652445299486cb9600bdbe4f)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 7e3d557a80f5ada1b2a927ee7e3ea9e91a115c6c)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 c++/include/orc/Common.hh                          |   5 ++
 c++/include/orc/Reader.hh                          |   6 ++
 c++/src/Common.cc                                  |  26 ++++++++
 c++/src/Reader.cc                                  |   9 +++
 c++/src/Reader.hh                                  |   2 +
 c++/src/Writer.cc                                  |   1 +
 examples/expected/orc_split_elim_cpp.jsn.gz        | Bin 0 -> 8980 bytes
 examples/orc_split_elim_cpp.orc                    | Bin 0 -> 3339 bytes
 examples/orc_split_elim_new.orc                    | Bin 2341 -> 2396 bytes
 java/core/src/findbugs/exclude.xml                 |   5 ++
 java/core/src/java/org/apache/orc/OrcUtils.java    |  70 +++++++++++++++++++++
 java/core/src/java/org/apache/orc/Reader.java      |  10 +++
 .../src/java/org/apache/orc/impl/ReaderImpl.java   |  15 ++++-
 .../src/java/org/apache/orc/impl/WriterImpl.java   |   1 +
 java/pom.xml                                       |  13 ++++
 .../src/java/org/apache/orc/tools/FileDump.java    |   3 +-
 .../java/org/apache/orc/tools/JsonFileDump.java    |   1 +
 .../test/org/apache/orc/tools/TestFileDump.java    |  60 ++++++++++++++----
 .../org/apache/orc/tools/TestJsonFileDump.java     |  22 +------
 java/tools/src/test/resources/orc-file-dump.json   |   3 +-
 proto/orc_proto.proto                              |   5 ++
 tools/src/FileMetadata.cc                          |   1 +
 tools/test/TestFileMetadata.cc                     |   5 +-
 tools/test/TestMatch.cc                            |  48 +++++++++++++-
 24 files changed, 272 insertions(+), 39 deletions(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index d521fdd..4aa4a85 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -74,6 +74,8 @@ namespace orc {
     UNKNOWN_WRITER = INT32_MAX
   };
 
+  std::string writerIdToString(uint32_t id);
+
   enum CompressionKind {
     CompressionKind_NONE = 0,
     CompressionKind_ZLIB = 1,
@@ -97,6 +99,9 @@ namespace orc {
     WriterVersion_HIVE_13083 = 4,
     WriterVersion_ORC_101 = 5,
     WriterVersion_ORC_135 = 6,
+    WriterVersion_ORC_517 = 7,
+    WriterVersion_ORC_203 = 8,
+    WriterVersion_ORC_14 = 9,
     WriterVersion_MAX = INT32_MAX
   };
 
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index 267ae93..5d9a532 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -272,6 +272,12 @@ namespace orc {
     virtual uint64_t getNumberOfRows() const = 0;
 
     /**
+     * Get the software instance and version that wrote this file.
+     * @return a user-facing string that specifies the software version
+     */
+    virtual std::string getSoftwareVersion() const = 0;
+
+    /**
      * Get the user metadata keys.
      * @return the set of user metadata keys
      */
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index 0a5256e..dbf0737 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -58,12 +58,38 @@ namespace orc {
         return "ORC-101";
       case WriterVersion_ORC_135:
         return "ORC-135";
+      case WriterVersion_ORC_517:
+        return "ORC-517";
+      case WriterVersion_ORC_203:
+        return "ORC-203";
+      case WriterVersion_ORC_14:
+        return "ORC-14";
     }
     std::stringstream buffer;
     buffer << "future - " << version;
     return buffer.str();
   }
 
+  std::string writerIdToString(uint32_t id) {
+    switch (id) {
+      case ORC_JAVA_WRITER:
+        return "ORC Java";
+      case ORC_CPP_WRITER:
+        return "ORC C++";
+      case PRESTO_WRITER:
+        return "Presto";
+      case SCRITCHLEY_GO:
+        return "Scritchley Go";
+      case TRINO_WRITER:
+        return "Trino";
+      default: {
+        std::ostringstream buffer;
+        buffer << "Unknown(" << id << ")";
+        return buffer.str();
+      }
+    }
+  }
+
   std::string streamKindToString(StreamKind kind) {
     switch (static_cast<int>(kind)) {
       case StreamKind_PRESENT:
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 33f2806..8b08559 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -508,6 +508,15 @@ namespace orc {
     }
   }
 
+  std::string ReaderImpl::getSoftwareVersion() const {
+    std::ostringstream buffer;
+    buffer << writerIdToString(getWriterIdValue());
+    if (footer->has_softwareversion()) {
+      buffer << " " << footer->softwareversion();
+    }
+    return buffer.str();
+  }
+
   WriterVersion ReaderImpl::getWriterVersion() const {
     if (!contents->postscript->has_writerversion()) {
       return WriterVersion_ORIGINAL;
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index a381956..27f82b7 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -230,6 +230,8 @@ namespace orc {
 
     uint32_t getWriterIdValue() const override;
 
+    std::string getSoftwareVersion() const override;
+
     WriterVersion getWriterVersion() const override;
 
     uint64_t getNumberOfRows() const override;
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 8158990..092e580 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -379,6 +379,7 @@ namespace orc {
     fileFooter.set_rowindexstride(
                           static_cast<uint32_t>(options.getRowIndexStride()));
     fileFooter.set_writer(writerId);
+    fileFooter.set_softwareversion(ORC_VERSION);
 
     uint32_t index = 0;
     buildFooterType(type, fileFooter, index);
diff --git a/examples/expected/orc_split_elim_cpp.jsn.gz b/examples/expected/orc_split_elim_cpp.jsn.gz
new file mode 100644
index 0000000..5263699
Binary files /dev/null and b/examples/expected/orc_split_elim_cpp.jsn.gz differ
diff --git a/examples/orc_split_elim_cpp.orc b/examples/orc_split_elim_cpp.orc
new file mode 100644
index 0000000..86921f3
Binary files /dev/null and b/examples/orc_split_elim_cpp.orc differ
diff --git a/examples/orc_split_elim_new.orc b/examples/orc_split_elim_new.orc
index cdbe477..24e58f1 100644
Binary files a/examples/orc_split_elim_new.orc and b/examples/orc_split_elim_new.orc differ
diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index feb275d..f83887b 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -43,6 +43,11 @@
   <!-- Java's try with resources causes a false positive.
        See https://github.com/SERG-Delft/jpacman/pull/27 . -->
   <Match>
+    <Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE"/>
+    <Class name="org.apache.orc.OrcUtils"/>
+    <Method name="getOrcVersion"/>
+  </Match>
+  <Match>
     <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
     <Class name="org.apache.orc.impl.OrcAcidUtils"/>
     <Method name="getLastFlushLength"/>
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index e6f10a0..0d8a2e7 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -21,9 +21,11 @@ import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.impl.SchemaEvolution;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Properties;
 
 import static org.apache.hadoop.util.StringUtils.COMMA_STR;
 
@@ -387,4 +389,72 @@ public class OrcUtils {
     }
     return result;
   }
+
+  /**
+   * Get the user-facing version string for the software that wrote the file.
+   * @param writer the code for the writer from OrcProto.Footer
+   * @param version the orcVersion from OrcProto.Footer
+   * @return the version string
+   */
+  public static String getSoftwareVersion(int writer,
+                                          String version) {
+    String base;
+    switch (writer) {
+      case 0:
+        base = "ORC Java";
+        break;
+      case 1:
+        base = "ORC C++";
+        break;
+      case 2:
+        base = "Presto";
+        break;
+      case 3:
+        base = "Scritchley Go";
+        break;
+      case 4:
+        base = "Trino";
+        break;
+      default:
+        base = String.format("Unknown(%d)", writer);
+        break;
+    }
+    if (version == null) {
+      return base;
+    } else {
+      return base + " " + version;
+    }
+  }
+
+  /**
+   * Get the software version from Maven.
+   * @return The version of the software.
+   */
+  public static String getOrcVersion() {
+    Class<OrcFile> cls = OrcFile.class;
+    // try to load from maven properties first
+    try (InputStream is = cls.getResourceAsStream(
+        "/META-INF/maven/org.apache.orc/orc-core/pom.properties")) {
+      if (is != null) {
+        Properties p = new Properties();
+        p.load(is);
+        String version = p.getProperty("version", null);
+        if (version != null) {
+          return version;
+        }
+      }
+    } catch (IOException e) {
+      // ignore
+    }
+
+    // fallback to using Java API
+    Package aPackage = cls.getPackage();
+    if (aPackage != null) {
+      String version = aPackage.getImplementationVersion();
+      if (version != null) {
+        return version;
+      }
+    }
+    return "unknown";
+  }
 }
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index f0c613c..b452753 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -140,6 +140,16 @@ public interface Reader extends Closeable {
   OrcFile.WriterVersion getWriterVersion();
 
   /**
+   * Get the implementation and version of the software that wrote the file.
+   * It defaults to "ORC Java" for old files. For current files, we include the
+   * version also.
+   * @since 1.5.13
+   * @return returns the writer implementation and hopefully the version of the
+   *   software
+   */
+  String getSoftwareVersion();
+
+  /**
    * Get the file tail (footer + postscript)
    *
    * @return - file tail
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index f04e3fb..c9f110d 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -89,6 +89,7 @@ public class ReaderImpl implements Reader {
   protected final boolean useUTCTimestamp;
   private final List<Integer> versionList;
   private final OrcFile.WriterVersion writerVersion;
+  private final String softwareVersion;
 
   protected final OrcTail tail;
 
@@ -284,6 +285,11 @@ public class ReaderImpl implements Reader {
   }
 
   @Override
+  public String getSoftwareVersion() {
+    return softwareVersion;
+  }
+
+  @Override
   public OrcProto.FileTail getFileTail() {
     return tail.getFileTail();
   }
@@ -551,6 +557,7 @@ public class ReaderImpl implements Reader {
       this.userMetadata = null; // not cached and not needed here
       // FileMetadata is obsolete and doesn't support encryption
       this.encryption = new ReaderEncryption();
+      this.softwareVersion = null;
     } else {
       OrcTail orcTail = options.getOrcTail();
       if (orcTail == null) {
@@ -573,8 +580,12 @@ public class ReaderImpl implements Reader {
       this.writerVersion = tail.getWriterVersion();
       this.stripes = tail.getStripes();
       this.stripeStatistics = null;
-      this.encryption = new ReaderEncryption(tail.getFooter(), schema,
-          tail.getStripeStatisticsOffset(), tail.getTailBuffer(), stripes, options.getKeyProvider(), conf);
+      OrcProto.Footer footer = tail.getFooter();
+      this.encryption = new ReaderEncryption(footer, schema,
+          tail.getStripeStatisticsOffset(), tail.getTailBuffer(), stripes,
+          options.getKeyProvider(), conf);
+      this.softwareVersion = OrcUtils.getSoftwareVersion(footer.getWriter(),
+          footer.getSoftwareVersion());
     }
     this.types = OrcUtils.getOrcTypes(schema);
   }
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index fb1806d..f1af57b 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -659,6 +659,7 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
       builder.setEncryption(writeEncryptionFooter());
     }
     builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
+    builder.setSoftwareVersion(OrcUtils.getOrcVersion());
     physicalWriter.writeFileFooter(builder);
     return writePostScript();
   }
diff --git a/java/pom.xml b/java/pom.xml
index 15bf7d3..8c75937 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -228,6 +228,19 @@
     <pluginManagement>
       <plugins>
         <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.2.0</version>
+          <configuration>
+            <archive>                   
+              <manifest>
+                <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+                <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+              </manifest>
+            </archive>
+          </configuration>
+        </plugin>
+        <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>findbugs-maven-plugin</artifactId>
           <version>3.0.5</version>
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index 189c68a..129c8c0 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -338,7 +338,8 @@ public final class FileDump {
     TypeDescription schema = reader.getSchema();
     System.out.println("Structure for " + filename);
     System.out.println("File Version: " + reader.getFileVersion().getName() +
-        " with " + reader.getWriterVersion());
+        " with " + reader.getWriterVersion() + " by " +
+        reader.getSoftwareVersion());
     RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
     System.out.println("Rows: " + reader.getNumberOfRows());
     System.out.println("Compression: " + reader.getCompressionKind());
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index a2664f8..46d87b5 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -88,6 +88,7 @@ public class JsonFileDump {
         }
         writer.key("fileVersion").value(reader.getFileVersion().getName());
         writer.key("writerVersion").value(reader.getWriterVersion());
+        writer.key("softwareVersion").value(reader.getSoftwareVersion());
         RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
         writer.key("numberOfRows").value(reader.getNumberOfRows());
         writer.key("compression").value(reader.getCompressionKind());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
index 9c39842..9cedd31 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
@@ -19,6 +19,7 @@
 package org.apache.orc.tools;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assume.assumeTrue;
 
 import java.io.BufferedReader;
@@ -34,8 +35,11 @@ import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -63,6 +67,7 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+
 public class TestFileDump {
 
   Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -181,22 +186,55 @@ public class TestFileDump {
     ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes(StandardCharsets.UTF_8));
   }
 
+  private static final Pattern ignoreTailPattern =
+      Pattern.compile("^(?<head>File Version|\"softwareVersion\"): .*");
+  private static final Pattern fileSizePattern =
+      Pattern.compile("^(\"fileLength\"|File length): (?<size>[0-9]+).*");
+  // Allow file size to be up to 100 bytes larger.
+  private static final int SIZE_SLOP = 100;
+
+  /**
+   * Preprocess the string for matching.
+   * If it matches the fileSizePattern, we return the file size as a Long.
+   * @param line the input line
+   * @return the processed line or a Long with the file size
+   */
+  private static Object preprocessLine(String line) {
+    if (line == null) {
+      return line;
+    }
+    line = line.trim();
+    Matcher match = fileSizePattern.matcher(line);
+    if (match.matches()) {
+      return Long.parseLong(match.group("size"));
+    }
+    match = ignoreTailPattern.matcher(line);
+    if (match.matches()) {
+      return match.group("head");
+    }
+    return line;
+  }
+
+  /**
+   * Compare two files for equivalence.
+   * @param expected Loaded from the class path
+   * @param actual Loaded from the file system
+   */
   public static void checkOutput(String expected,
                                  String actual) throws Exception {
-    BufferedReader eStream = Files.newBufferedReader(Paths.get(TestJsonFileDump.getFileFromClasspath(expected)), StandardCharsets.UTF_8);
+    BufferedReader eStream = Files.newBufferedReader(Paths.get(
+        TestJsonFileDump.getFileFromClasspath(expected)), StandardCharsets.UTF_8);
     BufferedReader aStream = Files.newBufferedReader(Paths.get(actual), StandardCharsets.UTF_8);
-    String expectedLine = eStream.readLine();
-    if (expectedLine != null) {
-      expectedLine = expectedLine.trim();
-    }
+    Object expectedLine = preprocessLine(eStream.readLine());
     while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      if (actualLine != null) {
-        actualLine = actualLine.trim();
+      Object actualLine = preprocessLine(aStream.readLine());
+      if (expectedLine instanceof Long && actualLine instanceof Long) {
+        long diff = (Long) actualLine - (Long) expectedLine;
+        assertTrue("expected: " + expectedLine + ", actual: " + actualLine, diff < SIZE_SLOP);
+      } else {
+        assertEquals(expectedLine, actualLine);
       }
-      Assert.assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-      expectedLine = expectedLine == null ? null : expectedLine.trim();
+      expectedLine = preprocessLine(eStream.readLine());
     }
     Assert.assertNull(eStream.readLine());
     Assert.assertNull(aStream.readLine());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
index 1e8a540..9f5dce9 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
@@ -68,26 +68,6 @@ public class TestJsonFileDump {
     fs.delete(testFilePath, false);
   }
 
-  static void checkOutput(String expected,
-                                  String actual) throws Exception {
-//    BufferedReader eStream =
-//        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-//    new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-    BufferedReader eStream  = Files
-        .newBufferedReader(Paths.get(getFileFromClasspath(expected)), StandardCharsets.UTF_8);
-//    BufferedReader aStream =
-//        new BufferedReader(new FileReader(actual));
-    BufferedReader aStream = Files.newBufferedReader(Paths.get(actual), StandardCharsets.UTF_8);
-    String expectedLine = eStream.readLine();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-    }
-    assertNull(eStream.readLine());
-    assertNull(aStream.readLine());
-  }
-
   @Test
   public void testJsonDump() throws Exception {
     TypeDescription schema =
@@ -150,6 +130,6 @@ public class TestJsonFileDump {
     System.setOut(origOut);
 
 
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
   }
 }
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 3545efe..14e4316 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -2,6 +2,7 @@
   "fileName": "TestFileDump.testDump.orc",
   "fileVersion": "0.12",
   "writerVersion": "ORC_14",
+  "softwareVersion": "ORC Java 1.8.0-SNAPSHOT",
   "numberOfRows": 21000,
   "compression": "ZLIB",
   "compressionBufferSize": 4096,
@@ -1361,7 +1362,7 @@
       }]
     }
   ],
-  "fileLength": 272513,
+  "fileLength": 272529,
   "paddingLength": 0,
   "paddingRatio": 0,
   "status": "OK"
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 5e4f3bc..e8b84db 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -371,6 +371,11 @@ message Footer {
   // information about the encryption in this file
   optional Encryption encryption = 10;
   optional CalendarKind calendar = 11;
+
+  // informative description about the version of the software that wrote
+  // the file. It is assumed to be within a given writer, so for example
+  // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
+  optional string softwareVersion = 12;
 }
 
 enum CompressionKind {
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index c4784fd..b3591dc 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -106,6 +106,7 @@ void printMetadata(std::ostream & out, const char*filename, bool verbose) {
   out << "  \"format\": \"" << reader->getFormatVersion().toString()
       << "\", \"writer version\": \""
             << orc::writerVersionToString(reader->getWriterVersion())
+            << "\", \"software version\": \"" << reader->getSoftwareVersion()
             << "\",\n";
   out << "  \"compression\": \""
             << orc::compressionKindToString(reader->getCompression())
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index 45d6d5b..649c2ac 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -166,7 +166,7 @@ TEST (TestFileMetadata, testJson) {
     "  \"type\": \"struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(0,0),ts:timestamp>\",\n"
     "  \"rows\": 25000,\n"
     "  \"stripe count\": 5,\n"
-    "  \"format\": \"0.12\", \"writer version\": \"original\",\n"
+    "  \"format\": \"0.12\", \"writer version\": \"original\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"none\",\n"
     "  \"file length\": 246402,\n"
     "  \"content\": 245568, \"stripe stats\": 526, \"footer\": 288, \"postscript\": 19,\n"
@@ -215,7 +215,7 @@ TEST (TestFileMetadata, testNoFormat) {
     "  \"type\": \"struct<_col0:array<string>,_col1:map<int,string>,_col2:struct<name:string,score:int>>\",\n"
     "  \"rows\": 5,\n"
     "  \"stripe count\": 1,\n"
-    "  \"format\": \"0.11\", \"writer version\": \"original\",\n"
+    "  \"format\": \"0.11\", \"writer version\": \"original\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"zlib\", \"compression block\": 262144,\n"
     "  \"file length\": 745,\n"
     "  \"content\": 525, \"stripe stats\": 0, \"footer\": 210, \"postscript\": 9,\n"
@@ -237,3 +237,4 @@ TEST (TestFileMetadata, testNoFormat) {
   EXPECT_EQ(expected, output);
   EXPECT_EQ("", error);
 }
+
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index fc4f6f9..9a7de0d 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -38,6 +38,7 @@ namespace orc {
     std::string json;
     std::string typeString;
     std::string formatVersion;
+    std::string softwareVersion;
     uint64_t rowCount;
     uint64_t contentLength;
     uint64_t stripeCount;
@@ -50,6 +51,7 @@ namespace orc {
                        const std::string& _json,
                        const std::string& _typeString,
                        const std::string& _version,
+                       const std::string& _softwareVersion,
                        uint64_t _rowCount,
                        uint64_t _contentLength,
                        uint64_t _stripeCount,
@@ -61,6 +63,7 @@ namespace orc {
                           json(_json),
                           typeString(_typeString),
                           formatVersion(_version),
+                          softwareVersion(_softwareVersion),
                           rowCount(_rowCount),
                           contentLength(_contentLength),
                           stripeCount(_stripeCount),
@@ -111,6 +114,7 @@ namespace orc {
     EXPECT_EQ(GetParam().rowIndexStride, reader->getRowIndexStride());
     EXPECT_EQ(GetParam().contentLength, reader->getContentLength());
     EXPECT_EQ(GetParam().formatVersion, reader->getFormatVersion().toString());
+    EXPECT_EQ(GetParam().softwareVersion, reader->getSoftwareVersion());
     EXPECT_EQ(getFilename(), reader->getStreamName());
     EXPECT_EQ(GetParam().userMeta.size(), reader->getMetadataKeys().size());
     for(std::map<std::string, std::string>::const_iterator itr =
@@ -163,6 +167,7 @@ namespace orc {
                                        "TestOrcFile.columnProjection.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        21000,
                                        428406,
                                        5,
@@ -182,6 +187,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        0,
                                        3,
                                        0,
@@ -201,6 +207,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        1,
                                        980,
                                        1,
@@ -220,6 +227,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        2,
                                        1015,
                                        1,
@@ -233,6 +241,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.11",
+                                       "ORC Java",
                                        2500,
                                        18779,
                                        25,
@@ -246,6 +255,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        2500,
                                        10618,
                                        4,
@@ -258,6 +268,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        3500,
                                        15529,
                                        1,
@@ -276,6 +287,7 @@ namespace orc {
                                        "string1:string>>,map:map<string,"
                                        "struct<int1:int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        32768,
                                        1896379,
                                        7,
@@ -287,6 +299,7 @@ namespace orc {
                                        "TestOrcFile.testSnappy.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        126061,
                                        2,
@@ -300,6 +313,7 @@ namespace orc {
                                        "istics.jsn.gz",
                                        "struct<bytes1:binary,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        4,
                                        185,
                                        1,
@@ -312,6 +326,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        11000,
                                        597,
                                        3,
@@ -323,6 +338,7 @@ namespace orc {
                                        "TestOrcFile.testTimestamp.jsn.gz",
                                        "timestamp",
                                        "0.11",
+                                       "ORC Java",
                                        12,
                                        188,
                                        1,
@@ -336,6 +352,7 @@ namespace orc {
                                        "struct<time:timestamp,union:uniontype"
                                        "<int,string>,decimal:decimal(38,18)>",
                                        "0.12",
+                                       "ORC Java",
                                        5077,
                                        20906,
                                        2,
@@ -347,6 +364,7 @@ namespace orc {
                                        "TestOrcFile.testWithoutIndex.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        50000,
                                        214643,
                                        10,
@@ -358,6 +376,7 @@ namespace orc {
                                        "decimal.jsn.gz",
                                        "struct<_col0:decimal(10,5)>",
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        16186,
                                        1,
@@ -372,6 +391,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        5069718,
                                        385,
@@ -386,6 +406,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        396823,
                                        385,
@@ -400,6 +421,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.12",
+                                       "ORC Java",
                                        1920800,
                                        45592,
                                        1,
@@ -413,6 +435,7 @@ namespace orc {
                                         "_col2:int,_col3:bigint,_col4:float,"
                                         "_col5:double,_col6:boolean>"),
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        366347,
                                        1,
@@ -434,6 +457,7 @@ namespace orc {
                                         "string>>,ts:timestamp,"
                                         "decimal1:decimal(0,0)>"),
                                        "0.11",
+                                       "ORC Java",
                                        7500,
                                        372542,
                                        2,
@@ -448,17 +472,34 @@ namespace orc {
 					"decimal1:decimal(16,6),"
                                         "ts:timestamp>"),
                                        "0.12",
+                                       "ORC Java 1.8.0-SNAPSHOT",
                                        25000,
-                                       1981,
+                                       1980,
                                        1,
                                        CompressionKind_ZLIB,
                                        262144,
                                        10000,
                                        std::map<std::string, std::string>()),
+                    OrcFileDescription("orc_split_elim_cpp.orc",
+                                       "orc_split_elim_cpp.jsn.gz",
+                                       ("struct<userid:bigint,string1:string,"
+                                        "subtype:double,"
+                                        "decimal1:decimal(16,6),"
+                                        "ts:timestamp>"),
+                                       "0.12",
+                                       "ORC C++ 1.8.0-SNAPSHOT",
+                                       25000,
+                                       2942,
+                                       1,
+                                       CompressionKind_ZLIB,
+                                       65536,
+                                       10000,
+                                       std::map<std::string, std::string>()),
                     OrcFileDescription("orc_index_int_string.orc",
                                        "orc_index_int_string.jsn.gz",
                                        ("struct<_col0:int,_col1:varchar(4)>"),
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        11280,
                                        1,
@@ -474,6 +515,7 @@ namespace orc {
                                        "_col7:string,_col8:timestamp,"
                                        "_col9:decimal(4,2),_col10:binary>",
                                        "0.12",
+                                       "ORC Java",
                                        2098,
                                        41780,
                                        2,
@@ -485,6 +527,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLz4.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120952,
                                        2,
@@ -496,6 +539,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLzo.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120955,
                                        2,
@@ -512,6 +556,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate1900.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        30478,
                                        8,
@@ -529,6 +574,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate2038.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        212000,
                                        94762,
                                        28,