You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/09/12 21:16:47 UTC

[orc] branch branch-1.5 updated: ORC-984: Save the software version that wrote each ORC file. (#904)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/branch-1.5 by this push:
     new cfd6746  ORC-984: Save the software version that wrote each ORC file. (#904)
cfd6746 is described below

commit cfd67464d8b7b24bccde00a8153c23e6a466c0e1
Author: Owen O'Malley <oo...@linkedin.com>
AuthorDate: Sun Sep 12 11:45:51 2021 -0700

    ORC-984: Save the software version that wrote each ORC file. (#904)
    
    I added a string to the file footer that records the version that wrote the file. We had already recorded the implementation that wrote the file as Footer.writer. I added a method that combines these two fields in the reader to produce a user-facing string that describes the software version.
    
    I also added a field to the metadata tool to show the version that wrote the file.
    
    Because of that change and the fact that the files change size based on whether the ORC version is a snapshot or not, I had to extend the tests for TestFileDump to allow some slop for the size and to ignore the file version.
    
    It passes the unit tests after I updated the tools tests.
    
    (cherry picked from commit cf720d7b2333618c652445299486cb9600bdbe4f)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 7e3d557a80f5ada1b2a927ee7e3ea9e91a115c6c)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
    (cherry picked from commit 2aa9b0651c08825287ffbc49325712ab0ef5327a)
    Signed-off-by: Dongjoon Hyun <do...@apache.org>
---
 c++/include/orc/Common.hh                          |   5 ++
 c++/include/orc/Reader.hh                          |   6 ++
 c++/src/Common.cc                                  |  26 ++++++++
 c++/src/Reader.cc                                  |   9 +++
 c++/src/Reader.hh                                  |   2 +
 c++/src/Writer.cc                                  |   1 +
 examples/expected/orc_split_elim_cpp.jsn.gz        | Bin 0 -> 8980 bytes
 examples/orc_split_elim_cpp.orc                    | Bin 0 -> 3339 bytes
 examples/orc_split_elim_new.orc                    | Bin 2341 -> 2396 bytes
 java/core/src/findbugs/exclude.xml                 |   5 ++
 java/core/src/java/org/apache/orc/OrcUtils.java    |  69 +++++++++++++++++++++
 java/core/src/java/org/apache/orc/Reader.java      |  10 +++
 .../src/java/org/apache/orc/impl/ReaderImpl.java   |  10 +++
 .../src/java/org/apache/orc/impl/WriterImpl.java   |   1 +
 java/pom.xml                                       |  13 ++++
 .../src/java/org/apache/orc/tools/FileDump.java    |   3 +-
 .../java/org/apache/orc/tools/JsonFileDump.java    |   1 +
 .../test/org/apache/orc/tools/TestFileDump.java    |  63 ++++++++++++++++---
 .../org/apache/orc/tools/TestJsonFileDump.java     |  18 +-----
 java/tools/src/test/resources/orc-file-dump.json   |   3 +-
 proto/orc_proto.proto                              |   5 ++
 tools/src/FileMetadata.cc                          |   1 +
 tools/test/TestFileMetadata.cc                     |   2 +-
 tools/test/TestMatch.cc                            |  48 +++++++++++++-
 24 files changed, 270 insertions(+), 31 deletions(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index 440c529..0fa8f22 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -71,6 +71,8 @@ namespace orc {
     UNKNOWN_WRITER = INT32_MAX
   };
 
+  std::string writerIdToString(uint32_t id);
+
   enum CompressionKind {
     CompressionKind_NONE = 0,
     CompressionKind_ZLIB = 1,
@@ -94,6 +96,9 @@ namespace orc {
     WriterVersion_HIVE_13083 = 4,
     WriterVersion_ORC_101 = 5,
     WriterVersion_ORC_135 = 6,
+    WriterVersion_ORC_517 = 7,
+    WriterVersion_ORC_203 = 8,
+    WriterVersion_ORC_14 = 9,
     WriterVersion_MAX = INT32_MAX
   };
 
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index b5e2e7e..c0b1701 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -256,6 +256,12 @@ namespace orc {
     virtual uint64_t getNumberOfRows() const = 0;
 
     /**
+     * Get the software instance and version that wrote this file.
+     * @return a user-facing string that specifies the software version
+     */
+    virtual std::string getSoftwareVersion() const = 0;
+
+    /**
      * Get the user metadata keys.
      * @return the set of metadata keys
      */
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index 0542188..9183c27 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -58,12 +58,38 @@ namespace orc {
         return "ORC-101";
       case WriterVersion_ORC_135:
         return "ORC-135";
+      case WriterVersion_ORC_517:
+        return "ORC-517";
+      case WriterVersion_ORC_203:
+        return "ORC-203";
+      case WriterVersion_ORC_14:
+        return "ORC-14";
     }
     std::stringstream buffer;
     buffer << "future - " << version;
     return buffer.str();
   }
 
+  std::string writerIdToString(uint32_t id) {
+    switch (id) {
+      case ORC_JAVA_WRITER:
+        return "ORC Java";
+      case ORC_CPP_WRITER:
+        return "ORC C++";
+      case PRESTO_WRITER:
+        return "Presto";
+      case SCRITCHLEY_GO:
+        return "Scritchley Go";
+      case TRINO_WRITER:
+        return "Trino";
+      default: {
+        std::ostringstream buffer;
+        buffer << "Unknown(" << id << ")";
+        return buffer.str();
+      }
+    }
+  }
+
   std::string streamKindToString(StreamKind kind) {
     switch (static_cast<int>(kind)) {
       case StreamKind_PRESENT:
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index 5e80a75..2d8f5d9 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -437,6 +437,15 @@ namespace orc {
     }
   }
 
+  std::string ReaderImpl::getSoftwareVersion() const {
+    std::ostringstream buffer;
+    buffer << writerIdToString(getWriterIdValue());
+    if (footer->has_softwareversion()) {
+      buffer << " " << footer->softwareversion();
+    }
+    return buffer.str();
+  }
+
   WriterVersion ReaderImpl::getWriterVersion() const {
     if (!contents->postscript->has_writerversion()) {
       return WriterVersion_ORIGINAL;
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index 03ef9dd..cdf5696 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -220,6 +220,8 @@ namespace orc {
 
     uint32_t getWriterIdValue() const override;
 
+    std::string getSoftwareVersion() const override;
+
     WriterVersion getWriterVersion() const override;
 
     uint64_t getNumberOfRows() const override;
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index 016ce35..d28dc19 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -323,6 +323,7 @@ namespace orc {
     fileFooter.set_rowindexstride(
                           static_cast<uint32_t>(options.getRowIndexStride()));
     fileFooter.set_writer(writerId);
+    fileFooter.set_softwareversion(ORC_VERSION);
 
     uint32_t index = 0;
     buildFooterType(type, fileFooter, index);
diff --git a/examples/expected/orc_split_elim_cpp.jsn.gz b/examples/expected/orc_split_elim_cpp.jsn.gz
new file mode 100644
index 0000000..5263699
Binary files /dev/null and b/examples/expected/orc_split_elim_cpp.jsn.gz differ
diff --git a/examples/orc_split_elim_cpp.orc b/examples/orc_split_elim_cpp.orc
new file mode 100644
index 0000000..86921f3
Binary files /dev/null and b/examples/orc_split_elim_cpp.orc differ
diff --git a/examples/orc_split_elim_new.orc b/examples/orc_split_elim_new.orc
index cdbe477..24e58f1 100644
Binary files a/examples/orc_split_elim_new.orc and b/examples/orc_split_elim_new.orc differ
diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index 2fe2408..8a77c14 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -34,6 +34,11 @@
   <!-- Java's try with resources causes a false positive.
        See https://github.com/SERG-Delft/jpacman/pull/27 . -->
   <Match>
+    <Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE"/>
+    <Class name="org.apache.orc.OrcUtils"/>
+    <Method name="getOrcVersion"/>
+  </Match>
+  <Match>
     <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
     <Class name="org.apache.orc.impl.OrcAcidUtils"/>
     <Method name="getLastFlushLength"/>
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index 220fa13..203f189 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -21,9 +21,11 @@ import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.impl.SchemaEvolution;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Properties;
 
 import static org.apache.hadoop.util.StringUtils.COMMA_STR;
 
@@ -623,4 +625,71 @@ public class OrcUtils {
     return result;
   }
 
+  /**
+   * Get the user-facing version string for the software that wrote the file.
+   * @param writer the code for the writer from OrcProto.Footer
+   * @param version the softwareVersion from OrcProto.Footer
+   * @return the version string
+   */
+  public static String getSoftwareVersion(int writer,
+                                          String version) {
+    String base;
+    switch (writer) {
+      case 0:
+        base = "ORC Java";
+        break;
+      case 1:
+        base = "ORC C++";
+        break;
+      case 2:
+        base = "Presto";
+        break;
+      case 3:
+        base = "Scritchley Go";
+        break;
+      case 4:
+        base = "Trino";
+        break;
+      default:
+        base = String.format("Unknown(%d)", writer);
+        break;
+    }
+    if (version == null) {
+      return base;
+    } else {
+      return base + " " + version;
+    }
+  }
+
+  /**
+   * Get the software version from Maven.
+   * @return The version of the software.
+   */
+  public static String getOrcVersion() {
+    Class<OrcFile> cls = OrcFile.class;
+    // try to load from maven properties first
+    try (InputStream is = cls.getResourceAsStream(
+        "/META-INF/maven/org.apache.orc/orc-core/pom.properties")) {
+      if (is != null) {
+        Properties p = new Properties();
+        p.load(is);
+        String version = p.getProperty("version", null);
+        if (version != null) {
+          return version;
+        }
+      }
+    } catch (IOException e) {
+      // ignore
+    }
+
+    // fallback to using Java API
+    Package aPackage = cls.getPackage();
+    if (aPackage != null) {
+      String version = aPackage.getImplementationVersion();
+      if (version != null) {
+        return version;
+      }
+    }
+    return "unknown";
+  }
 }
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index 8403e99..82376a6 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -140,6 +140,16 @@ public interface Reader extends Closeable {
   OrcFile.WriterVersion getWriterVersion();
 
   /**
+   * Get the implementation and version of the software that wrote the file.
+   * It defaults to "ORC Java" for old files. For current files, we include the
+   * version also.
+   * @since 1.5.13
+   * @return returns the writer implementation and hopefully the version of the
+   *   software
+   */
+  String getSoftwareVersion();
+
+  /**
    * Get the file tail (footer + postscript)
    *
    * @return - file tail
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 34da133..f59537c 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -84,6 +84,7 @@ public class ReaderImpl implements Reader {
   protected final boolean useUTCTimestamp;
   private final List<Integer> versionList;
   private final OrcFile.WriterVersion writerVersion;
+  private final String softwareVersion;
 
   protected OrcTail tail;
 
@@ -215,6 +216,11 @@ public class ReaderImpl implements Reader {
   }
 
   @Override
+  public String getSoftwareVersion() {
+    return softwareVersion;
+  }
+
+  @Override
   public OrcProto.FileTail getFileTail() {
     return tail.getFileTail();
   }
@@ -364,6 +370,7 @@ public class ReaderImpl implements Reader {
       this.fileStats = fileMetadata.getFileStats();
       this.stripes = fileMetadata.getStripes();
       this.userMetadata = null; // not cached and not needed here
+      this.softwareVersion = null;
     } else {
       OrcTail orcTail = options.getOrcTail();
       if (orcTail == null) {
@@ -386,6 +393,9 @@ public class ReaderImpl implements Reader {
       this.writerVersion = tail.getWriterVersion();
       this.stripes = tail.getStripes();
       this.stripeStats = tail.getStripeStatisticsProto();
+      OrcProto.Footer footer = tail.getFooter();
+      this.softwareVersion = OrcUtils.getSoftwareVersion(footer.getWriter(),
+          footer.getSoftwareVersion());
     }
     OrcUtils.isValidTypeTree(this.types, 0);
     this.schema = OrcUtils.convertTypeFromProtobuf(this.types, 0);
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index 6859c87..d21a240 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -558,6 +558,7 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
         .setName(entry.getKey()).setValue(entry.getValue()));
     }
     builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
+    builder.setSoftwareVersion(OrcUtils.getOrcVersion());
     physicalWriter.writeFileFooter(builder);
     return writePostScript();
   }
diff --git a/java/pom.xml b/java/pom.xml
index 8b3db9b..95c4052 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -207,6 +207,19 @@
     <pluginManagement>
       <plugins>
         <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.2.0</version>
+          <configuration>
+            <archive>                   
+              <manifest>
+                <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+                <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+              </manifest>
+            </archive>
+          </configuration>
+        </plugin>
+        <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>findbugs-maven-plugin</artifactId>
           <version>3.0.5</version>
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index a536f55..0b2006b 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -305,7 +305,8 @@ public final class FileDump {
     TypeDescription schema = reader.getSchema();
     System.out.println("Structure for " + filename);
     System.out.println("File Version: " + reader.getFileVersion().getName() +
-        " with " + reader.getWriterVersion());
+        " with " + reader.getWriterVersion() + " by " +
+        reader.getSoftwareVersion());
     RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
     System.out.println("Rows: " + reader.getNumberOfRows());
     System.out.println("Compression: " + reader.getCompressionKind());
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index e1d6301..d77df46 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -87,6 +87,7 @@ public class JsonFileDump {
         }
         writer.key("fileVersion").value(reader.getFileVersion().getName());
         writer.key("writerVersion").value(reader.getWriterVersion());
+        writer.key("softwareVersion").value(reader.getSoftwareVersion());
         RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
         writer.key("numberOfRows").value(reader.getNumberOfRows());
         writer.key("compression").value(reader.getCompressionKind());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
index bfb073c..0cdb45a 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
@@ -19,6 +19,7 @@
 package org.apache.orc.tools;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 import static org.junit.Assume.assumeTrue;
 
 import java.io.BufferedReader;
@@ -28,13 +29,18 @@ import java.io.FileOutputStream;
 import java.io.FileReader;
 import java.io.PrintStream;
 import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.sql.Timestamp;
 import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -63,6 +69,7 @@ import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 
+
 public class TestFileDump {
 
   Path workDir = new Path(System.getProperty("test.tmp.dir"));
@@ -181,19 +188,55 @@ public class TestFileDump {
     ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes());
   }
 
+  private static final Pattern ignoreTailPattern =
+      Pattern.compile("^(?<head>File Version|\"softwareVersion\"): .*");
+  private static final Pattern fileSizePattern =
+      Pattern.compile("^(\"fileLength\"|File length): (?<size>[0-9]+).*");
+  // Allow file size to be up to 100 bytes larger.
+  private static final int SIZE_SLOP = 100;
+
+  /**
+   * Preprocess the string for matching.
+   * If it matches the fileSizePattern, we return the file size as a Long.
+   * @param line the input line
+   * @return the processed line or a Long with the file size
+   */
+  private static Object preprocessLine(String line) {
+    if (line == null) {
+      return line;
+    }
+    line = line.trim();
+    Matcher match = fileSizePattern.matcher(line);
+    if (match.matches()) {
+      return Long.parseLong(match.group("size"));
+    }
+    match = ignoreTailPattern.matcher(line);
+    if (match.matches()) {
+      return match.group("head");
+    }
+    return line;
+  }
+
+  /**
+   * Compare two files for equivalence.
+   * @param expected Loaded from the class path
+   * @param actual Loaded from the file system
+   */
   public static void checkOutput(String expected,
                                  String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader
-            (TestJsonFileDump.getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine().trim();
+    BufferedReader eStream = Files.newBufferedReader(Paths.get(
+        TestJsonFileDump.getFileFromClasspath(expected)), StandardCharsets.UTF_8);
+    BufferedReader aStream = Files.newBufferedReader(Paths.get(actual), StandardCharsets.UTF_8);
+    Object expectedLine = preprocessLine(eStream.readLine());
     while (expectedLine != null) {
-      String actualLine = aStream.readLine().trim();
-      Assert.assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-      expectedLine = expectedLine == null ? null : expectedLine.trim();
+      Object actualLine = preprocessLine(aStream.readLine());
+      if (expectedLine instanceof Long && actualLine instanceof Long) {
+        long diff = (Long) actualLine - (Long) expectedLine;
+        assertTrue("expected: " + expectedLine + ", actual: " + actualLine, diff < SIZE_SLOP);
+      } else {
+        assertEquals(expectedLine, actualLine);
+      }
+      expectedLine = preprocessLine(eStream.readLine());
     }
     Assert.assertNull(eStream.readLine());
     Assert.assertNull(aStream.readLine());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
index eadc216..dd22e48 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
@@ -67,22 +67,6 @@ public class TestJsonFileDump {
     fs.delete(testFilePath, false);
   }
 
-  static void checkOutput(String expected,
-                                  String actual) throws Exception {
-    BufferedReader eStream =
-        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-    BufferedReader aStream =
-        new BufferedReader(new FileReader(actual));
-    String expectedLine = eStream.readLine();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-    }
-    assertNull(eStream.readLine());
-    assertNull(aStream.readLine());
-  }
-
   @Test
   public void testJsonDump() throws Exception {
     TypeDescription schema = TypeDescription.createStruct()
@@ -144,6 +128,6 @@ public class TestJsonFileDump {
     System.setOut(origOut);
 
 
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
   }
 }
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 1e5d39b..4709c60 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -2,6 +2,7 @@
   "fileName": "TestFileDump.testDump.orc",
   "fileVersion": "0.12",
   "writerVersion": "ORC_517",
+  "softwareVersion": "ORC Java 1.8.0-SNAPSHOT",
   "numberOfRows": 21000,
   "compression": "ZLIB",
   "compressionBufferSize": 4096,
@@ -1367,7 +1368,7 @@
       }]
     }
   ],
-  "fileLength": 272488,
+  "fileLength": 272529,
   "paddingLength": 0,
   "paddingRatio": 0,
   "status": "OK"
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 6ad396f..14dba2e 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -220,6 +220,11 @@ message Footer {
   optional uint32 writer = 9;
   // encryption is 10
   optional CalendarKind calendar = 11;
+
+  // informative description about the version of the software that wrote
+  // the file. It is assumed to be within a given writer, so for example
+  // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
+  optional string softwareVersion = 12;
 }
 
 enum CompressionKind {
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index c4784fd..b3591dc 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -106,6 +106,7 @@ void printMetadata(std::ostream & out, const char*filename, bool verbose) {
   out << "  \"format\": \"" << reader->getFormatVersion().toString()
       << "\", \"writer version\": \""
             << orc::writerVersionToString(reader->getWriterVersion())
+            << "\", \"software version\": \"" << reader->getSoftwareVersion()
             << "\",\n";
   out << "  \"compression\": \""
             << orc::compressionKindToString(reader->getCompression())
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index ec962e5..adefdd7 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -166,7 +166,7 @@ TEST (TestFileMetadata, testJson) {
     "  \"type\": \"struct<userid:bigint,string1:string,subtype:double,decimal1:decimal(0,0),ts:timestamp>\",\n"
     "  \"rows\": 25000,\n"
     "  \"stripe count\": 5,\n"
-    "  \"format\": \"0.12\", \"writer version\": \"original\",\n"
+    "  \"format\": \"0.12\", \"writer version\": \"original\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"none\",\n"
     "  \"file length\": 246402,\n"
     "  \"content\": 245568, \"stripe stats\": 526, \"footer\": 288, \"postscript\": 19,\n"
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index fc4f6f9..9a7de0d 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -38,6 +38,7 @@ namespace orc {
     std::string json;
     std::string typeString;
     std::string formatVersion;
+    std::string softwareVersion;
     uint64_t rowCount;
     uint64_t contentLength;
     uint64_t stripeCount;
@@ -50,6 +51,7 @@ namespace orc {
                        const std::string& _json,
                        const std::string& _typeString,
                        const std::string& _version,
+                       const std::string& _softwareVersion,
                        uint64_t _rowCount,
                        uint64_t _contentLength,
                        uint64_t _stripeCount,
@@ -61,6 +63,7 @@ namespace orc {
                           json(_json),
                           typeString(_typeString),
                           formatVersion(_version),
+                          softwareVersion(_softwareVersion),
                           rowCount(_rowCount),
                           contentLength(_contentLength),
                           stripeCount(_stripeCount),
@@ -111,6 +114,7 @@ namespace orc {
     EXPECT_EQ(GetParam().rowIndexStride, reader->getRowIndexStride());
     EXPECT_EQ(GetParam().contentLength, reader->getContentLength());
     EXPECT_EQ(GetParam().formatVersion, reader->getFormatVersion().toString());
+    EXPECT_EQ(GetParam().softwareVersion, reader->getSoftwareVersion());
     EXPECT_EQ(getFilename(), reader->getStreamName());
     EXPECT_EQ(GetParam().userMeta.size(), reader->getMetadataKeys().size());
     for(std::map<std::string, std::string>::const_iterator itr =
@@ -163,6 +167,7 @@ namespace orc {
                                        "TestOrcFile.columnProjection.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        21000,
                                        428406,
                                        5,
@@ -182,6 +187,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        0,
                                        3,
                                        0,
@@ -201,6 +207,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        1,
                                        980,
                                        1,
@@ -220,6 +227,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        2,
                                        1015,
                                        1,
@@ -233,6 +241,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.11",
+                                       "ORC Java",
                                        2500,
                                        18779,
                                        25,
@@ -246,6 +255,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        2500,
                                        10618,
                                        4,
@@ -258,6 +268,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        3500,
                                        15529,
                                        1,
@@ -276,6 +287,7 @@ namespace orc {
                                        "string1:string>>,map:map<string,"
                                        "struct<int1:int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        32768,
                                        1896379,
                                        7,
@@ -287,6 +299,7 @@ namespace orc {
                                        "TestOrcFile.testSnappy.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        126061,
                                        2,
@@ -300,6 +313,7 @@ namespace orc {
                                        "istics.jsn.gz",
                                        "struct<bytes1:binary,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        4,
                                        185,
                                        1,
@@ -312,6 +326,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        11000,
                                        597,
                                        3,
@@ -323,6 +338,7 @@ namespace orc {
                                        "TestOrcFile.testTimestamp.jsn.gz",
                                        "timestamp",
                                        "0.11",
+                                       "ORC Java",
                                        12,
                                        188,
                                        1,
@@ -336,6 +352,7 @@ namespace orc {
                                        "struct<time:timestamp,union:uniontype"
                                        "<int,string>,decimal:decimal(38,18)>",
                                        "0.12",
+                                       "ORC Java",
                                        5077,
                                        20906,
                                        2,
@@ -347,6 +364,7 @@ namespace orc {
                                        "TestOrcFile.testWithoutIndex.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        50000,
                                        214643,
                                        10,
@@ -358,6 +376,7 @@ namespace orc {
                                        "decimal.jsn.gz",
                                        "struct<_col0:decimal(10,5)>",
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        16186,
                                        1,
@@ -372,6 +391,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        5069718,
                                        385,
@@ -386,6 +406,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        396823,
                                        385,
@@ -400,6 +421,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.12",
+                                       "ORC Java",
                                        1920800,
                                        45592,
                                        1,
@@ -413,6 +435,7 @@ namespace orc {
                                         "_col2:int,_col3:bigint,_col4:float,"
                                         "_col5:double,_col6:boolean>"),
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        366347,
                                        1,
@@ -434,6 +457,7 @@ namespace orc {
                                         "string>>,ts:timestamp,"
                                         "decimal1:decimal(0,0)>"),
                                        "0.11",
+                                       "ORC Java",
                                        7500,
                                        372542,
                                        2,
@@ -448,17 +472,34 @@ namespace orc {
 					"decimal1:decimal(16,6),"
                                         "ts:timestamp>"),
                                        "0.12",
+                                       "ORC Java 1.8.0-SNAPSHOT",
                                        25000,
-                                       1981,
+                                       1980,
                                        1,
                                        CompressionKind_ZLIB,
                                        262144,
                                        10000,
                                        std::map<std::string, std::string>()),
+                    OrcFileDescription("orc_split_elim_cpp.orc",
+                                       "orc_split_elim_cpp.jsn.gz",
+                                       ("struct<userid:bigint,string1:string,"
+                                        "subtype:double,"
+                                        "decimal1:decimal(16,6),"
+                                        "ts:timestamp>"),
+                                       "0.12",
+                                       "ORC C++ 1.8.0-SNAPSHOT",
+                                       25000,
+                                       2942,
+                                       1,
+                                       CompressionKind_ZLIB,
+                                       65536,
+                                       10000,
+                                       std::map<std::string, std::string>()),
                     OrcFileDescription("orc_index_int_string.orc",
                                        "orc_index_int_string.jsn.gz",
                                        ("struct<_col0:int,_col1:varchar(4)>"),
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        11280,
                                        1,
@@ -474,6 +515,7 @@ namespace orc {
                                        "_col7:string,_col8:timestamp,"
                                        "_col9:decimal(4,2),_col10:binary>",
                                        "0.12",
+                                       "ORC Java",
                                        2098,
                                        41780,
                                        2,
@@ -485,6 +527,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLz4.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120952,
                                        2,
@@ -496,6 +539,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLzo.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120955,
                                        2,
@@ -512,6 +556,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate1900.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        30478,
                                        8,
@@ -529,6 +574,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate2038.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        212000,
                                        94762,
                                        28,