You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@orc.apache.org by do...@apache.org on 2021/09/12 18:45:56 UTC

[orc] branch main updated: ORC-984: Save the software version that wrote each ORC file. (#904)

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/orc.git


The following commit(s) were added to refs/heads/main by this push:
     new cf720d7  ORC-984: Save the software version that wrote each ORC file. (#904)
cf720d7 is described below

commit cf720d7b2333618c652445299486cb9600bdbe4f
Author: Owen O'Malley <oo...@linkedin.com>
AuthorDate: Sun Sep 12 11:45:51 2021 -0700

    ORC-984: Save the software version that wrote each ORC file. (#904)
    
    ### What changes were proposed in this pull request?
    
    I added a string to the file footer that records the version of the software that wrote the file. We already recorded the implementation that wrote the file as Footer.writer. I added a method to the reader that combines these two fields to produce a user-facing string that describes the software version.
    
    I also added a field to the metadata tool to show the version that wrote the file.
    
    Because of that change and the fact that the files change size based on whether the ORC version is a snapshot or not, I had to extend the tests for TestFileDump to allow some slop for the size and to ignore the file version.
    
    ### How was this patch tested?
    
    It passes the unit tests after I updated the tools tests.
---
 c++/include/orc/Common.hh                          |   5 ++
 c++/include/orc/Reader.hh                          |   6 ++
 c++/src/Common.cc                                  |  26 ++++++++
 c++/src/Reader.cc                                  |   9 +++
 c++/src/Reader.hh                                  |   2 +
 c++/src/Writer.cc                                  |   1 +
 examples/expected/orc_split_elim_cpp.jsn.gz        | Bin 0 -> 8980 bytes
 examples/orc_split_elim_cpp.orc                    | Bin 0 -> 3339 bytes
 examples/orc_split_elim_new.orc                    | Bin 2341 -> 2396 bytes
 java/core/src/findbugs/exclude.xml                 |   5 ++
 java/core/src/java/org/apache/orc/OrcUtils.java    |  70 +++++++++++++++++++++
 java/core/src/java/org/apache/orc/Reader.java      |  10 +++
 .../src/java/org/apache/orc/impl/ReaderImpl.java   |  12 +++-
 .../src/java/org/apache/orc/impl/WriterImpl.java   |   1 +
 java/pom.xml                                       |  13 ++++
 .../src/java/org/apache/orc/tools/FileDump.java    |   3 +-
 .../java/org/apache/orc/tools/JsonFileDump.java    |   1 +
 .../test/org/apache/orc/tools/TestFileDump.java    |  59 +++++++++++++----
 .../org/apache/orc/tools/TestJsonFileDump.java     |  22 +------
 java/tools/src/test/resources/orc-file-dump.json   |   3 +-
 proto/orc_proto.proto                              |   5 ++
 tools/src/FileMetadata.cc                          |   1 +
 tools/test/TestFileMetadata.cc                     |   6 +-
 tools/test/TestMatch.cc                            |  48 +++++++++++++-
 24 files changed, 269 insertions(+), 39 deletions(-)

diff --git a/c++/include/orc/Common.hh b/c++/include/orc/Common.hh
index 09d1fb3..f865b30 100644
--- a/c++/include/orc/Common.hh
+++ b/c++/include/orc/Common.hh
@@ -74,6 +74,8 @@ namespace orc {
     UNKNOWN_WRITER = INT32_MAX
   };
 
+  std::string writerIdToString(uint32_t id);
+
   enum CompressionKind {
     CompressionKind_NONE = 0,
     CompressionKind_ZLIB = 1,
@@ -97,6 +99,9 @@ namespace orc {
     WriterVersion_HIVE_13083 = 4,
     WriterVersion_ORC_101 = 5,
     WriterVersion_ORC_135 = 6,
+    WriterVersion_ORC_517 = 7,
+    WriterVersion_ORC_203 = 8,
+    WriterVersion_ORC_14 = 9,
     WriterVersion_MAX = INT32_MAX
   };
 
diff --git a/c++/include/orc/Reader.hh b/c++/include/orc/Reader.hh
index cbc766a..3419d71 100644
--- a/c++/include/orc/Reader.hh
+++ b/c++/include/orc/Reader.hh
@@ -293,6 +293,12 @@ namespace orc {
     virtual uint64_t getNumberOfRows() const = 0;
 
     /**
+     * Get the software instance and version that wrote this file.
+     * @return a user-facing string that specifies the software version
+     */
+    virtual std::string getSoftwareVersion() const = 0;
+
+    /**
      * Get the user metadata keys.
      * @return the set of user metadata keys
      */
diff --git a/c++/src/Common.cc b/c++/src/Common.cc
index 0a5256e..dbf0737 100644
--- a/c++/src/Common.cc
+++ b/c++/src/Common.cc
@@ -58,12 +58,38 @@ namespace orc {
         return "ORC-101";
       case WriterVersion_ORC_135:
         return "ORC-135";
+      case WriterVersion_ORC_517:
+        return "ORC-517";
+      case WriterVersion_ORC_203:
+        return "ORC-203";
+      case WriterVersion_ORC_14:
+        return "ORC-14";
     }
     std::stringstream buffer;
     buffer << "future - " << version;
     return buffer.str();
   }
 
+  // Maps the numeric writer id to the user-facing name of the
+  // implementation; ids without a known name become "Unknown(<id>)".
+  std::string writerIdToString(uint32_t id) {
+    const char* name = nullptr;
+    switch (id) {
+      case ORC_JAVA_WRITER: name = "ORC Java"; break;
+      case ORC_CPP_WRITER: name = "ORC C++"; break;
+      case PRESTO_WRITER: name = "Presto"; break;
+      case SCRITCHLEY_GO: name = "Scritchley Go"; break;
+      case TRINO_WRITER: name = "Trino"; break;
+      default: break;
+    }
+    if (name != nullptr) {
+      return name;
+    }
+    std::ostringstream fallback;
+    fallback << "Unknown(" << id << ")";
+    return fallback.str();
+  }
+
   std::string streamKindToString(StreamKind kind) {
     switch (static_cast<int>(kind)) {
       case StreamKind_PRESENT:
diff --git a/c++/src/Reader.cc b/c++/src/Reader.cc
index c44439a..c9b1627 100644
--- a/c++/src/Reader.cc
+++ b/c++/src/Reader.cc
@@ -558,6 +558,15 @@ namespace orc {
     }
   }
 
+  std::string ReaderImpl::getSoftwareVersion() const {
+    // Writer name, followed by the recorded version when the footer has one.
+    std::string description = writerIdToString(getWriterIdValue());
+    if (footer->has_softwareversion()) {
+      description.append(" ").append(footer->softwareversion());
+    }
+    return description;
+  }
+
   WriterVersion ReaderImpl::getWriterVersion() const {
     return getWriterVersionImpl(contents.get());
   }
diff --git a/c++/src/Reader.hh b/c++/src/Reader.hh
index c097885..b7b76d4 100644
--- a/c++/src/Reader.hh
+++ b/c++/src/Reader.hh
@@ -259,6 +259,8 @@ namespace orc {
 
     uint32_t getWriterIdValue() const override;
 
+    std::string getSoftwareVersion() const override;
+
     WriterVersion getWriterVersion() const override;
 
     uint64_t getNumberOfRows() const override;
diff --git a/c++/src/Writer.cc b/c++/src/Writer.cc
index fda5f5c..b5ae6b7 100644
--- a/c++/src/Writer.cc
+++ b/c++/src/Writer.cc
@@ -395,6 +395,7 @@ namespace orc {
     fileFooter.set_rowindexstride(
                           static_cast<uint32_t>(options.getRowIndexStride()));
     fileFooter.set_writer(writerId);
+    fileFooter.set_softwareversion(ORC_VERSION);
 
     uint32_t index = 0;
     buildFooterType(type, fileFooter, index);
diff --git a/examples/expected/orc_split_elim_cpp.jsn.gz b/examples/expected/orc_split_elim_cpp.jsn.gz
new file mode 100644
index 0000000..5263699
Binary files /dev/null and b/examples/expected/orc_split_elim_cpp.jsn.gz differ
diff --git a/examples/orc_split_elim_cpp.orc b/examples/orc_split_elim_cpp.orc
new file mode 100644
index 0000000..86921f3
Binary files /dev/null and b/examples/orc_split_elim_cpp.orc differ
diff --git a/examples/orc_split_elim_new.orc b/examples/orc_split_elim_new.orc
index cdbe477..24e58f1 100644
Binary files a/examples/orc_split_elim_new.orc and b/examples/orc_split_elim_new.orc differ
diff --git a/java/core/src/findbugs/exclude.xml b/java/core/src/findbugs/exclude.xml
index a196fe5..ea94a42 100644
--- a/java/core/src/findbugs/exclude.xml
+++ b/java/core/src/findbugs/exclude.xml
@@ -43,6 +43,11 @@
   <!-- Java's try with resources causes a false positive.
        See https://github.com/SERG-Delft/jpacman/pull/27 . -->
   <Match>
+    <Bug pattern="RCN_REDUNDANT_NULLCHECK_OF_NONNULL_VALUE"/>
+    <Class name="org.apache.orc.OrcUtils"/>
+    <Method name="getOrcVersion"/>
+  </Match>
+  <Match>
     <Bug pattern="RCN_REDUNDANT_NULLCHECK_WOULD_HAVE_BEEN_A_NPE"/>
     <Class name="org.apache.orc.impl.OrcAcidUtils"/>
     <Method name="getLastFlushLength"/>
diff --git a/java/core/src/java/org/apache/orc/OrcUtils.java b/java/core/src/java/org/apache/orc/OrcUtils.java
index 2f9237b..358407f 100644
--- a/java/core/src/java/org/apache/orc/OrcUtils.java
+++ b/java/core/src/java/org/apache/orc/OrcUtils.java
@@ -22,9 +22,11 @@ import org.apache.orc.impl.ReaderImpl;
 import org.apache.orc.impl.SchemaEvolution;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
+import java.util.Properties;
 
 import static org.apache.hadoop.util.StringUtils.COMMA_STR;
 
@@ -390,4 +392,72 @@ public class OrcUtils {
     }
     return result;
   }
+
+  /**
+   * Get the user-facing version string for the software that wrote the file.
+   * @param writer the code for the writer from OrcProto.Footer
+   * @param version the softwareVersion from OrcProto.Footer; null or empty
+   *   (what protobuf returns for an unset string field) means not recorded
+   * @return the writer name, followed by the version when one was recorded
+   */
+  public static String getSoftwareVersion(int writer, String version) {
+    String base;
+    switch (writer) {
+      case 0:
+        base = "ORC Java";
+        break;
+      case 1:
+        base = "ORC C++";
+        break;
+      case 2:
+        base = "Presto";
+        break;
+      case 3:
+        base = "Scritchley Go";
+        break;
+      case 4:
+        base = "Trino";
+        break;
+      default:
+        base = String.format("Unknown(%d)", writer);
+        break;
+    }
+    // Old files have no version; avoid returning the name with a trailing space.
+    if (version == null || version.isEmpty()) {
+      return base;
+    }
+    return base + " " + version;
+  }
+
+  /**
+   * Determine the version of the ORC software. The Maven pom.properties
+   * resource is consulted first, then the package's
+   * implementation version.
+   * @return the version, or "unknown" if neither source provides one
+   */
+  public static String getOrcVersion() {
+    Class<OrcFile> anchor = OrcFile.class;
+    // First choice: the version recorded in the Maven properties resource.
+    try (InputStream stream = anchor.getResourceAsStream(
+        "/META-INF/maven/org.apache.orc/orc-core/pom.properties")) {
+      if (stream != null) {
+        Properties pom = new Properties();
+        pom.load(stream);
+        String fromPom = pom.getProperty("version", null);
+        if (fromPom != null) {
+          return fromPom;
+        }
+      }
+    } catch (IOException e) {
+      // best effort: fall through to the package lookup below
+    }
+
+    // Second choice: the implementation version reported by the Java API.
+    Package pkg = anchor.getPackage();
+    String fromPackage = pkg == null ? null : pkg.getImplementationVersion();
+    if (fromPackage != null) {
+      return fromPackage;
+    }
+    return "unknown";
+  }
 }
diff --git a/java/core/src/java/org/apache/orc/Reader.java b/java/core/src/java/org/apache/orc/Reader.java
index d976586..134dca3 100644
--- a/java/core/src/java/org/apache/orc/Reader.java
+++ b/java/core/src/java/org/apache/orc/Reader.java
@@ -159,6 +159,16 @@ public interface Reader extends Closeable {
   OrcFile.WriterVersion getWriterVersion();
 
   /**
+   * Get the implementation and version of the software that wrote the file.
+   * It defaults to "ORC Java" for old files. For current files, we include the
+   * version also.
+   * @since 1.5.13
+   * @return returns the writer implementation and hopefully the version of the
+   *   software
+   */
+  String getSoftwareVersion();
+
+  /**
    * Get the file tail (footer + postscript)
    *
    * @return - file tail
diff --git a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
index 71279a1..402152e 100644
--- a/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/ReaderImpl.java
@@ -88,6 +88,7 @@ public class ReaderImpl implements Reader {
   protected final boolean useUTCTimestamp;
   private final List<Integer> versionList;
   private final OrcFile.WriterVersion writerVersion;
+  private final String softwareVersion;
 
   protected final OrcTail tail;
 
@@ -284,6 +285,11 @@ public class ReaderImpl implements Reader {
   }
 
   @Override
+  public String getSoftwareVersion() {
+    return softwareVersion;
+  }
+
+  @Override
   public OrcProto.FileTail getFileTail() {
     return tail.getFileTail();
   }
@@ -552,6 +558,7 @@ public class ReaderImpl implements Reader {
       this.userMetadata = null; // not cached and not needed here
       // FileMetadata is obsolete and doesn't support encryption
       this.encryption = new ReaderEncryption();
+      this.softwareVersion = null;
     } else {
       OrcTail orcTail = options.getOrcTail();
       if (orcTail == null) {
@@ -574,9 +581,12 @@ public class ReaderImpl implements Reader {
       this.writerVersion = tail.getWriterVersion();
       this.stripes = tail.getStripes();
       this.stripeStatistics = null;
-      this.encryption = new ReaderEncryption(tail.getFooter(), schema,
+      OrcProto.Footer footer = tail.getFooter();
+      this.encryption = new ReaderEncryption(footer, schema,
           tail.getStripeStatisticsOffset(), tail.getTailBuffer(), stripes,
           options.getKeyProvider(), conf);
+      this.softwareVersion = OrcUtils.getSoftwareVersion(footer.getWriter(),
+          footer.getSoftwareVersion());
     }
     this.types = OrcUtils.getOrcTypes(schema);
   }
diff --git a/java/core/src/java/org/apache/orc/impl/WriterImpl.java b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
index d95430c..c0a2b52 100644
--- a/java/core/src/java/org/apache/orc/impl/WriterImpl.java
+++ b/java/core/src/java/org/apache/orc/impl/WriterImpl.java
@@ -666,6 +666,7 @@ public class WriterImpl implements WriterInternal, MemoryManager.Callback {
       builder.setEncryption(writeEncryptionFooter());
     }
     builder.setWriter(OrcFile.WriterImplementation.ORC_JAVA.getId());
+    builder.setSoftwareVersion(OrcUtils.getOrcVersion());
     physicalWriter.writeFileFooter(builder);
     return writePostScript();
   }
diff --git a/java/pom.xml b/java/pom.xml
index 33fbe7c..bdd2693 100644
--- a/java/pom.xml
+++ b/java/pom.xml
@@ -208,6 +208,19 @@
     <pluginManagement>
       <plugins>
         <plugin>
+          <groupId>org.apache.maven.plugins</groupId>
+          <artifactId>maven-jar-plugin</artifactId>
+          <version>3.2.0</version>
+          <configuration>
+            <archive>                   
+              <manifest>
+                <addDefaultImplementationEntries>true</addDefaultImplementationEntries>
+                <addDefaultSpecificationEntries>true</addDefaultSpecificationEntries>
+              </manifest>
+            </archive>
+          </configuration>
+        </plugin>
+        <plugin>
           <groupId>org.codehaus.mojo</groupId>
           <artifactId>findbugs-maven-plugin</artifactId>
           <version>3.0.5</version>
diff --git a/java/tools/src/java/org/apache/orc/tools/FileDump.java b/java/tools/src/java/org/apache/orc/tools/FileDump.java
index d3023af..850743f 100644
--- a/java/tools/src/java/org/apache/orc/tools/FileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/FileDump.java
@@ -339,7 +339,8 @@ public final class FileDump {
     TypeDescription schema = reader.getSchema();
     System.out.println("Structure for " + filename);
     System.out.println("File Version: " + reader.getFileVersion().getName() +
-        " with " + reader.getWriterVersion());
+        " with " + reader.getWriterVersion() + " by " +
+        reader.getSoftwareVersion());
     RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
     System.out.println("Rows: " + reader.getNumberOfRows());
     System.out.println("Compression: " + reader.getCompressionKind());
diff --git a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
index fba9e20..4f6b7de 100644
--- a/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
+++ b/java/tools/src/java/org/apache/orc/tools/JsonFileDump.java
@@ -89,6 +89,7 @@ public class JsonFileDump {
         }
         writer.name("fileVersion").value(reader.getFileVersion().getName());
         writer.name("writerVersion").value(reader.getWriterVersion().toString());
+        writer.name("softwareVersion").value(reader.getSoftwareVersion());
         RecordReaderImpl rows = (RecordReaderImpl) reader.rows();
         writer.name("numberOfRows").value(reader.getNumberOfRows());
         writer.name("compression").value(reader.getCompressionKind().toString());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
index 3cbdfe7..5bebbef 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestFileDump.java
@@ -57,8 +57,11 @@ import java.text.SimpleDateFormat;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 import java.util.Random;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertFalse;
@@ -184,22 +187,56 @@ public class TestFileDump {
     ((BytesColumnVector) struct.fields[1]).setVal(row, sts.getBytes(StandardCharsets.UTF_8));
   }
 
+  private static final Pattern ignoreTailPattern =
+      Pattern.compile("^(?<head>File Version|\"softwareVersion\"): .*");
+  private static final Pattern fileSizePattern =
+      Pattern.compile("^(\"fileLength\"|File length): (?<size>[0-9]+).*");
+  // Allow file size to be up to 100 bytes larger.
+  private static final int SIZE_SLOP = 100;
+
+  /**
+   * Normalize one line of dump output for comparison: file length lines
+   * become a Long, version lines keep only their label, others are trimmed.
+   * @param line the input line, possibly null
+   * @return null, the normalized text, or a Long with the file size
+   */
+  private static Object preprocessLine(String line) {
+    if (line == null) {
+      return null;
+    }
+    String trimmed = line.trim();
+    Matcher sizeMatch = fileSizePattern.matcher(trimmed);
+    if (sizeMatch.matches()) {
+      return Long.valueOf(sizeMatch.group("size"));
+    }
+    Matcher tailMatch = ignoreTailPattern.matcher(trimmed);
+    if (tailMatch.matches()) {
+      return tailMatch.group("head");
+    }
+    return trimmed;
+  }
+
+  /**
+   * Compare two files for equivalence.
+   * @param expected Loaded from the class path
+   * @param actual Loaded from the file system
+   */
   public static void checkOutput(String expected,
                                  String actual) throws Exception {
-    BufferedReader eStream = Files.newBufferedReader(Paths.get(TestJsonFileDump.getFileFromClasspath(expected)), StandardCharsets.UTF_8);
+    BufferedReader eStream = Files.newBufferedReader(Paths.get(
+        TestJsonFileDump.getFileFromClasspath(expected)), StandardCharsets.UTF_8);
     BufferedReader aStream = Files.newBufferedReader(Paths.get(actual), StandardCharsets.UTF_8);
-    String expectedLine = eStream.readLine();
-    if (expectedLine != null) {
-      expectedLine = expectedLine.trim();
-    }
+    Object expectedLine = preprocessLine(eStream.readLine());
     while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      if (actualLine != null) {
-        actualLine = actualLine.trim();
+      Object actualLine = preprocessLine(aStream.readLine());
+      if (expectedLine instanceof Long && actualLine instanceof Long) {
+        long diff = (Long) actualLine - (Long) expectedLine;
+        assertTrue(diff < SIZE_SLOP,
+            "expected: " + expectedLine + ", actual: " + actualLine);
+      } else {
+        assertEquals(expectedLine, actualLine);
       }
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-      expectedLine = expectedLine == null ? null : expectedLine.trim();
+      expectedLine = preprocessLine(eStream.readLine());
     }
     assertNull(eStream.readLine());
     assertNull(aStream.readLine());
diff --git a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
index 69be032..f226ae1 100644
--- a/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
+++ b/java/tools/src/test/org/apache/orc/tools/TestJsonFileDump.java
@@ -69,26 +69,6 @@ public class TestJsonFileDump {
     fs.delete(testFilePath, false);
   }
 
-  static void checkOutput(String expected,
-                                  String actual) throws Exception {
-//    BufferedReader eStream =
-//        new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-//    new BufferedReader(new FileReader(getFileFromClasspath(expected)));
-    BufferedReader eStream  = Files
-        .newBufferedReader(Paths.get(getFileFromClasspath(expected)), StandardCharsets.UTF_8);
-//    BufferedReader aStream =
-//        new BufferedReader(new FileReader(actual));
-    BufferedReader aStream = Files.newBufferedReader(Paths.get(actual), StandardCharsets.UTF_8);
-    String expectedLine = eStream.readLine();
-    while (expectedLine != null) {
-      String actualLine = aStream.readLine();
-      assertEquals(expectedLine, actualLine);
-      expectedLine = eStream.readLine();
-    }
-    assertNull(eStream.readLine());
-    assertNull(aStream.readLine());
-  }
-
   @Test
   public void testJsonDump() throws Exception {
     TypeDescription schema =
@@ -152,6 +132,6 @@ public class TestJsonFileDump {
     System.setOut(origOut);
 
 
-    checkOutput(outputFilename, workDir + File.separator + outputFilename);
+    TestFileDump.checkOutput(outputFilename, workDir + File.separator + outputFilename);
   }
 }
diff --git a/java/tools/src/test/resources/orc-file-dump.json b/java/tools/src/test/resources/orc-file-dump.json
index 85cfa2a..1540f7c 100644
--- a/java/tools/src/test/resources/orc-file-dump.json
+++ b/java/tools/src/test/resources/orc-file-dump.json
@@ -2,6 +2,7 @@
   "fileName": "TestFileDump.testDump.orc",
   "fileVersion": "0.12",
   "writerVersion": "ORC_14",
+  "softwareVersion": "ORC Java 1.8.0-SNAPSHOT",
   "numberOfRows": 21000,
   "compression": "ZLIB",
   "compressionBufferSize": 4096,
@@ -1375,7 +1376,7 @@
       ]
     }
   ],
-  "fileLength": 272513,
+  "fileLength": 272529,
   "paddingLength": 0,
   "paddingRatio": 0.0,
   "status": "OK"
diff --git a/proto/orc_proto.proto b/proto/orc_proto.proto
index 0ffd681..ff05657 100644
--- a/proto/orc_proto.proto
+++ b/proto/orc_proto.proto
@@ -372,6 +372,11 @@ message Footer {
   // information about the encryption in this file
   optional Encryption encryption = 10;
   optional CalendarKind calendar = 11;
+
+  // informative description about the version of the software that wrote
+  // the file. It is assumed to be within a given writer, so for example
+  // ORC 1.7.2 = "1.7.2". It may include suffixes, such as "-SNAPSHOT".
+  optional string softwareVersion = 12;
 }
 
 enum CompressionKind {
diff --git a/tools/src/FileMetadata.cc b/tools/src/FileMetadata.cc
index 292979a..9079ec0 100644
--- a/tools/src/FileMetadata.cc
+++ b/tools/src/FileMetadata.cc
@@ -151,6 +151,7 @@ void printMetadata(std::ostream & out, const char*filename, bool verbose) {
   out << "  \"format\": \"" << reader->getFormatVersion().toString()
       << "\", \"writer version\": \""
             << orc::writerVersionToString(reader->getWriterVersion())
+            << "\", \"software version\": \"" << reader->getSoftwareVersion()
             << "\",\n";
   out << "  \"compression\": \""
             << orc::compressionKindToString(reader->getCompression())
diff --git a/tools/test/TestFileMetadata.cc b/tools/test/TestFileMetadata.cc
index 87f2539..d9ec4e4 100644
--- a/tools/test/TestFileMetadata.cc
+++ b/tools/test/TestFileMetadata.cc
@@ -167,7 +167,7 @@ TEST (TestFileMetadata, testJson) {
     "  \"attributes\": {},\n"
     "  \"rows\": 25000,\n"
     "  \"stripe count\": 5,\n"
-    "  \"format\": \"0.12\", \"writer version\": \"original\",\n"
+    "  \"format\": \"0.12\", \"writer version\": \"original\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"none\",\n"
     "  \"file length\": 246402,\n"
     "  \"content\": 245568, \"stripe stats\": 526, \"footer\": 288, \"postscript\": 19,\n"
@@ -215,7 +215,7 @@ TEST (TestFileMetadata, testNoFormat) {
     "  \"attributes\": {},\n"
     "  \"rows\": 5,\n"
     "  \"stripe count\": 1,\n"
-    "  \"format\": \"0.11\", \"writer version\": \"original\",\n"
+    "  \"format\": \"0.11\", \"writer version\": \"original\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"zlib\", \"compression block\": 262144,\n"
     "  \"file length\": 745,\n"
     "  \"content\": 525, \"stripe stats\": 0, \"footer\": 210, \"postscript\": 9,\n"
@@ -276,7 +276,7 @@ TEST (TestFileMetadata, testAttributes) {
     "    \"nested_struct.g._value.h.i._elem\": {\"iceberg.id\": \"29\", \"iceberg.required\": \"false\"}},\n"
     "  \"rows\": 1,\n"
     "  \"stripe count\": 1,\n"
-    "  \"format\": \"0.12\", \"writer version\": \"future - 9\",\n"
+    "  \"format\": \"0.12\", \"writer version\": \"ORC-14\", \"software version\": \"ORC Java\",\n"
     "  \"compression\": \"zlib\", \"compression block\": 131072,\n"
     "  \"file length\": 1734,\n"
     "  \"content\": 1006, \"stripe stats\": 167, \"footer\": 535, \"postscript\": 25,\n"
diff --git a/tools/test/TestMatch.cc b/tools/test/TestMatch.cc
index 8ba3079..9fb28f4 100644
--- a/tools/test/TestMatch.cc
+++ b/tools/test/TestMatch.cc
@@ -38,6 +38,7 @@ namespace orc {
     std::string json;
     std::string typeString;
     std::string formatVersion;
+    std::string softwareVersion;
     uint64_t rowCount;
     uint64_t contentLength;
     uint64_t stripeCount;
@@ -50,6 +51,7 @@ namespace orc {
                        const std::string& _json,
                        const std::string& _typeString,
                        const std::string& _version,
+                       const std::string& _softwareVersion,
                        uint64_t _rowCount,
                        uint64_t _contentLength,
                        uint64_t _stripeCount,
@@ -61,6 +63,7 @@ namespace orc {
                           json(_json),
                           typeString(_typeString),
                           formatVersion(_version),
+                          softwareVersion(_softwareVersion),
                           rowCount(_rowCount),
                           contentLength(_contentLength),
                           stripeCount(_stripeCount),
@@ -111,6 +114,7 @@ namespace orc {
     EXPECT_EQ(GetParam().rowIndexStride, reader->getRowIndexStride());
     EXPECT_EQ(GetParam().contentLength, reader->getContentLength());
     EXPECT_EQ(GetParam().formatVersion, reader->getFormatVersion().toString());
+    EXPECT_EQ(GetParam().softwareVersion, reader->getSoftwareVersion());
     EXPECT_EQ(getFilename(), reader->getStreamName());
     EXPECT_EQ(GetParam().userMeta.size(), reader->getMetadataKeys().size());
     for(std::map<std::string, std::string>::const_iterator itr =
@@ -163,6 +167,7 @@ namespace orc {
                                        "TestOrcFile.columnProjection.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        21000,
                                        428406,
                                        5,
@@ -182,6 +187,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        0,
                                        3,
                                        0,
@@ -201,6 +207,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        1,
                                        980,
                                        1,
@@ -220,6 +227,7 @@ namespace orc {
                                        "string>>,map:map<string,struct<int1:"
                                        "int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        2,
                                        1015,
                                        1,
@@ -233,6 +241,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.11",
+                                       "ORC Java",
                                        2500,
                                        18779,
                                        25,
@@ -246,6 +255,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        2500,
                                        10618,
                                        4,
@@ -258,6 +268,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        3500,
                                        15529,
                                        1,
@@ -276,6 +287,7 @@ namespace orc {
                                        "string1:string>>,map:map<string,"
                                        "struct<int1:int,string1:string>>>",
                                        "0.12",
+                                       "ORC Java",
                                        32768,
                                        1896379,
                                        7,
@@ -287,6 +299,7 @@ namespace orc {
                                        "TestOrcFile.testSnappy.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        126061,
                                        2,
@@ -300,6 +313,7 @@ namespace orc {
                                        "istics.jsn.gz",
                                        "struct<bytes1:binary,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        4,
                                        185,
                                        1,
@@ -312,6 +326,7 @@ namespace orc {
                                        ".jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        11000,
                                        597,
                                        3,
@@ -323,6 +338,7 @@ namespace orc {
                                        "TestOrcFile.testTimestamp.jsn.gz",
                                        "timestamp",
                                        "0.11",
+                                       "ORC Java",
                                        12,
                                        188,
                                        1,
@@ -336,6 +352,7 @@ namespace orc {
                                        "struct<time:timestamp,union:uniontype"
                                        "<int,string>,decimal:decimal(38,18)>",
                                        "0.12",
+                                       "ORC Java",
                                        5077,
                                        20906,
                                        2,
@@ -347,6 +364,7 @@ namespace orc {
                                        "TestOrcFile.testWithoutIndex.jsn.gz",
                                        "struct<int1:int,string1:string>",
                                        "0.12",
+                                       "ORC Java",
                                        50000,
                                        214643,
                                        10,
@@ -358,6 +376,7 @@ namespace orc {
                                        "decimal.jsn.gz",
                                        "struct<_col0:decimal(10,5)>",
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        16186,
                                        1,
@@ -372,6 +391,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        5069718,
                                        385,
@@ -386,6 +406,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.11",
+                                       "ORC Java",
                                        1920800,
                                        396823,
                                        385,
@@ -400,6 +421,7 @@ namespace orc {
                                         "_col5:string,_col6:int,_col7:int,"
                                         "_col8:int>"),
                                        "0.12",
+                                       "ORC Java",
                                        1920800,
                                        45592,
                                        1,
@@ -413,6 +435,7 @@ namespace orc {
                                         "_col2:int,_col3:bigint,_col4:float,"
                                         "_col5:double,_col6:boolean>"),
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        366347,
                                        1,
@@ -434,6 +457,7 @@ namespace orc {
                                         "string>>,ts:timestamp,"
                                         "decimal1:decimal(0,0)>"),
                                        "0.11",
+                                       "ORC Java",
                                        7500,
                                        372542,
                                        2,
@@ -448,17 +472,34 @@ namespace orc {
 					"decimal1:decimal(16,6),"
                                         "ts:timestamp>"),
                                        "0.12",
+                                       "ORC Java 1.8.0-SNAPSHOT",
                                        25000,
-                                       1981,
+                                       1980,
                                        1,
                                        CompressionKind_ZLIB,
                                        262144,
                                        10000,
                                        std::map<std::string, std::string>()),
+                    OrcFileDescription("orc_split_elim_cpp.orc",
+                                       "orc_split_elim_cpp.jsn.gz",
+                                       ("struct<userid:bigint,string1:string,"
+                                        "subtype:double,"
+                                        "decimal1:decimal(16,6),"
+                                        "ts:timestamp>"),
+                                       "0.12",
+                                       "ORC C++ 1.8.0-SNAPSHOT",
+                                       25000,
+                                       2942,
+                                       1,
+                                       CompressionKind_ZLIB,
+                                       65536,
+                                       10000,
+                                       std::map<std::string, std::string>()),
                     OrcFileDescription("orc_index_int_string.orc",
                                        "orc_index_int_string.jsn.gz",
                                        ("struct<_col0:int,_col1:varchar(4)>"),
                                        "0.12",
+                                       "ORC Java",
                                        6000,
                                        11280,
                                        1,
@@ -474,6 +515,7 @@ namespace orc {
                                        "_col7:string,_col8:timestamp,"
                                        "_col9:decimal(4,2),_col10:binary>",
                                        "0.12",
+                                       "ORC Java",
                                        2098,
                                        41780,
                                        2,
@@ -485,6 +527,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLz4.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120952,
                                        2,
@@ -496,6 +539,7 @@ namespace orc {
                                        "TestVectorOrcFile.testLzo.jsn.gz",
                                        "struct<x:bigint,y:int,z:bigint>",
                                        "0.12",
+                                       "ORC Java",
                                        10000,
                                        120955,
                                        2,
@@ -512,6 +556,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate1900.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        70000,
                                        30478,
                                        8,
@@ -529,6 +574,7 @@ INSTANTIATE_TEST_CASE_P(TestMatch1900, FileParam,
                                        "TestOrcFile.testDate2038.jsn.gz",
                                        "struct<time:timestamp,date:date>",
                                        "0.12",
+                                       "ORC Java",
                                        212000,
                                        94762,
                                        28,