You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2022/09/01 11:02:15 UTC

[jena] branch main updated: GH-1501: Buffer bz2 decompression

This is an automated email from the ASF dual-hosted git repository.

andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git


The following commit(s) were added to refs/heads/main by this push:
     new 30a79db02c GH-1501: Buffer bz2 decompression
     new ef194edeef Merge pull request #1503 from afs/misc
30a79db02c is described below

commit 30a79db02c7b173b7f3633b81b999de07b5fa2fc
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Tue Aug 30 11:41:10 2022 +0100

    GH-1501: Buffer bz2 decompression
---
 .../src/main/java/org/apache/jena/atlas/io/IO.java | 30 ++++++++++++++++++----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index 8cce6f9e95..0f27b9d01c 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -88,11 +88,31 @@ public class IO
         }
         InputStream in = new FileInputStream(filename);
         String ext = getExtension(filename);
+
+        // Input is a file stream.
+        // https://commons.apache.org/proper/commons-compress/examples.html#Buffering :
+        // """
+        // The stream classes all wrap around streams provided by the calling
+        // code and they work on them directly without any additional
+        // buffering. On the other hand most of them will benefit from
+        // buffering so it is highly recommended that users wrap their stream
+        // in Buffered(In|Out)putStreams before using the Commons Compress
+        // API.
+        // """
+        // GZip and Snappy have internal buffering.
+        // BZip2 does not.
         switch ( ext ) {
-            case "":        return in;
-            case ext_gz:    return new GZIPInputStream(in);
-            case ext_bz2:   return new BZip2CompressorInputStream(in, true);
-            case ext_sz:    return new SnappyCompressorInputStream(in);
+            case "":
+                return in;
+            case ext_gz:
+                // Makes a small improvement (<5%) to use 8K.
+                return new GZIPInputStream(in, 8*1024);
+            case ext_bz2:
+                // Makes a huge improvement: about x10 faster.
+                in = IO.ensureBuffered(in);
+                return new BZip2CompressorInputStream(in, true);
+            case ext_sz:
+                return new SnappyCompressorInputStream(in);
         }
         return in;
     }
@@ -194,7 +214,7 @@ public class IO
 
     /** Create an buffered reader that uses UTF-8 encoding */
     static public BufferedReader asBufferedUTF8(InputStream in) {
-        // Alway buffered - for readLine.
+        // Always buffered - for readLine.
         return new BufferedReader(asUTF8(in), BUFSIZE_IN / 2);
     }