You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2022/09/01 11:02:15 UTC
[jena] branch main updated: GH-1501: Buffer bz2 decompression
This is an automated email from the ASF dual-hosted git repository.
andy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/jena.git
The following commit(s) were added to refs/heads/main by this push:
new 30a79db02c GH-1501: Buffer bz2 decompression
new ef194edeef Merge pull request #1503 from afs/misc
30a79db02c is described below
commit 30a79db02c7b173b7f3633b81b999de07b5fa2fc
Author: Andy Seaborne <an...@apache.org>
AuthorDate: Tue Aug 30 11:41:10 2022 +0100
GH-1501: Buffer bz2 decompression
---
.../src/main/java/org/apache/jena/atlas/io/IO.java | 30 ++++++++++++++++++----
1 file changed, 25 insertions(+), 5 deletions(-)
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index 8cce6f9e95..0f27b9d01c 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -88,11 +88,31 @@ public class IO
}
InputStream in = new FileInputStream(filename);
String ext = getExtension(filename);
+
+ // Input is a file stream.
+ // https://commons.apache.org/proper/commons-compress/examples.html#Buffering :
+ // """
+ // The stream classes all wrap around streams provided by the calling
+ // code and they work on them directly without any additional
+ // buffering. On the other hand most of them will benefit from
+ // buffering so it is highly recommended that users wrap their stream
+ // in Buffered(In|Out)putStreams before using the Commons Compress
+ // API.
+ // """
+ // GZip and Snappy have internal buffering.
+ // BZip2 does not.
switch ( ext ) {
- case "": return in;
- case ext_gz: return new GZIPInputStream(in);
- case ext_bz2: return new BZip2CompressorInputStream(in, true);
- case ext_sz: return new SnappyCompressorInputStream(in);
+ case "":
+ return in;
+ case ext_gz:
+ // Using an 8K buffer makes a small improvement (<5%).
+ return new GZIPInputStream(in, 8*1024);
+ case ext_bz2:
+ // Buffering the input makes a huge improvement: about x10 faster.
+ in = IO.ensureBuffered(in);
+ return new BZip2CompressorInputStream(in, true);
+ case ext_sz:
+ return new SnappyCompressorInputStream(in);
}
return in;
}
@@ -194,7 +214,7 @@ public class IO
/** Create an buffered reader that uses UTF-8 encoding */
static public BufferedReader asBufferedUTF8(InputStream in) {
- // Alway buffered - for readLine.
+ // Always buffered - for readLine.
return new BufferedReader(asUTF8(in), BUFSIZE_IN / 2);
}