You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by an...@apache.org on 2018/06/05 19:29:59 UTC
[2/5] jena git commit: JENA-1554: Add bz2 compression/decompression
JENA-1554: Add bz2 compression/decompression
Add Snappy
default 32k block
decompress only; compressor not available
Update javadoc (RDFLanguages, BinRDF) that mentions gz.
Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/f88fbc57
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/f88fbc57
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/f88fbc57
Branch: refs/heads/master
Commit: f88fbc578d02ed8925104bf5d4a03795470d9275
Parents: eb9ba39
Author: Andy Seaborne <an...@apache.org>
Authored: Sun Jun 3 10:11:13 2018 +0100
Committer: Andy Seaborne <an...@apache.org>
Committed: Sun Jun 3 10:11:13 2018 +0100
----------------------------------------------------------------------
.../java/org/apache/jena/riot/RDFLanguages.java | 6 +--
.../org/apache/jena/riot/thrift/BinRDF.java | 5 +-
.../main/java/org/apache/jena/atlas/io/IO.java | 49 ++++++++++++-----
.../java/org/apache/jena/atlas/io/TS_IO.java | 1 +
.../jena/atlas/io/TestFilenameExtensions.java | 56 ++++++++++++++++++++
5 files changed, 97 insertions(+), 20 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/jena/blob/f88fbc57/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
index cdfb6a5..d3f5c08 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/RDFLanguages.java
@@ -45,6 +45,7 @@ import static org.apache.jena.riot.WebContent.contentTypeTurtleAlt2;
import java.util.*;
+import org.apache.jena.atlas.io.IO;
import org.apache.jena.atlas.logging.Log ;
import org.apache.jena.atlas.web.ContentType ;
import org.apache.jena.atlas.web.MediaType ;
@@ -409,9 +410,8 @@ public class RDFLanguages
int iHash = filename.indexOf('#');
if ( iHash > 0 )
filename = filename.substring(0, iHash);
- // Gzip compressed?
- if ( filename.endsWith(".gz") )
- filename = filename.substring(0, filename.length()-3);
+ // Compressed (gzip, bzip2 or snappy)? Strip the compression extension.
+ filename = IO.filenameNoCompression(filename);
return fileExtToLang(FileUtils.getFilenameExt(filename));
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f88fbc57/jena-arq/src/main/java/org/apache/jena/riot/thrift/BinRDF.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/riot/thrift/BinRDF.java b/jena-arq/src/main/java/org/apache/jena/riot/thrift/BinRDF.java
index 96e4ea6..7da523d 100644
--- a/jena-arq/src/main/java/org/apache/jena/riot/thrift/BinRDF.java
+++ b/jena-arq/src/main/java/org/apache/jena/riot/thrift/BinRDF.java
@@ -62,8 +62,8 @@ public class BinRDF {
}
/**
- * Create an {@link StreamRDF} for output. A filename ending {@code .gz} will have
- * a gzip compressor added to the output path. A filename of "-" is {@code System.out}.
+ * Create an {@link StreamRDF} for output. A filename ending {@code .gz} or {@code .bz2} will have
+ * the respective compressor added to the output path. A filename of "-" is {@code System.out}.
* The file is closed when {@link StreamRDF#finish()} is called unless it is {@code System.out}.
* Call {@link StreamRDF#start()}...{@link StreamRDF#finish()}.
*
@@ -73,7 +73,6 @@ public class BinRDF {
*/
public static StreamRDF streamToFile(String filename, boolean withValues) {
OutputStream out = IO.openOutputFile(filename) ;
- // Is this internally buffered as well?
BufferedOutputStream bout = new BufferedOutputStream(out, BUFSIZE_OUT) ;
TProtocol protocol = TRDF.protocol(bout) ;
return new StreamRDF2Thrift(protocol, withValues) ;
http://git-wip-us.apache.org/repos/asf/jena/blob/f88fbc57/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
----------------------------------------------------------------------
diff --git a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
index fea37ac..3a74913 100644
--- a/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
+++ b/jena-base/src/main/java/org/apache/jena/atlas/io/IO.java
@@ -24,7 +24,11 @@ import java.nio.charset.StandardCharsets ;
import java.util.zip.GZIPInputStream ;
import java.util.zip.GZIPOutputStream ;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
+import org.apache.commons.compress.compressors.bzip2.BZip2CompressorOutputStream;
+import org.apache.commons.compress.compressors.snappy.SnappyCompressorInputStream;
import org.apache.jena.atlas.RuntimeIOException ;
+import org.apache.jena.atlas.lib.FileOps;
import org.apache.jena.atlas.lib.IRILib ;
public class IO
@@ -63,7 +67,7 @@ public class IO
/** Open an input stream to a file; do not mask IOExceptions.
* If the filename is null or "-", return System.in
- * If the filename ends in .gz, wrap in GZIPInputStream
+ * If the filename ends in .gz, .bz2 or .sz, wrap in the matching decompressing stream (gzip, bzip2, snappy)
* @param filename
* @throws FileNotFoundException
* @throws IOException
@@ -77,10 +81,28 @@ public class IO
filename = IRILib.decode(filename) ;
}
InputStream in = new FileInputStream(filename) ;
- if ( filename.endsWith(".gz") )
- in = new GZIPInputStream(in) ;
+ String ext = FileOps.extension(filename);
+ switch ( ext ) {
+ case "": return in;
+ case "gz": return new GZIPInputStream(in) ;
+ case "bz2": return new BZip2CompressorInputStream(in);
+ case "sz": return new SnappyCompressorInputStream(in);
+ }
return in ;
}
+
+ private static String[] extensions = { ".gz", ".bz2", ".sz" };
+
+ /** The filename without any compression extension, or the original filename.
+ * It tests for compression types handled by {@link #openFileEx}.
+ */
+ static public String filenameNoCompression(String filename) {
+ for ( String ext : extensions ) {
+ if ( filename.endsWith(ext) )
+ return filename.substring(0, filename.length()-ext.length());
+ }
+ return filename;
+ }
/** Open a UTF8 Reader for a file.
* If the filename is null or "-", use System.in
@@ -134,11 +156,8 @@ public class IO
}
/** Open a file for output - may include adding gzip processing. */
- static public OutputStream openOutputFile(String filename)
- {
- try {
- return openOutputFileEx(filename) ;
- }
+ static public OutputStream openOutputFile(String filename) {
+ try { return openOutputFileEx(filename) ; }
catch (IOException ex) { IO.exception(ex) ; return null ; }
}
@@ -158,15 +177,18 @@ public class IO
filename = IRILib.decode(filename) ;
}
OutputStream out = new FileOutputStream(filename) ;
- if ( filename.endsWith(".gz") )
- out = new GZIPOutputStream(out) ;
+ String ext = FileOps.extension(filename);
+ switch ( ext ) {
+ case "": return out;
+ case "gz": return new GZIPOutputStream(out) ;
+ case "bz2": return new BZip2CompressorOutputStream(out);
+ case "sz": throw new UnsupportedOperationException("Snappy output");
+ }
return out ;
}
/** Wrap in a general writer interface */
- static public AWriter wrap(Writer w) {
- return Writer2.wrap(w) ;
- }
+ static public AWriter wrap(Writer w) { return Writer2.wrap(w) ; }
/** Wrap in a general writer interface */
static public AWriter wrapUTF8(OutputStream out) { return wrap(asUTF8(out)) ; }
@@ -343,5 +365,4 @@ public class IO
return null ;
}
}
-
}
http://git-wip-us.apache.org/repos/asf/jena/blob/f88fbc57/jena-base/src/test/java/org/apache/jena/atlas/io/TS_IO.java
----------------------------------------------------------------------
diff --git a/jena-base/src/test/java/org/apache/jena/atlas/io/TS_IO.java b/jena-base/src/test/java/org/apache/jena/atlas/io/TS_IO.java
index 4479243..085cbf3 100644
--- a/jena-base/src/test/java/org/apache/jena/atlas/io/TS_IO.java
+++ b/jena-base/src/test/java/org/apache/jena/atlas/io/TS_IO.java
@@ -37,6 +37,7 @@ import org.junit.runners.Suite ;
// Writers
, TestBufferingWriter.class
// Other
+ , TestFilenameExtensions.class
, TestPrintUtils.class
} )
public class TS_IO
http://git-wip-us.apache.org/repos/asf/jena/blob/f88fbc57/jena-base/src/test/java/org/apache/jena/atlas/io/TestFilenameExtensions.java
----------------------------------------------------------------------
diff --git a/jena-base/src/test/java/org/apache/jena/atlas/io/TestFilenameExtensions.java b/jena-base/src/test/java/org/apache/jena/atlas/io/TestFilenameExtensions.java
new file mode 100644
index 0000000..edb6848
--- /dev/null
+++ b/jena-base/src/test/java/org/apache/jena/atlas/io/TestFilenameExtensions.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.atlas.io;
+
+import static org.junit.Assert.assertEquals;
+
+import org.junit.Test;
+
+public class TestFilenameExtensions {
+
+ @Test public void ext_1() {
+ String fn1 = "file.txt";
+ String fn2 = IO.filenameNoCompression(fn1);
+ assertEquals(fn1, fn2);
+ }
+
+ @Test public void ext_2() {
+ String fn1 = "a/b/file.gz";
+ String fn2 = IO.filenameNoCompression(fn1);
+ assertEquals("a/b/file", fn2);
+ }
+
+ @Test public void ext_3() {
+ String fn1 = "file.ttl.bz2";
+ String fn2 = IO.filenameNoCompression(fn1);
+ assertEquals("file.ttl", fn2);
+ }
+
+ @Test public void ext_4() {
+ String fn1 = "file.txt.gz";
+ String fn2 = IO.filenameNoCompression(fn1);
+ assertEquals("file.txt", fn2);
+ }
+
+ @Test public void ext_5() {
+ String fn1 = "a/b/file.ttl.bz2";
+ String fn2 = IO.filenameNoCompression(fn1);
+ assertEquals("a/b/file.ttl", fn2);
+ }
+}