You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2014/02/05 18:17:43 UTC

git commit: added 7z support to loader

Updated Branches:
  refs/heads/develop b72d342dd -> f53b62551


added 7z support to loader


Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/f53b6255
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/f53b6255
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/f53b6255

Branch: refs/heads/develop
Commit: f53b62551b7ad0bd99f245e8b50645d0bf8a76d9
Parents: b72d342
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Wed Feb 5 18:17:37 2014 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Wed Feb 5 18:17:37 2014 +0100

----------------------------------------------------------------------
 .../marmotta/loader/core/MarmottaLoader.java    | 128 ++++++++++++++-----
 .../marmotta/loader/core/test/ArchiveTest.java  |   3 +-
 .../loader/core/test/LoaderTestBase.java        |   2 +-
 .../src/test/resources/demo-data.7z             | Bin 0 -> 1423 bytes
 4 files changed, 97 insertions(+), 36 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java b/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java
index 38e8316..cceb297 100644
--- a/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java
+++ b/loader/marmotta-loader-core/src/main/java/org/apache/marmotta/loader/core/MarmottaLoader.java
@@ -9,6 +9,8 @@ import org.apache.commons.compress.archivers.ArchiveException;
 import org.apache.commons.compress.archivers.ArchiveInputStream;
 import org.apache.commons.compress.archivers.ArchiveStreamFactory;
 import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream;
+import org.apache.commons.compress.archivers.sevenz.SevenZArchiveEntry;
+import org.apache.commons.compress.archivers.sevenz.SevenZFile;
 import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
 import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
 import org.apache.commons.compress.compressors.CompressorException;
@@ -269,52 +271,107 @@ public class MarmottaLoader {
         log.info("loading files in archive {} ...", archive);
 
         if(archive.exists() && archive.canRead()) {
-            InputStream in;
 
-            String archiveCompression = detectCompression(archive);
-            InputStream fin = new BufferedInputStream(new FileInputStream(archive));
-            if(archiveCompression != null) {
-                if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
-                    log.info("auto-detected archive compression: GZIP");
-                    in = new GzipCompressorInputStream(fin,true);
-                } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
-                    log.info("auto-detected archive compression: BZIP2");
-                    in = new BZip2CompressorInputStream(fin, true);
+            if(archive.getName().endsWith("7z")) {
+                log.info("auto-detected archive format: 7Z");
+
+                final SevenZFile sevenZFile = new SevenZFile(archive);
+
+                try {
+                    SevenZArchiveEntry entry;
+                    while( (entry = sevenZFile.getNextEntry()) != null) {
+
+                        if(! entry.isDirectory()) {
+                            log.info("loading entry {} ...", entry.getName());
+
+                            // detect the file format
+                            RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName());
+                            if(format == null) {
+                                if(detectedFormat != null) {
+                                    log.info("auto-detected entry format: {}", detectedFormat.getName());
+                                    format = detectedFormat;
+                                } else {
+                                    throw new RDFParseException("could not detect input format of entry "+ entry.getName());
+                                }
+                            } else {
+                                if(detectedFormat != null && !format.equals(detectedFormat)) {
+                                    log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName());
+                                } else {
+                                    log.info("user-specified entry format: {}", format.getName());
+                                }
+                            }
+
+
+                            load(new InputStream() {
+                                @Override
+                                public int read() throws IOException {
+                                    return sevenZFile.read();
+                                }
+
+                                @Override
+                                public int read(byte[] b) throws IOException {
+                                    return sevenZFile.read(b);
+                                }
+
+                                @Override
+                                public int read(byte[] b, int off, int len) throws IOException {
+                                    return sevenZFile.read(b, off, len);
+                                }
+                            },handler,format);
+                        }
+                    }
+                } finally {
+                    sevenZFile.close();
+                }
+
+            } else {
+                InputStream in;
+
+                String archiveCompression = detectCompression(archive);
+                InputStream fin = new BufferedInputStream(new FileInputStream(archive));
+                if(archiveCompression != null) {
+                    if (CompressorStreamFactory.GZIP.equalsIgnoreCase(archiveCompression)) {
+                        log.info("auto-detected archive compression: GZIP");
+                        in = new GzipCompressorInputStream(fin,true);
+                    } else if (CompressorStreamFactory.BZIP2.equalsIgnoreCase(archiveCompression)) {
+                        log.info("auto-detected archive compression: BZIP2");
+                        in = new BZip2CompressorInputStream(fin, true);
+                    } else {
+                        in = fin;
+                    }
                 } else {
                     in = fin;
                 }
-            } else {
-                in = fin;
-            }
 
-            ArchiveInputStream zipStream = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(in));
-            logArchiveType(zipStream);
+                ArchiveInputStream zipStream = new ArchiveStreamFactory().createArchiveInputStream(new BufferedInputStream(in));
+                logArchiveType(zipStream);
 
-            ArchiveEntry entry;
-            while( (entry = zipStream.getNextEntry()) != null) {
+                ArchiveEntry entry;
+                while( (entry = zipStream.getNextEntry()) != null) {
 
-                if(! entry.isDirectory()) {
-                    log.info("loading entry {} ...", entry.getName());
+                    if(! entry.isDirectory()) {
+                        log.info("loading entry {} ...", entry.getName());
 
-                    // detect the file format
-                    RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName());
-                    if(format == null) {
-                        if(detectedFormat != null) {
-                            log.info("auto-detected entry format: {}", detectedFormat.getName());
-                            format = detectedFormat;
-                        } else {
-                            throw new RDFParseException("could not detect input format of entry "+ entry.getName());
-                        }
-                    } else {
-                        if(detectedFormat != null && !format.equals(detectedFormat)) {
-                            log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName());
+                        // detect the file format
+                        RDFFormat detectedFormat = RDFFormat.forFileName(entry.getName());
+                        if(format == null) {
+                            if(detectedFormat != null) {
+                                log.info("auto-detected entry format: {}", detectedFormat.getName());
+                                format = detectedFormat;
+                            } else {
+                                throw new RDFParseException("could not detect input format of entry "+ entry.getName());
+                            }
                         } else {
-                            log.info("user-specified entry format: {}", format.getName());
+                            if(detectedFormat != null && !format.equals(detectedFormat)) {
+                                log.warn("user-specified entry format ({}) overrides auto-detected format ({})", format.getName(), detectedFormat.getName());
+                            } else {
+                                log.info("user-specified entry format: {}", format.getName());
+                            }
                         }
-                    }
 
 
-                    load(zipStream,handler,format);
+                        load(zipStream,handler,format);
+                    }
                 }
             }
 
@@ -324,6 +381,7 @@ public class MarmottaLoader {
 
     }
 
+
     private void logArchiveType(ArchiveInputStream stream) {
         if(log.isInfoEnabled()) {
             if(stream instanceof ZipArchiveInputStream) {
@@ -332,6 +390,8 @@ public class MarmottaLoader {
                 log.info("auto-detected archive format: TAR");
             } else if (stream instanceof CpioArchiveInputStream) {
                 log.info("auto-detected archive format: CPIO");
+            } else if (stream instanceof CpioArchiveInputStream) {
+                log.info("auto-detected archive format: CPIO");
             } else {
                 log.info("unknown archive format, relying on commons-compress");
             }

http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java
index 9e9f9e8..a477a59 100644
--- a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java
+++ b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/ArchiveTest.java
@@ -50,7 +50,8 @@ public class ArchiveTest extends LoaderTestBase {
     public static Collection<Object[]> data() {
         Object[][] data = new Object[][] {
                 { "demo-data.tar.gz"},
-                { "demo-data.zip"}
+                { "demo-data.zip"},
+                { "demo-data.7z"}
         };
         return Arrays.asList(data);
     }

http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java
index 7cd3872..c4dfc1b 100644
--- a/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java
+++ b/loader/marmotta-loader-core/src/test/java/org/apache/marmotta/loader/core/test/LoaderTestBase.java
@@ -65,7 +65,7 @@ public abstract class LoaderTestBase {
 
         log.info("running loader tests from temporary directory {}", tempDir);
 
-        for(String filename : new String[] {"demo-data.rdf", "demo-data.rdf.gz", "demo-data.rdf.bz2", "demo-data.tar.gz", "demo-data.zip"}) {
+        for(String filename : new String[] {"demo-data.rdf", "demo-data.rdf.gz", "demo-data.rdf.bz2", "demo-data.tar.gz", "demo-data.zip", "demo-data.7z"}) {
             File data = new File(tempDir.toFile(), filename);
             FileUtils.copyInputStreamToFile(ArchiveTest.class.getResourceAsStream("/" + filename), data);
         }

http://git-wip-us.apache.org/repos/asf/marmotta/blob/f53b6255/loader/marmotta-loader-core/src/test/resources/demo-data.7z
----------------------------------------------------------------------
diff --git a/loader/marmotta-loader-core/src/test/resources/demo-data.7z b/loader/marmotta-loader-core/src/test/resources/demo-data.7z
new file mode 100644
index 0000000..b6cfb8f
Binary files /dev/null and b/loader/marmotta-loader-core/src/test/resources/demo-data.7z differ