You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@marmotta.apache.org by ss...@apache.org on 2014/03/18 10:30:51 UTC

[1/2] git commit: compress cached representation of very long literals

Repository: marmotta
Updated Branches:
  refs/heads/develop 47ee5ac97 -> c5828bb06


compress cached representation of very long literals


Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/50414517
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/50414517
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/50414517

Branch: refs/heads/develop
Commit: 504145177765d61e2087809d7970a839de328e2d
Parents: 5d383da
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Tue Mar 18 10:30:36 2014 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Tue Mar 18 10:30:36 2014 +0100

----------------------------------------------------------------------
 .../test/externalizer/ExternalizerTest.java     |  6 ++
 .../org/apache/marmotta/kiwi/io/KiWiIO.java     | 86 +++++++++++++++++++-
 2 files changed, 90 insertions(+), 2 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/marmotta/blob/50414517/libraries/kiwi/kiwi-caching-infinispan/src/test/java/org/apache/marmotta/kiwi/test/externalizer/ExternalizerTest.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-caching-infinispan/src/test/java/org/apache/marmotta/kiwi/test/externalizer/ExternalizerTest.java b/libraries/kiwi/kiwi-caching-infinispan/src/test/java/org/apache/marmotta/kiwi/test/externalizer/ExternalizerTest.java
index 7b6bb85..6cf8184 100644
--- a/libraries/kiwi/kiwi-caching-infinispan/src/test/java/org/apache/marmotta/kiwi/test/externalizer/ExternalizerTest.java
+++ b/libraries/kiwi/kiwi-caching-infinispan/src/test/java/org/apache/marmotta/kiwi/test/externalizer/ExternalizerTest.java
@@ -119,6 +119,12 @@ public class ExternalizerTest {
     }
 
     @Test
+    public void testLongStringLiteral() throws Exception {
+        marshall((KiWiStringLiteral) valueFactory.createLiteral(RandomStringUtils.random(1000)), new StringLiteralExternalizer());
+    }
+
+
+    @Test
     public void testLangLiteral() throws Exception {
         marshall((KiWiStringLiteral) valueFactory.createLiteral(RandomStringUtils.randomAscii(40),"en"), new StringLiteralExternalizer());
     }

http://git-wip-us.apache.org/repos/asf/marmotta/blob/50414517/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/io/KiWiIO.java
----------------------------------------------------------------------
diff --git a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/io/KiWiIO.java b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/io/KiWiIO.java
index df175fc..2cce40b 100644
--- a/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/io/KiWiIO.java
+++ b/libraries/kiwi/kiwi-triplestore/src/main/java/org/apache/marmotta/kiwi/io/KiWiIO.java
@@ -22,6 +22,8 @@ import org.apache.marmotta.commons.io.DataIO;
 import org.apache.marmotta.commons.vocabulary.XSD;
 import org.apache.marmotta.kiwi.model.rdf.*;
 import org.openrdf.model.vocabulary.*;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -30,6 +32,9 @@ import java.util.Date;
 import java.util.HashMap;
 import java.util.Locale;
 import java.util.Map;
+import java.util.zip.DataFormatException;
+import java.util.zip.Deflater;
+import java.util.zip.Inflater;
 
 /**
  * Add file description here!
@@ -38,6 +43,13 @@ import java.util.Map;
  */
 public class KiWiIO {
 
+    private static Logger log = LoggerFactory.getLogger(KiWiIO.class);
+
+    /**
+     * Content length (in characters) above which we start using compression.
+     */
+    private static final int LITERAL_COMPRESS_LENGTH = 500;
+
     private static final int PREFIX_UNKNOWN = 0;
     private static final int PREFIX_XSD     = 1;
     private static final int PREFIX_RDF     = 2;
@@ -533,7 +545,7 @@ public class KiWiIO {
             out.writeLong(-1L);
         } else {
             out.writeLong(literal.getId());
-            DataIO.writeString(out, literal.getContent());
+            writeContent(out, literal.getContent());
             if(langTable.containsKey(literal.getLanguage())) {
                 out.writeByte(langTable.get(literal.getLanguage()));
             } else {
@@ -560,7 +572,7 @@ public class KiWiIO {
         if(id == -1) {
             return null;
         } else {
-            String content = DataIO.readString(input);
+            String content = readContent(input);
             byte   langB   = input.readByte();
             String lang;
 
@@ -729,4 +741,74 @@ public class KiWiIO {
         return result;
 
     }
+
+
+    private static String readContent(DataInput in) throws IOException {
+        int mode = in.readByte();
+
+        if(mode == MODE_COMPRESSED) {
+            try {
+                int strlen = in.readInt();
+                int buflen = in.readInt();
+
+                byte[] buffer = new byte[buflen];
+                in.readFully(buffer);
+
+                Inflater decompressor = new Inflater(true);
+                decompressor.setInput(buffer);
+
+                byte[] data = new byte[strlen];
+                decompressor.inflate(data);
+                decompressor.end();
+
+                return new String(data,"UTF-8");
+            } catch(DataFormatException ex) {
+                throw new IllegalStateException("input data is not valid",ex);
+            }
+        } else {
+            return DataIO.readString(in);
+        }
+    }
+
+    /**
+     * Write a string to the data output. In case the string length exceeds LITERAL_COMPRESS_LENGTH, uses a DEFLATE
+     * compressed format, otherwise writes the plain bytes.
+     *
+     * @param out      output destination to write to
+     * @param content  string to write
+     * @throws IOException
+     */
+    private static void writeContent(DataOutput out, String content) throws IOException {
+        if(content.length() > LITERAL_COMPRESS_LENGTH) {
+            // temporary buffer of the size of bytes in the content string (assuming that the compressed data will fit into it)
+            byte[] data   = content.getBytes("UTF-8");
+            byte[] buffer = new byte[data.length];
+
+            Deflater compressor = new Deflater(Deflater.BEST_COMPRESSION, true);
+            compressor.setInput(data);
+            compressor.finish();
+
+            int length = compressor.deflate(buffer);
+
+            // only use compressed version if it is smaller than the number of bytes used by the string
+            if(length < buffer.length) {
+                log.debug("compressed string with {} bytes; compression ratio {}", data.length, (double)length/data.length);
+
+                out.writeByte(MODE_COMPRESSED);
+                out.writeInt(data.length);
+                out.writeInt(length);
+                out.write(buffer,0,length);
+            } else {
+                log.warn("compressed length exceeds string buffer: {} > {}", length, buffer.length);
+
+                out.writeByte(MODE_DEFAULT);
+                DataIO.writeString(out,content);
+            }
+
+            compressor.end();
+        } else {
+            out.writeByte(MODE_DEFAULT);
+            DataIO.writeString(out,content);
+        }
+    }
 }


[2/2] git commit: Merge remote-tracking branch 'origin/develop' into develop

Posted by ss...@apache.org.
Merge remote-tracking branch 'origin/develop' into develop


Project: http://git-wip-us.apache.org/repos/asf/marmotta/repo
Commit: http://git-wip-us.apache.org/repos/asf/marmotta/commit/c5828bb0
Tree: http://git-wip-us.apache.org/repos/asf/marmotta/tree/c5828bb0
Diff: http://git-wip-us.apache.org/repos/asf/marmotta/diff/c5828bb0

Branch: refs/heads/develop
Commit: c5828bb06b4f3e907c00209759455c0ab88ccead
Parents: 5041451 47ee5ac
Author: Sebastian Schaffert <ss...@apache.org>
Authored: Tue Mar 18 10:30:49 2014 +0100
Committer: Sebastian Schaffert <ss...@apache.org>
Committed: Tue Mar 18 10:30:49 2014 +0100

----------------------------------------------------------------------
 .../src/main/resources/installer/LICENSE.txt    |   1 +
 .../backend/kiwi/DatabaseWebService.java        |   2 +-
 .../core/api/jaxrs/InterceptorService.java      |  31 +++++
 .../platform/core/jaxrs/CDIExceptionMapper.java |  28 -----
 .../platform/core/jaxrs/ErrorResponse.java      | 126 -------------------
 .../core/jaxrs/HttpErrorExceptionMapper.java    |  91 --------------
 .../core/jaxrs/IllegalStateExceptionMapper.java |  47 -------
 .../core/jaxrs/MarmottaExceptionMapper.java     |  49 --------
 .../jaxrs/MarmottaImportExceptionMapper.java    |  48 -------
 .../core/jaxrs/RepositoryExceptionMapper.java   |  50 --------
 .../core/jaxrs/URISyntaxExceptionMapper.java    |  47 -------
 .../UnsupportedOperationExceptionMapper.java    |  47 -------
 .../exceptionmappers/CDIExceptionMapper.java    |  28 +++++
 .../jaxrs/exceptionmappers/ErrorResponse.java   | 126 +++++++++++++++++++
 .../HttpErrorExceptionMapper.java               |  91 ++++++++++++++
 .../IllegalStateExceptionMapper.java            |  46 +++++++
 .../MarmottaExceptionMapper.java                |  48 +++++++
 .../MarmottaImportExceptionMapper.java          |  48 +++++++
 .../RepositoryExceptionMapper.java              |  49 ++++++++
 .../URISyntaxExceptionMapper.java               |  47 +++++++
 .../UnsupportedOperationExceptionMapper.java    |  46 +++++++
 .../core/jaxrs/interceptors/CDIInterceptor.java |  30 +++++
 .../jaxrs/interceptors/JsonPInterceptor.java    |  45 +++++++
 .../jaxrs/ExceptionMapperServiceImpl.java       |   2 +-
 .../services/jaxrs/InterceptorServiceImpl.java  |  73 +++++++++++
 .../templating/TemplatingServiceImpl.java       |   2 +
 .../webservices/system/SystemWebService.java    |   2 +-
 .../platform/core/test/base/JettyMarmotta.java  |  12 +-
 platform/marmotta-versioning-kiwi/pom.xml       |   8 ++
 .../resources/templates/memento_timemap.ftl     |  44 ++++++-
 30 files changed, 776 insertions(+), 538 deletions(-)
----------------------------------------------------------------------