You are viewing a plain text version of this content. The canonical link for it was not preserved in this plain-text copy.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/04/02 17:22:37 UTC

any23 git commit: ANY23-336 Hacky patch to tide us over until jsonldjava 0.11.2 release

Repository: any23
Updated Branches:
  refs/heads/master 60d6f6164 -> 205cfe442


ANY23-336 Hacky patch to tide us over until jsonldjava 0.11.2 release


Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/205cfe44
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/205cfe44
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/205cfe44

Branch: refs/heads/master
Commit: 205cfe442c427dee6caae806d8293f7bfaec5e74
Parents: 60d6f61
Author: Hans <fi...@gmail.com>
Authored: Mon Apr 2 03:05:34 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Mon Apr 2 12:11:26 2018 -0500

----------------------------------------------------------------------
 .../any23/extractor/rdf/JSONLDExtractor.java    | 91 ++++++++++++++++++++
 .../extractor/rdf/JSONLDExtractorTest.java      | 23 +++++
 2 files changed, 114 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/any23/blob/205cfe44/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
index 96067b8..a073a21 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
@@ -17,10 +17,27 @@
 
 package org.apache.any23.extractor.rdf;
 
+import com.github.jsonldjava.utils.JarCacheStorage;
+import com.github.jsonldjava.utils.JsonUtils;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionResult;
 import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.http.client.cache.HttpCacheEntry;
+import org.apache.http.client.cache.HttpCacheStorage;
+import org.apache.http.client.protocol.RequestAcceptEncoding;
+import org.apache.http.client.protocol.ResponseContentEncoding;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.cache.BasicHttpCacheStorage;
+import org.apache.http.impl.client.cache.CacheConfig;
+import org.apache.http.impl.client.cache.CachingHttpClientBuilder;
 import org.eclipse.rdf4j.rio.RDFParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.lang.reflect.Field;
 
 /**
  * Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -29,6 +46,80 @@ import org.eclipse.rdf4j.rio.RDFParser;
  */
 public class JSONLDExtractor extends BaseRDFExtractor {
 
+    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+    //TODO: the static members of this class can be removed once jsonldjava 0.11.2 is released
+    //See https://issues.apache.org/jira/browse/ANY23-336
+    static final boolean needsHttpClientSwap;
+
+    static {
+        if (!(needsHttpClientSwap = isHttpClientSwapNeeded())) {
+            LOG.warn("The static members of this class are no longer needed.");
+        } else {
+            try {
+                Field field = JsonUtils.class.getDeclaredField("DEFAULT_HTTP_CLIENT");
+                field.setAccessible(true);
+                field.set(null, createDefaultHttpClient());
+            } catch (Throwable e) {
+                LOG.warn("failed to swap jsonldjava http client", e);
+            }
+        }
+    }
+
+    private static boolean isHttpClientSwapNeeded() {
+        try {
+            JsonUtils.class.getDeclaredField("JSONLD_JAVA_USER_AGENT");
+            return false;
+        } catch (Throwable th) {
+            return true;
+        }
+    }
+
+    private static CloseableHttpClient createDefaultHttpClient() {
+        // Common CacheConfig for both the JarCacheStorage and the underlying
+        // BasicHttpCacheStorage
+        final CacheConfig cacheConfig = CacheConfig.custom().setMaxCacheEntries(500)
+                .setMaxObjectSize(1024 * 256).setSharedCache(false)
+                .setHeuristicCachingEnabled(true).setHeuristicDefaultLifetime(86400).build();
+
+        final CloseableHttpClient result = CachingHttpClientBuilder.create()
+                // allow caching
+                .setCacheConfig(cacheConfig)
+                // Wrap the local JarCacheStorage around a BasicHttpCacheStorage
+                .setHttpCacheStorage(new JarCacheStorage0(null, cacheConfig,
+                        new BasicHttpCacheStorage(cacheConfig)))
+                // Support compressed data
+                // https://wayback.archive.org/web/20130901115452/http://hc.apache.org:80/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238
+                .addInterceptorFirst(new RequestAcceptEncoding())
+                .addInterceptorFirst(new ResponseContentEncoding())
+                .setRedirectStrategy(DefaultRedirectStrategy.INSTANCE)
+                // use system defaults for proxy etc.
+                .useSystemProperties().build();
+
+        return result;
+    }
+
+    private static class JarCacheStorage0 extends JarCacheStorage {
+
+        private final HttpCacheStorage delegate;
+
+        public JarCacheStorage0(ClassLoader classLoader, CacheConfig cacheConfig,
+                               HttpCacheStorage delegate) {
+            super(classLoader, cacheConfig, delegate);
+            this.delegate = delegate;
+        }
+
+        @Override
+        public HttpCacheEntry getEntry(String key) throws IOException {
+            HttpCacheEntry entry = delegate.getEntry(key);
+            return entry != null ? entry : super.getEntry(key);
+        }
+    }
+
+
+
+
+
     public JSONLDExtractor(boolean verifyDataType, boolean stopAtFirstError) {
         super(verifyDataType, stopAtFirstError);
     }

http://git-wip-us.apache.org/repos/asf/any23/blob/205cfe44/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
index 1e9aa6f..fcef3e4 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
@@ -19,6 +19,7 @@ package org.apache.any23.extractor.rdf;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 
+import com.github.jsonldjava.core.DocumentLoader;
 import org.apache.any23.extractor.ExtractionContext;
 import org.apache.any23.extractor.ExtractionException;
 import org.apache.any23.extractor.ExtractionParameters;
@@ -29,6 +30,7 @@ import org.apache.any23.writer.RDFXMLWriter;
 import org.apache.any23.writer.TripleHandler;
 import org.apache.any23.writer.TripleHandlerException;
 import org.junit.After;
+import org.junit.Assert;
 import org.junit.Before;
 import org.junit.Test;
 import org.eclipse.rdf4j.model.IRI;
@@ -56,6 +58,27 @@ public class JSONLDExtractorTest {
   }
 
   @Test
+  public void testRemoteContextCaching() throws Exception {
+    Assert.assertTrue("The static members of " + JSONLDExtractor.class + " can now be removed!",
+            JSONLDExtractor.needsHttpClientSwap);
+    DocumentLoader documentLoader = new DocumentLoader();
+    final String[] urls = {"http://schema.org/", "http://schema.org/docs/jsonldcontext.json"};
+    for (String url : urls) {
+      long start = System.currentTimeMillis();
+      for (int i = 1; i <= 10000; i++) {
+        documentLoader.loadDocument(url);
+
+        long seconds = (System.currentTimeMillis() - start) / 1000;
+
+        if (seconds > 60) {
+          Assert.fail(String.format("Took %s seconds to access %s %s times", seconds, url, i));
+          break;
+        }
+      }
+    }
+  }
+
+  @Test
   public void testExtractFromJSONLDDocument() 
     throws IOException, ExtractionException, TripleHandlerException {
       final IRI uri = RDFUtils.iri("http://host.com/place-example.jsonld");