You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by ha...@apache.org on 2018/04/02 17:22:37 UTC
any23 git commit: ANY23-336 Hacky patch to tide us over until
jsonldjava 0.11.2 release
Repository: any23
Updated Branches:
refs/heads/master 60d6f6164 -> 205cfe442
ANY23-336 Hacky patch to tide us over until jsonldjava 0.11.2 release
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/205cfe44
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/205cfe44
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/205cfe44
Branch: refs/heads/master
Commit: 205cfe442c427dee6caae806d8293f7bfaec5e74
Parents: 60d6f61
Author: Hans <fi...@gmail.com>
Authored: Mon Apr 2 03:05:34 2018 -0500
Committer: Hans <fi...@gmail.com>
Committed: Mon Apr 2 12:11:26 2018 -0500
----------------------------------------------------------------------
.../any23/extractor/rdf/JSONLDExtractor.java | 91 ++++++++++++++++++++
.../extractor/rdf/JSONLDExtractorTest.java | 23 +++++
2 files changed, 114 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/205cfe44/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
index 96067b8..a073a21 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
@@ -17,10 +17,27 @@
package org.apache.any23.extractor.rdf;
+import com.github.jsonldjava.utils.JarCacheStorage;
+import com.github.jsonldjava.utils.JsonUtils;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.http.client.cache.HttpCacheEntry;
+import org.apache.http.client.cache.HttpCacheStorage;
+import org.apache.http.client.protocol.RequestAcceptEncoding;
+import org.apache.http.client.protocol.ResponseContentEncoding;
+import org.apache.http.impl.client.CloseableHttpClient;
+import org.apache.http.impl.client.DefaultRedirectStrategy;
+import org.apache.http.impl.client.cache.BasicHttpCacheStorage;
+import org.apache.http.impl.client.cache.CacheConfig;
+import org.apache.http.impl.client.cache.CachingHttpClientBuilder;
import org.eclipse.rdf4j.rio.RDFParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.lang.reflect.Field;
/**
* Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -29,6 +46,80 @@ import org.eclipse.rdf4j.rio.RDFParser;
*/
public class JSONLDExtractor extends BaseRDFExtractor {
+ private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ //TODO: the static members of this class can be removed once jsonldjava 0.11.2 is released
+ //See https://issues.apache.org/jira/browse/ANY23-336
+ /**
+  * Result of {@link #isHttpClientSwapNeeded()}, computed once while this class is initialised.
+  * When {@code true}, the static initializer below attempts to replace the HTTP client held by
+  * {@link JsonUtils}.
+  */
+ static final boolean needsHttpClientSwap;
+
+ static {
+     needsHttpClientSwap = isHttpClientSwapNeeded();
+     if (needsHttpClientSwap) {
+         try {
+             // Reflectively overwrite the static DEFAULT_HTTP_CLIENT field declared on JsonUtils
+             // (null receiver => static field) with the client built by this class.
+             Field httpClientField = JsonUtils.class.getDeclaredField("DEFAULT_HTTP_CLIENT");
+             httpClientField.setAccessible(true);
+             httpClientField.set(null, createDefaultHttpClient());
+         } catch (Throwable e) {
+             // Best effort: log and continue so that a failed swap does not abort
+             // initialisation of this class.
+             LOG.warn("failed to swap jsonldjava http client", e);
+         }
+     } else {
+         // The probe says the workaround is obsolete; nudge maintainers to delete it.
+         LOG.warn("The static members of this class are no longer needed.");
+     }
+ }
+
+ /**
+  * Probes {@link JsonUtils} for a declared field named {@code JSONLD_JAVA_USER_AGENT}.
+  * The presence of that field is treated as the signal that the HTTP client workaround
+  * in this class is no longer required (see ANY23-336).
+  *
+  * @return {@code false} if the field lookup succeeds, {@code true} if it throws anything
+  */
+ private static boolean isHttpClientSwapNeeded() {
+     boolean swapNeeded;
+     try {
+         JsonUtils.class.getDeclaredField("JSONLD_JAVA_USER_AGENT");
+         swapNeeded = false;
+     } catch (Throwable ignored) {
+         // Any failure of the lookup is interpreted as "field absent".
+         swapNeeded = true;
+     }
+     return swapNeeded;
+ }
+
+ /**
+  * Builds the caching HTTP client that the static initializer installs into {@link JsonUtils}.
+  * The client uses a {@link JarCacheStorage0} wrapped around a {@link BasicHttpCacheStorage},
+  * adds the accept-encoding / content-encoding interceptors, follows redirects with
+  * {@link DefaultRedirectStrategy#INSTANCE} and honours system properties.
+  *
+  * @return a newly built {@link CloseableHttpClient}
+  */
+ private static CloseableHttpClient createDefaultHttpClient() {
+     // A single CacheConfig is shared by the JarCacheStorage0 wrapper and the
+     // BasicHttpCacheStorage it wraps.
+     final CacheConfig sharedCacheConfig = CacheConfig.custom()
+             .setMaxCacheEntries(500)
+             .setMaxObjectSize(1024 * 256)
+             .setSharedCache(false)
+             .setHeuristicCachingEnabled(true)
+             // presumably seconds, i.e. one day -- confirm against the CacheConfig docs
+             .setHeuristicDefaultLifetime(86400)
+             .build();
+
+     // Null class loader is passed through to the JarCacheStorage constructor unchanged.
+     final HttpCacheStorage cacheStorage = new JarCacheStorage0(null, sharedCacheConfig,
+             new BasicHttpCacheStorage(sharedCacheConfig));
+
+     return CachingHttpClientBuilder.create()
+             // allow caching
+             .setCacheConfig(sharedCacheConfig)
+             .setHttpCacheStorage(cacheStorage)
+             // Support compressed data
+             // https://wayback.archive.org/web/20130901115452/http://hc.apache.org:80/httpcomponents-client-ga/tutorial/html/httpagent.html#d5e1238
+             .addInterceptorFirst(new RequestAcceptEncoding())
+             .addInterceptorFirst(new ResponseContentEncoding())
+             .setRedirectStrategy(DefaultRedirectStrategy.INSTANCE)
+             // use system defaults for proxy etc.
+             .useSystemProperties()
+             .build();
+ }
+
+ /**
+  * {@link JarCacheStorage} variant whose {@link #getEntry(String)} asks the wrapped
+  * {@link HttpCacheStorage} first and only falls back to the inherited lookup when the
+  * wrapped storage has no entry for the key.
+  */
+ private static class JarCacheStorage0 extends JarCacheStorage {
+
+     /** The same storage instance that is handed to the superclass constructor. */
+     private final HttpCacheStorage wrappedStorage;
+
+     public JarCacheStorage0(ClassLoader classLoader, CacheConfig cacheConfig,
+             HttpCacheStorage delegate) {
+         super(classLoader, cacheConfig, delegate);
+         this.wrappedStorage = delegate;
+     }
+
+     /**
+      * @param key cache key to look up
+      * @return the wrapped storage's entry if it has one, otherwise whatever
+      *         {@code super.getEntry(key)} returns
+      * @throws IOException if either lookup throws it
+      */
+     @Override
+     public HttpCacheEntry getEntry(String key) throws IOException {
+         final HttpCacheEntry cached = wrappedStorage.getEntry(key);
+         if (cached == null) {
+             // Nothing in the wrapped storage: defer to the JarCacheStorage lookup.
+             return super.getEntry(key);
+         }
+         return cached;
+     }
+ }
+
+
+
+
+
/**
 * Creates the extractor by passing both flags straight through to the
 * {@link BaseRDFExtractor} constructor.
 *
 * @param verifyDataType forwarded unchanged to the superclass constructor
 * @param stopAtFirstError forwarded unchanged to the superclass constructor
 */
public JSONLDExtractor(boolean verifyDataType, boolean stopAtFirstError) {
super(verifyDataType, stopAtFirstError);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/205cfe44/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
index 1e9aa6f..fcef3e4 100644
--- a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
@@ -19,6 +19,7 @@ package org.apache.any23.extractor.rdf;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
+import com.github.jsonldjava.core.DocumentLoader;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
@@ -29,6 +30,7 @@ import org.apache.any23.writer.RDFXMLWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.eclipse.rdf4j.model.IRI;
@@ -56,6 +58,27 @@ public class JSONLDExtractorTest {
}
@Test
+ public void testRemoteContextCaching() throws Exception {
+     // Guard: once JSONLDExtractor reports that the client swap is unnecessary, this test
+     // fails on purpose so that the workaround (and this test) get removed.
+     Assert.assertTrue("The static members of " + JSONLDExtractor.class + " can now be removed!",
+             JSONLDExtractor.needsHttpClientSwap);
+     DocumentLoader documentLoader = new DocumentLoader();
+     final String[] urls = {"http://schema.org/", "http://schema.org/docs/jsonldcontext.json"};
+     for (String url : urls) {
+         // Load the same URL up to 10000 times; the whole run for one URL must stay within
+         // 60 seconds. System.nanoTime() is used because it is monotonic, so wall-clock
+         // adjustments during the run cannot distort the measured duration.
+         long start = System.nanoTime();
+         for (int i = 1; i <= 10000; i++) {
+             documentLoader.loadDocument(url);
+
+             long seconds = (System.nanoTime() - start) / 1_000_000_000L;
+
+             if (seconds > 60) {
+                 // Assert.fail always throws, so nothing after it in this branch would run.
+                 Assert.fail(String.format("Took %s seconds to access %s %s times", seconds, url, i));
+             }
+         }
+     }
+ }
+
+ @Test
public void testExtractFromJSONLDDocument()
throws IOException, ExtractionException, TripleHandlerException {
final IRI uri = RDFUtils.iri("http://host.com/place-example.jsonld");