You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2014/04/04 01:05:10 UTC
[5/6] git commit: ANY23-177 Add support for JSON-LD
ANY23-177 Add support for JSON-LD
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/47278c16
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/47278c16
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/47278c16
Branch: refs/heads/master
Commit: 47278c1649050f107b615e00a51db00a356c7ca6
Parents: ebfbbec
Author: Lewis John McGibbney <le...@apache.org>
Authored: Tue Mar 25 19:26:47 2014 +0000
Committer: Lewis John McGibbney <le...@apache.org>
Committed: Tue Mar 25 19:26:47 2014 +0000
----------------------------------------------------------------------
core/pom.xml | 7 +
.../any23/extractor/akn/AKNExtractor.java | 50 -
.../extractor/akn/AKNExtractorFactory.java | 54 -
.../apache/any23/extractor/akn/AKNParser.java | 33 -
.../any23/extractor/akn/package-info.java | 27 -
.../any23/extractor/rdf/BaseRDFExtractor.java | 2 -
.../any23/extractor/rdf/JSONLDExtractor.java | 51 +
.../extractor/rdf/JSONLDExtractorFactory.java | 59 ++
.../any23/extractor/rdf/RDFParserFactory.java | 19 +
.../any23/extractor/rdf/example-jsonld.jsonld | 17 +
.../extractor/rdf/JSONLDExtractorTest.java | 97 ++
pom.xml | 8 +
.../any23/extractor/rdf/embedded_json-ld.html | 981 +++++++++++++++++++
.../any23/extractor/rdf/place-example.jsonld | 27 +
14 files changed, 1266 insertions(+), 166 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 6a7db29..e938a7c 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -134,6 +134,13 @@
<artifactId>sesame-repository-api</artifactId>
</dependency>
<!-- END: Sesame -->
+
+ <!-- BEGIN: Misc -->
+ <dependency>
+ <groupId>com.github.jsonld-java</groupId>
+ <artifactId>jsonld-java-sesame</artifactId>
+ </dependency>
+ <!-- END: Misc -->
<!-- BEGIN: Apache Commons, this version is hosted in the
any23-repository-external repository -->
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractor.java b/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractor.java
deleted file mode 100644
index e637276..0000000
--- a/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractor.java
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.extractor.akn;
-
-import java.io.IOException;
-
-import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
-import org.apache.any23.extractor.ExtractionResult;
-import org.apache.any23.extractor.Extractor;
-import org.apache.any23.extractor.ExtractorDescription;
-import org.w3c.dom.Document;
-
-/**
- * Extractor for the <a href="http://www.akomtantoso.org">Akoma Ntoso</a>
- * XML Format.
- * @author lewismc
- *
- */
-public class AKNExtractor implements Extractor.TagSoupDOMExtractor {
-
- @Override
- public void run(ExtractionParameters extractionParameters, ExtractionContext context, Document in,
- ExtractionResult out) throws IOException, ExtractionException {
- // TODO Auto-generated method stub
-
- }
-
- @Override
- public ExtractorDescription getDescription() {
- // TODO Auto-generated method stub
- return null;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractorFactory.java
deleted file mode 100644
index bbd0a87..0000000
--- a/core/src/main/java/org/apache/any23/extractor/akn/AKNExtractorFactory.java
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.extractor.akn;
-
-import java.util.Arrays;
-
-import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
-import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
-
-/**
- * @author lewismc
- *
- */
-@MetaInfServices(ExtractorFactory.class)
-public class AKNExtractorFactory extends SimpleExtractorFactory<AKNExtractor> implements
- ExtractorFactory<AKNExtractor> {
-
- private static final ExtractorDescription descriptionInstance = new AKNExtractorFactory();
- private static final String NAME = "akomaNtoso";
- private static final Prefixes PREFIXES = PopularPrefixes.createSubset("akn", "AKN", "AKOMA");
-
- public AKNExtractorFactory() {
- super(AKNExtractorFactory.NAME,
- AKNExtractorFactory.PREFIXES);
- }
-
- @Override
- public AKNExtractor createExtractor() {
- return new AKNExtractor();
- }
-
- public static ExtractorDescription getDescriptionInstance() {
- return descriptionInstance;
- }
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/akn/AKNParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/akn/AKNParser.java b/core/src/main/java/org/apache/any23/extractor/akn/AKNParser.java
deleted file mode 100644
index 2320da2..0000000
--- a/core/src/main/java/org/apache/any23/extractor/akn/AKNParser.java
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.any23.extractor.akn;
-
-/**
- * This class provides utility methods for handling <b>Akoma Ntoso</b>
- * nodes contained within a <i>DOM</i> document.
- * @author lewismc
- */
-public class AKNParser {
-
- enum ErrorMode {
- /** This mode raises an exception at first encountered error. */
- StopAtFirstError,
- /** This mode produces a full error report. */
- FullReport
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/akn/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/akn/package-info.java b/core/src/main/java/org/apache/any23/extractor/akn/package-info.java
deleted file mode 100644
index 508ee81..0000000
--- a/core/src/main/java/org/apache/any23/extractor/akn/package-info.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * <p>This package contains the definition of a specific
- * {@link org.apache.any23.extractor.Extractor} for <i>AkomaNtoso</i>
- * files.</p>
- * <p>Akoma Ntoso is an emerging legal document standard for representing
- * legislative and judicial documents in XML format.
- * @see http://www.akomtantoso.org
- * @see http://code.google.com/p/akomantoso
- * @author lewismc
- */
-package org.apache.any23.extractor.akn;
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 6dda7a9..18a30ca 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -28,8 +28,6 @@ import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.RioSetting;
import org.openrdf.rio.helpers.BasicParserSettings;
-import org.openrdf.rio.helpers.RDFParserBase;
-
import java.io.IOException;
import java.io.InputStream;
import java.util.HashSet;
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
new file mode 100644
index 0000000..23a4d1e
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractor.java
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.rdf;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractorDescription;
+import org.openrdf.rio.RDFParser;
+
+/**
+ * Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
+ * handling <a href="http://www.w3.org/TR/json-ld/">JSON-LD</a> format.
+ *
+ */
+public class JSONLDExtractor extends BaseRDFExtractor {
+
+ public JSONLDExtractor(boolean verifyDataType, boolean stopAtFirstError) {
+ super(verifyDataType, stopAtFirstError);
+ }
+
+ public JSONLDExtractor() {
+ this(false, false);
+ }
+
+ @Override
+ public ExtractorDescription getDescription() {
+ return JSONLDExtractorFactory.getDescriptionInstance();
+ }
+
+ @Override
+ protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
+ return RDFParserFactory.getInstance().getJSONLDParser(
+ isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult
+ );
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
new file mode 100644
index 0000000..bedd200
--- /dev/null
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
@@ -0,0 +1,59 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} creating {@link JSONLDExtractor} instances, registered as {@code rdf-jsonld} for the {@code application/ld+json} MIME type.
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class JSONLDExtractorFactory extends SimpleExtractorFactory<JSONLDExtractor> implements
+ ExtractorFactory<JSONLDExtractor> {
+
+ public static final String NAME = "rdf-jsonld";
+
+ public static final Prefixes PREFIXES = null;
+
+ private static final ExtractorDescription descriptionInstance = new JSONLDExtractorFactory();
+
+ public JSONLDExtractorFactory() {
+ super(
+ JSONLDExtractorFactory.NAME,
+ JSONLDExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "application/ld+json;q=0.1"
+ ),
+ "example-jsonld.jsonld");
+ }
+
+ @Override
+ public JSONLDExtractor createExtractor() {
+ return new JSONLDExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
index 606364b..575cebb 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
@@ -157,6 +157,25 @@ public class RDFParserFactory {
configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
return parser;
}
+
+ /**
+ * Returns a new {@link RDFParser} created for {@link RDFFormat#JSONLD} and configured for the given extraction.
+ * @param verifyDataType data verification is enabled if <code>true</code>.
+ * @param stopAtFirstError the parser stops at first error if <code>true</code>.
+ * @param extractionContext the extraction context where the parser is used.
+ * @param extractionResult the output extraction result.
+ * @return a new instance of a configured JSON-LD parser.
+ */
+ public RDFParser getJSONLDParser(
+ final boolean verifyDataType,
+ final boolean stopAtFirstError,
+ final ExtractionContext extractionContext,
+ final ExtractionResult extractionResult
+ ) {
+ final RDFParser parser = Rio.createParser(RDFFormat.JSONLD);
+ configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
+ return parser;
+ }
/**
* Configures the given parser on the specified extraction result
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/main/resources/org/apache/any23/extractor/rdf/example-jsonld.jsonld
----------------------------------------------------------------------
diff --git a/core/src/main/resources/org/apache/any23/extractor/rdf/example-jsonld.jsonld b/core/src/main/resources/org/apache/any23/extractor/rdf/example-jsonld.jsonld
new file mode 100644
index 0000000..8c25185
--- /dev/null
+++ b/core/src/main/resources/org/apache/any23/extractor/rdf/example-jsonld.jsonld
@@ -0,0 +1,17 @@
+{
+ "@context": {
+ "name": "http://xmlns.com/foaf/0.1/name",
+ "knows": "http://xmlns.com/foaf/0.1/knows"
+ },
+ "@id": "http://me.markus-lanthaler.com/",
+ "name": "Markus Lanthaler",
+ "knows": [
+ {
+ "@id": "http://manu.sporny.org/about#manu",
+ "name": "Manu Sporny"
+ },
+ {
+ "name": "Dave Longley"
+ }
+ ]
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
new file mode 100644
index 0000000..d6b42ea
--- /dev/null
+++ b/core/src/test/java/org/apache/any23/extractor/rdf/JSONLDExtractorTest.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.any23.extractor.ExtractionContext;
+import org.apache.any23.extractor.ExtractionException;
+import org.apache.any23.extractor.ExtractionParameters;
+import org.apache.any23.extractor.ExtractionResult;
+import org.apache.any23.extractor.ExtractionResultImpl;
+import org.apache.any23.rdf.RDFUtils;
+import org.apache.any23.writer.JSONWriter;
+import org.apache.any23.writer.RDFXMLWriter;
+import org.apache.any23.writer.TripleHandler;
+import org.apache.any23.writer.TripleHandlerException;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+import org.openrdf.model.URI;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Test case for {@link JSONLDExtractor}.
+ *
+ */
+public class JSONLDExtractorTest {
+
+ private static final Logger logger = LoggerFactory.getLogger(JSONLDExtractorTest.class);
+
+ private JSONLDExtractor extractor;
+
+ @Before
+ public void setUp() throws Exception {
+ extractor = new JSONLDExtractor();
+ }
+
+ @After
+ public void tearDown() throws Exception {
+ extractor = null;
+ }
+
+ @Test
+ public void testExtractFromJSONLDDocuement()
+ throws IOException, ExtractionException, TripleHandlerException {
+ final URI uri = RDFUtils.uri("http://host.com/place-example.jsonld");
+ extract(uri, "/org/apache/any23/extractor/rdf/place-example.jsonld");
+ }
+
+ @Ignore("Need to verify if jsonld-java-sesame can extract from HTML")
+ @Test
+ public void testExtractFromHTMLDocument()
+ throws IOException, ExtractionException, TripleHandlerException {
+ final URI uri = RDFUtils.uri("http://host.com/embedded_json-ld.html");
+ extract(uri, "/org/apache/any23/extractor/rdf/embedded_json-ld.html");
+ }
+
+ public void extract(URI uri, String filePath)
+ throws IOException, ExtractionException, TripleHandlerException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ final TripleHandler tHandler = new JSONWriter(baos);
+ final ExtractionContext extractionContext = new ExtractionContext("json-ld-extractor", uri);
+ final ExtractionResult result = new ExtractionResultImpl(extractionContext, extractor, tHandler);
+ extractor.setStopAtFirstError(false);
+ try {
+ extractor.run(
+ ExtractionParameters.newDefault(),
+ extractionContext,
+ this.getClass().getResourceAsStream(filePath),
+ result
+ );
+ } finally {
+ logger.debug(baos.toString());
+ tHandler.close();
+ result.close();
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/any23/blob/47278c16/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 44c1dfa..0a03abc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -396,6 +396,14 @@
<version>0.3</version>
</dependency>
<!-- END: Sesame -->
+
+ <!-- BEGIN: Misc -->
+ <dependency>
+ <groupId>com.github.jsonld-java</groupId>
+ <artifactId>jsonld-java-sesame</artifactId>
+ <version>0.3</version>
+ </dependency>
+ <!-- END: Misc -->
<!-- BEGIN: Apache Commons -->
<dependency>