You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2017/01/13 22:25:41 UTC
[02/25] any23 git commit: ANY23-276 : Convert from Sesame to RDF4J
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java
----------------------------------------------------------------------
diff --git a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java b/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java
deleted file mode 100644
index ba54b01..0000000
--- a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsParserTest.java
+++ /dev/null
@@ -1,654 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.io.nquads;
-
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Literal;
-import org.openrdf.model.Resource;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.impl.URIImpl;
-import org.openrdf.rio.ParseLocationListener;
-import org.openrdf.rio.RDFHandler;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.RioSetting;
-import org.openrdf.rio.helpers.BasicParserSettings;
-import org.openrdf.rio.helpers.NTriplesParserSettings;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.util.ArrayList;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-
-import static org.hamcrest.core.Is.is;
-
-/**
- * Test case for {@link NQuadsParser}.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class NQuadsParserTest {
-
- // Bound to the test class itself (as NQuadsWriterTest does) so that log output is attributed to the test, not to the parser under test.
- private static final Logger logger = LoggerFactory.getLogger(NQuadsParserTest.class);
-
- private NQuadsParser parser;
-
- private TestRDFHandler rdfHandler;
-
- @Before
- public void setUp() {
- parser = new NQuadsParser();
- rdfHandler = new TestRDFHandler();
- parser.setRDFHandler(rdfHandler);
- Set<RioSetting<?>> nonFatalErrors = new HashSet<RioSetting<?>>();
- parser.getParserConfig().setNonFatalErrors(nonFatalErrors);
- parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, false);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, false);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.VERIFY_DATATYPE_VALUES);
- parser.getParserConfig().set(BasicParserSettings.NORMALIZE_DATATYPE_VALUES, false);
- parser.getParserConfig().addNonFatalError(BasicParserSettings.NORMALIZE_DATATYPE_VALUES);
-
- }
-
- @After
- public void tearDown() {
- parser = null;
- }
-
- /**
- * Tests the correct behavior with incomplete input.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test(expected = RDFParseException.class)
- public void testIncompleteParsing() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://s> <http://p> <http://o> <http://g>".getBytes()
- );
- parser.parse(bais, "http://base-uri");
- }
-
- /**
-  * Tests that input consisting only of blank lines and comments produces no statements.
-  *
-  * @throws RDFHandlerException declared by the parser API; not expected for this input.
-  * @throws IOException declared by the parser API; not expected for this in-memory input.
-  * @throws RDFParseException if the parser rejects the input; not expected for this input.
-  */
- @Test
- public void testParseEmptyLinesAndComments() throws RDFHandlerException, IOException, RDFParseException {
-     final ByteArrayInputStream bais = new ByteArrayInputStream(
-             " \n\n\n# This is a comment\n\n#this is another comment."
-                     .getBytes()
-     );
-     parser.parse(bais, "http://test.base.uri");
-     // JUnit's signature is assertEquals(expected, actual); the expected value goes first
-     // so that a failure message reports the two numbers the right way round.
-     Assert.assertEquals(0, rdfHandler.getStatements().size());
- }
-
- /**
- * Tests basic N-Quads parsing.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testParseBasic() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://www.v/dat/4b> <http://www.w3.org/20/ica#dtend> <http://sin/value/2> <http://sin.siteserv.org/def/>."
- .getBytes()
- );
- parser.parse(bais, "http://test.base.uri");
- Assert.assertThat(rdfHandler.getStatements().size(), is(1));
- final Statement statement = rdfHandler.getStatements().get(0);
- Assert.assertEquals("http://www.v/dat/4b", statement.getSubject().stringValue());
- Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
- Assert.assertTrue(statement.getObject() instanceof URI);
- Assert.assertEquals("http://sin/value/2", statement.getObject().stringValue());
- Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
- }
-
- /**
- * Tests basic N-Quads parsing with blank node.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testParseBasicBNode() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "_:a123456768 <http://www.w3.org/20/ica#dtend> <http://sin/value/2> <http://sin.siteserv.org/def/>."
- .getBytes()
- );
- parser.parse(bais, "http://test.base.uri");
- Assert.assertThat(rdfHandler.getStatements().size(), is(1));
- final Statement statement = rdfHandler.getStatements().get(0);
- Assert.assertTrue(statement.getSubject() instanceof BNode);
- Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
- Assert.assertTrue(statement.getObject() instanceof URI);
- Assert.assertEquals("http://sin/value/2", statement.getObject().stringValue());
- Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
- }
-
- /**
- * Tests basic N-Quads parsing with literal.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testParseBasicLiteral() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "_:a123456768 <http://www.w3.org/20/ica#dtend> \"2010-05-02\" <http://sin.siteserv.org/def/>."
- .getBytes()
- );
- parser.parse(bais, "http://test.base.uri");
- Assert.assertThat(rdfHandler.getStatements().size(), is(1));
- final Statement statement = rdfHandler.getStatements().get(0);
- Assert.assertTrue(statement.getSubject() instanceof BNode);
- Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
- Assert.assertTrue(statement.getObject() instanceof Literal);
- Assert.assertEquals("2010-05-02", statement.getObject().stringValue());
- Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
- }
-
- /**
-  * Tests N-Quads parsing of a literal object carrying a language tag.
-  *
-  * @throws RDFHandlerException declared by the parser API; not expected for this input.
-  * @throws IOException declared by the parser API; not expected for this in-memory input.
-  * @throws RDFParseException if the parser rejects the statement; not expected for this input.
-  */
- @Test
- public void testParseBasicLiteralLang() throws RDFHandlerException, IOException, RDFParseException {
-     final ByteArrayInputStream bais = new ByteArrayInputStream(
-             "<http://www.v/dat/4b2-21> <http://www.w3.org/20/ica#dtend> \"2010-05-02\"@en <http://sin.siteserv.org/def/>."
-                     .getBytes()
-     );
-     parser.parse(bais, "http://test.base.uri");
-     // Same guard as the sibling testParseBasic* tests: fail with a clear assertion
-     // instead of an IndexOutOfBoundsException when nothing was parsed.
-     Assert.assertThat(rdfHandler.getStatements().size(), is(1));
-     final Statement statement = rdfHandler.getStatements().get(0);
-     Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue());
-     Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
-     Assert.assertTrue(statement.getObject() instanceof Literal);
-     final Literal object = (Literal) statement.getObject();
-     Assert.assertEquals("2010-05-02", object.stringValue());
-     Assert.assertEquals("en", object.getLanguage());
-     // The first argument of assertNull is the failure message, so it must describe the expectation.
-     Assert.assertNull("A language-tagged literal is expected to carry no datatype.", object.getDatatype());
-     Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
- }
-
- /**
- * Tests N-Quads parsing with literal and datatype.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testParseBasicLiteraDatatype() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- ("<http://www.v/dat/4b2-21> " +
- "<http://www.w3.org/20/ica#dtend> " +
- "\"2010\"^^<http://www.w3.org/2001/XMLSchema#integer> " +
- "<http://sin.siteserv.org/def/>."
- ).getBytes()
- );
- parser.parse(bais, "http://test.base.uri");
- final Statement statement = rdfHandler.getStatements().get(0);
- Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue());
- Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
- Assert.assertTrue(statement.getObject() instanceof Literal);
- Literal object = (Literal) statement.getObject();
- Assert.assertEquals("2010", object.stringValue());
- Assert.assertNull(object.getLanguage());
- Assert.assertEquals("http://www.w3.org/2001/XMLSchema#integer", object.getDatatype().toString());
- Assert.assertEquals("http://sin.siteserv.org/def/", statement.getContext().stringValue());
- }
-
- /**
- * Tests the correct support for literal escaping.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testLiteralEscapeManagement1()
- throws RDFHandlerException, IOException, RDFParseException {
- TestParseLocationListener parseLocationListener = new TestParseLocationListener();
- parser.setParseLocationListener(parseLocationListener);
-
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://a> <http://b> \"\\\\\" <http://c> .".getBytes()
- );
- parser.parse(bais, "http://base-uri");
-
- rdfHandler.assertHandler(1);
- //parseLocationListener.assertListener(1, 40);
- parseLocationListener.assertListener(1, 1);
- }
-
- /**
- * Tests the correct support for literal escaping.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testLiteralEscapeManagement2()
- throws RDFHandlerException, IOException, RDFParseException {
- TestParseLocationListener parseLocationListener = new TestParseLocationListener();
- parser.setParseLocationListener(parseLocationListener);
-
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://a> <http://b> \"Line text 1\\nLine text 2\" <http://c> .".getBytes()
- );
- parser.parse(bais, "http://base-uri");
-
- rdfHandler.assertHandler(1);
- final Value object = rdfHandler.getStatements().get(0).getObject();
- Assert.assertTrue( object instanceof Literal);
- final String literalContent = ((Literal) object).getLabel();
- Assert.assertEquals("Line text 1\nLine text 2", literalContent);
- }
-
- /**
- * Tests the correct decoding of UTF-8 encoded chars in URIs.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test
- public void testURIDecodingManagement() throws RDFHandlerException, IOException, RDFParseException {
- TestParseLocationListener parseLocationListener = new TestParseLocationListener();
- parser.setParseLocationListener(parseLocationListener);
-
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://s/\\u306F\\u3080> <http://p/\\u306F\\u3080> <http://o/\\u306F\\u3080> <http://g/\\u306F\\u3080> ."
- .getBytes()
- );
- parser.parse(bais, "http://base-uri");
-
- rdfHandler.assertHandler(1);
- final Statement statement = rdfHandler.getStatements().get(0);
-
- final Resource subject = statement.getSubject();
- Assert.assertTrue( subject instanceof URI);
- final String subjectURI = subject.toString();
- Assert.assertEquals("http://s/\u306f\u3080", subjectURI);
-
- final Resource predicate = statement.getPredicate();
- Assert.assertTrue( predicate instanceof URI);
- final String predicateURI = predicate.toString();
- Assert.assertEquals("http://p/\u306f\u3080", predicateURI);
-
- final Value object = statement.getObject();
- Assert.assertTrue( object instanceof URI);
- final String objectURI = object.toString();
- Assert.assertEquals("http://o/\u306f\u3080", objectURI);
-
- final Resource graph = statement.getContext();
- Assert.assertTrue( graph instanceof URI);
- final String graphURI = graph.toString();
- Assert.assertEquals("http://g/\u306f\u3080", graphURI);
- }
-
- /**
-  * Tests that a literal containing raw (unescaped) non-ASCII characters is returned unchanged.
-  *
-  * @throws RDFHandlerException declared by the parser API; not expected for this input.
-  * @throws IOException declared by the parser API and by the explicit charset lookup; not expected here.
-  * @throws RDFParseException if the parser rejects the statement; not expected for this input.
-  */
- @Test
- public void testUnicodeLiteralManagement() throws RDFHandlerException, IOException, RDFParseException {
-     final String INPUT_LITERAL = "[\u306f\u3001\u30a4\u30ae\u30ea\u30b9\u304a\u3088\u3073\u30a4\u30f3\u30b0\u30e9\u30f3\u30c9\u306e\u9996\u90fd\u3067\u3042\u308b] [\u662f\u5927\u4e0d\u5217\u985b\u53ca\u5317\u611b\u723e\u862d\u806f\u5408\u738b\u570b\u548c\u82f1\u683c\u862d\u7684\u9996\u90fd]";
-     final String INPUT_STRING = String.format(
-             "<http://a> <http://b> \"%s\" <http://c> .",
-             INPUT_LITERAL
-     );
-     // N-Quads documents are UTF-8. Encoding with the platform default charset would make
-     // this test depend on the machine it runs on, because the literal is not plain ASCII.
-     // NOTE(review): assumes the parser decodes byte streams as UTF-8 - confirm against NQuadsParser.
-     final ByteArrayInputStream bais = new ByteArrayInputStream(
-             INPUT_STRING.getBytes("UTF-8")
-     );
-     parser.parse(bais, "http://base-uri");
-
-     rdfHandler.assertHandler(1);
-     final Literal obj = (Literal) rdfHandler.getStatements().get(0).getObject();
-     Assert.assertEquals(INPUT_LITERAL, obj.getLabel());
- }
-
- @Test
- public void testUnicodeLiteralDecoding() throws RDFHandlerException, IOException, RDFParseException {
- final String INPUT_LITERAL_PLAIN = "[\u306f]";
- final String INPUT_LITERAL_ENCODED = "[\\u306F]";
- final String INPUT_STRING = String.format(
- "<http://a> <http://b> \"%s\" <http://c> .",
- INPUT_LITERAL_ENCODED
- );
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- INPUT_STRING.getBytes()
- );
- parser.parse(bais, "http://base-uri");
-
- rdfHandler.assertHandler(1);
- final Literal obj = (Literal) rdfHandler.getStatements().get(0).getObject();
- Assert.assertEquals(INPUT_LITERAL_PLAIN, obj.getLabel());
- }
-
- @Test(expected = RDFParseException.class)
- public void testWrongUnicodeEncodedCharFail() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://s> <http://p> \"\\u123X\" <http://g> .".getBytes()
- );
- parser.parse(bais, "http://base-uri");
- }
-
- /**
- * Tests the correct support for EOS exception.
- *
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- */
- @Test(expected = RDFParseException.class)
- public void testEndOfStreamReached()
- throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://a> <http://b> \"\\\" <http://c> .".getBytes()
- );
- parser.parse(bais, "http://base-uri");
- }
-
- /**
-  * Tests the parser with all cases defined by the NQuads grammar, reading the
-  * classpath resource <code>application/nquads/test1.nq</code>.
-  *
-  * @throws IOException if the resource cannot be read or closed.
-  * @throws RDFParseException if the parser rejects the resource content; not expected here.
-  * @throws RDFHandlerException if the scenario handler fails; not expected here.
-  */
- @Test
- public void testFullParseScenario()
-         throws IOException, RDFParseException, RDFHandlerException {
-     final TestParseLocationListener parseLocationListener = new TestParseLocationListener();
-     // Named differently from the rdfHandler field so the field is not shadowed.
-     final FullParseScenarioRDFHandler scenarioHandler = new FullParseScenarioRDFHandler();
-     parser.setParseLocationListener(parseLocationListener);
-     parser.setRDFHandler(scenarioHandler);
-
-     // Decode explicitly as UTF-8 (the N-Quads encoding) rather than the platform default charset.
-     final BufferedReader br = new BufferedReader(
-             new InputStreamReader(
-                     this.getClass().getClassLoader().getResourceAsStream("application/nquads/test1.nq"),
-                     "UTF-8"
-             )
-     );
-     try {
-         parser.parse(
-                 br,
-                 "http://test.base.uri"
-         );
-     } finally {
-         // Release the classpath stream even when parsing fails.
-         br.close();
-     }
-
-     scenarioHandler.assertHandler(6);
-     parseLocationListener.assertListener(8, 71);
- }
-
- /**
- * Tests parser with real data.
- *
- * @throws IOException
- * @throws RDFParseException
- * @throws RDFHandlerException
- */
- @Test
- public void testParseRealData()
- throws IOException, RDFParseException, RDFHandlerException {
- TestParseLocationListener parseLocationListener = new TestParseLocationListener();
- parser.setParseLocationListener(parseLocationListener);
-
- parser.parse(
- this.getClass().getClassLoader().getResourceAsStream("application/nquads/test2.nq"),
- "http://test.base.uri"
- );
-
- rdfHandler.assertHandler(400);
- parseLocationListener.assertListener(400, 349);
- }
-
- @Test
- public void testStatementWithInvalidLiteralContentAndIgnoreValidation()
- throws RDFHandlerException, IOException, RDFParseException {
- verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.IGNORE);
- }
-
- @Test(expected = RDFParseException.class)
- public void testStatementWithInvalidLiteralContentAndStrictValidation()
- throws RDFHandlerException, IOException, RDFParseException {
- verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling.VERIFY);
- }
-
- @Test
- public void testStatementWithInvalidDatatypeAndIgnoreValidation()
- throws RDFHandlerException, IOException, RDFParseException {
- verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.IGNORE);
- }
-
- @Test(expected = RDFParseException.class)
- public void testStatementWithInvalidDatatypeAndVerifyValidation()
- throws RDFHandlerException, IOException, RDFParseException {
- verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling.VERIFY);
- }
-
- @Test (expected = RDFParseException.class)
- public void testStopAtFirstErrorStrictParsing() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- (
- "<http://s0> <http://p0> <http://o0> <http://g0> .\n" +
- "<http://sX> .\n" + // Line with error.
- "<http://s1> <http://p1> <http://o1> <http://g1> .\n"
- ).getBytes()
- );
- parser.setStopAtFirstError(true);
- parser.parse(bais, "http://base-uri");
- }
-
- @Test
- public void testStopAtFirstErrorTolerantParsing() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- (
- "<http://s0> <http://p0> <http://o0> <http://g0> .\n" +
- "<http://sX> .\n" + // Line with error.
- "<http://s1> <http://p1> <http://o1> <http://g1> .\n"
- ).getBytes()
- );
- //parser.setStopAtFirstError(false);
- parser.getParserConfig().addNonFatalError(NTriplesParserSettings.FAIL_ON_NTRIPLES_INVALID_LINES);
- parser.parse(bais, "http://base-uri");
- rdfHandler.assertHandler(2);
- final List<Statement> statements = rdfHandler.getStatements();
- final int size = statements.size();
- for(int i = 0; i < size; i++) {
- Assert.assertEquals("http://s" + i, statements.get(i).getSubject().stringValue() );
- Assert.assertEquals("http://p" + i, statements.get(i).getPredicate().stringValue());
- Assert.assertEquals("http://o" + i, statements.get(i).getObject().stringValue() );
- Assert.assertEquals("http://g" + i, statements.get(i).getContext().stringValue() );
- }
- }
-
- @Test
- public void testReportInvalidLiteralAttribute() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- "<http://a> <http://b> \"literal\"^^xsd:datetime <http://c> .".getBytes()
- );
- try {
- parser.parse(bais, "http://base-uri");
- Assert.fail("Expected failure here.");
- } catch (RDFParseException e) {
- Assert.assertTrue(e.getMessage().contains("Expected '<'"));
- Assert.assertEquals(1 , e.getLineNumber());
- //Assert.assertEquals(35, e.getColumnNumber());
- }
- }
-
- @Test
- public void testParseWithNoContext() throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- ("<http://www.v/dat/4b2-21>" +
- "<http://www.w3.org/20/ica#dtend>" +
- "\"2010\"^^<http://www.w3.org/2001/XMLSchema#integer> ."
- ).getBytes()
- );
- parser.parse(bais, "http://test.base.uri");
- final Statement statement = rdfHandler.getStatements().get(0);
- Assert.assertEquals("http://www.v/dat/4b2-21", statement.getSubject().stringValue());
- Assert.assertEquals("http://www.w3.org/20/ica#dtend", statement.getPredicate().stringValue());
- Assert.assertTrue(statement.getObject() instanceof Literal);
- Literal object = (Literal) statement.getObject();
- Assert.assertEquals("2010", object.stringValue());
- Assert.assertNull(object.getLanguage());
- Assert.assertEquals("http://www.w3.org/2001/XMLSchema#integer", object.getDatatype().toString());
- Assert.assertNull(statement.getContext());
- }
-
- private void verifyStatementWithInvalidLiteralContent(RDFParser.DatatypeHandling datatypeHandling)
- throws RDFHandlerException, IOException, RDFParseException {
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- (
- "<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> " +
- "<http://dbpedia.org/property/mandatofine> " +
- "\"1380.0\"^^<http://www.w3.org/2001/XMLSchema#int> " + // Float declared as int.
- "<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> ."
- ).getBytes()
- );
- if(datatypeHandling == RDFParser.DatatypeHandling.VERIFY) {
- parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
- parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
- }
- //parser.setDatatypeHandling(datatypeHandling);
- parser.parse(bais, "http://base-uri");
- }
-
- private void verifyStatementWithInvalidDatatype(RDFParser.DatatypeHandling datatypeHandling)
- throws RDFHandlerException, IOException, RDFParseException {
- parser.setDatatypeHandling(datatypeHandling);
- final ByteArrayInputStream bais = new ByteArrayInputStream(
- (
- "<http://dbpedia.org/resource/Camillo_Benso,_conte_di_Cavour> " +
- "<http://dbpedia.org/property/mandatofine> " +
- "\"1380.0\"^^<http://dbpedia.org/invalid/datatype/second> " +
- "<http://it.wikipedia.org/wiki/Camillo_Benso,_conte_di_Cavour#absolute-line=20> ."
- ).getBytes()
- );
- if(datatypeHandling == RDFParser.DatatypeHandling.VERIFY) {
- parser.getParserConfig().setNonFatalErrors(new HashSet<RioSetting<?>>());
- parser.getParserConfig().set(BasicParserSettings.FAIL_ON_UNKNOWN_DATATYPES, true);
- parser.getParserConfig().set(BasicParserSettings.VERIFY_DATATYPE_VALUES, true);
- }
- parser.parse(bais, "http://base-uri");
- rdfHandler.assertHandler(1);
- }
-
- private class TestParseLocationListener implements ParseLocationListener {
-
- private int lastRow, lastCol;
-
- public void parseLocationUpdate(int r, int c) {
- lastRow = r;
- lastCol = c;
- }
-
- private void assertListener(int row, int col) {
- Assert.assertEquals("Unexpected last row", row , lastRow);
- // Column numbers are not supported by the Rio NQuadsParser currently
- //Assert.assertEquals("Unexpected last col", col , lastCol);
- }
-
- }
-
- /**
-  * {@link RDFHandler} used by the tests: it records every statement it receives and
-  * remembers whether the start and end events were delivered.
-  */
- private class TestRDFHandler implements RDFHandler {
-
-     // Set by startRDF() / endRDF(); checked by assertHandler().
-     private boolean started = false;
-     private boolean ended = false;
-
-     // Statements in the order they were handed to handleStatement().
-     private final List<Statement> statements = new ArrayList<Statement>();
-
-     /**
-      * @return the live list of statements collected so far.
-      */
-     protected List<Statement> getStatements() {
-         return statements;
-     }
-
-     public void startRDF() throws RDFHandlerException {
-         started = true;
-     }
-
-     public void endRDF() throws RDFHandlerException {
-         ended = true;
-     }
-
-     public void handleNamespace(String s, String s1) throws RDFHandlerException {
-         // Namespaces are not relevant to these tests.
-     }
-
-     public void handleStatement(Statement statement) throws RDFHandlerException {
-         logger.debug(statement.toString());
-         statements.add(statement);
-     }
-
-     public void handleComment(String s) throws RDFHandlerException {
-         // Comments are not relevant to these tests.
-     }
-
-     /**
-      * Asserts that both the start and end events were received and that exactly
-      * <code>expected</code> statements were collected.
-      *
-      * @param expected the expected number of collected statements.
-      */
-     public void assertHandler(int expected) {
-         Assert.assertTrue("Never started.", started);
-         Assert.assertTrue("Never ended." , ended );
-         Assert.assertEquals("Unexpected number of statements.", expected, statements.size());
-     }
- }
-
- private class FullParseScenarioRDFHandler extends TestRDFHandler {
-
- public void handleStatement(Statement statement) throws RDFHandlerException {
- int statementIndex = getStatements().size();
- if(statementIndex == 0){
- Assert.assertEquals(new URIImpl("http://example.org/alice/foaf.rdf#me"), statement.getSubject() );
- } else {
- Assert.assertTrue(statement.getSubject() instanceof BNode);
- }
-
- if( statementIndex == 4) {
- Assert.assertEquals(new URIImpl("http://example.org/#like"), statement.getPredicate() );
- }
-
- if(statementIndex == 5) {
- Assert.assertNull(statement.getContext());
- } else {
- Assert.assertEquals(
- new URIImpl(String.format("http://example.org/alice/foaf%s.rdf", statementIndex + 1)),
- statement.getContext()
- );
- }
-
- super.handleStatement(statement);
- }
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java
----------------------------------------------------------------------
diff --git a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java b/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java
deleted file mode 100644
index 1fe92c6..0000000
--- a/nquads/src/test/java/org/apache/any23/io/nquads/NQuadsWriterTest.java
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.io.nquads;
-
-import org.junit.After;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-import org.openrdf.model.BNode;
-import org.openrdf.model.Literal;
-import org.openrdf.model.Resource;
-import org.openrdf.model.Statement;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.ValueFactory;
-import org.openrdf.model.impl.ValueFactoryImpl;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-
-/**
- * Test case for {@link NQuadsWriter}.
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class NQuadsWriterTest {
-
- private static final Logger logger = LoggerFactory.getLogger(NQuadsWriterTest.class);
-
- private final ByteArrayOutputStream baos = new ByteArrayOutputStream();
-
- private NQuadsWriter writer;
-
- private ValueFactory vf;
-
- @Before
- public void setUp() {
- vf = ValueFactoryImpl.getInstance();
- writer = new NQuadsWriter(baos);
- }
-
- @After
- public void tearDown() {
- logger.debug( "\n" + baos.toString() );
- baos.reset();
- writer = null;
- }
-
- @Test
- public void testWrite() throws RDFHandlerException {
- Statement s1 = quad(
- uri("http://sub"),
- uri("http://pre"),
- uri("http://obj"),
- uri("http://gra1")
- );
- Statement s2 = quad(
- bnode("1"),
- uri("http://pre"),
- bnode("2"),
- uri("http://gra2")
- );
- Statement s3 = quad(
- bnode("3"),
- uri("http://pre"),
- literal("Sample text 1"),
- uri("http://gra2")
- );
- Statement s4 = quad(
- bnode("4"),
- uri("http://pre"),
- literal("Sample text 2", "en"),
- uri("http://gra2")
- );
- Statement s5 = quad(
- bnode("5"),
- uri("http://pre"),
- literal("12345", uri("http://www.w3.org/2001/XMLSchema#integer")),
- uri("http://gra2")
- );
-// Statement s6 = quad(
-// uri("p1:sub"),
-// uri("p1:pre"),
-// uri("p1:obj"),
-// uri("p1:gra2")
-// );
- Statement s7 = quad(
- uri("http://sub"),
- uri("http://pre"),
- literal("This is line 1.\nThis is line 2.\n"),
- uri("http://gra3")
- );
-
- // Sending events.
- writer.startRDF();
- //writer.handleNamespace("p1", "http://test.com/");
- writer.handleStatement(s1);
- writer.handleStatement(s2);
- writer.handleStatement(s3);
- writer.handleStatement(s4);
- writer.handleStatement(s5);
- //writer.handleStatement(s6);
- writer.handleStatement(s7);
- writer.endRDF();
-
- // Checking content.
- String content = baos.toString();
- logger.info("output={}", content);
- String[] lines = content.split("\n");
- Assert.assertEquals("Unexpected number of lines.", 6, lines.length);
- Assert.assertTrue( lines[0].matches("<.*> <.*> <.*> <.*> \\.") );
- Assert.assertTrue( lines[1].matches("_:.* <.*> _:.* <.*> \\.") );
- Assert.assertTrue( lines[2].matches("_:.* <.*> \".*\" <.*> \\.") );
- Assert.assertTrue( lines[3].matches("_:.* <.*> \".*\"@en <.*> \\.") );
- Assert.assertTrue( lines[4].matches("_:.* <.*> \".*\"\\^\\^<.*> <.*> \\.") );
- //Assert.assertTrue( lines[5].matches("<http://.*> <http://.*> <http://.*> <http://.*> \\.") );
- Assert.assertEquals(
- "<http://sub> <http://pre> \"This is line 1.\\nThis is line 2.\\n\" <http://gra3> .",
- lines[5]
- );
- }
-
- @Test
- public void testReadWrite() throws RDFHandlerException, IOException, RDFParseException {
- NQuadsParser parser = new NQuadsParser();
- parser.setRDFHandler(writer);
- parser.parse(
- this.getClass().getClassLoader().getResourceAsStream("application/nquads/test2.nq"),
- "http://test.base.uri"
- );
-
- Assert.assertEquals("Unexpected number of lines.", 400, baos.toString().split("\n").length);
- }
-
- private Statement quad(Resource subject, URI predicate, Value object, Resource context) {
- return this.vf.createStatement(subject, predicate, object, context);
- }
-
- private URI uri(String uri) {
- return this.vf.createURI(uri);
- }
-
- private BNode bnode(String testID) {
- return this.vf.createBNode(testID);
- }
-
- private Literal literal(String literalValue) {
- return this.vf.createLiteral(literalValue);
- }
-
- private Literal literal(String literalValue, URI datatype) {
- return this.vf.createLiteral(literalValue, datatype);
- }
-
- private Literal literal(String literalValue, String language) {
- return this.vf.createLiteral(literalValue, language);
- }
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/nquads/src/test/resources/log4j.properties
----------------------------------------------------------------------
diff --git a/nquads/src/test/resources/log4j.properties b/nquads/src/test/resources/log4j.properties
deleted file mode 100644
index 84062ba..0000000
--- a/nquads/src/test/resources/log4j.properties
+++ /dev/null
@@ -1,35 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-log4j.rootCategory=INFO, O
-
-# Stdout
-log4j.appender.O=org.apache.log4j.ConsoleAppender
-
-# File
-#log4j.appender.R=org.apache.log4j.RollingFileAppender
-#log4j.appender.R.File=log4j.log
-
-# Control the maximum log file size
-#log4j.appender.R.MaxFileSize=100KB
-
-# Archive log files (one backup file here)
-log4j.appender.R.MaxBackupIndex=1
-
-log4j.appender.R.layout=org.apache.log4j.PatternLayout
-log4j.appender.O.layout=org.apache.log4j.PatternLayout
-
-log4j.appender.R.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
-log4j.appender.O.layout.ConversionPattern=[%d{ISO8601}]%5p%6.6r[%t]%x - %C.%M(%F:%L) - %m%n
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/pom.xml b/plugins/basic-crawler/pom.xml
index 6d63627..bffd7e2 100644
--- a/plugins/basic-crawler/pom.xml
+++ b/plugins/basic-crawler/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@@ -35,8 +35,8 @@
<dependencies>
<!-- Sesame. -->
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-model</artifactId>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-model</artifactId>
<scope>provided</scope>
</dependency>
@@ -44,14 +44,14 @@
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<type>test-jar</type>
<scope>test</scope>
</dependency>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
index 1f84069..66b167b 100644
--- a/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
+++ b/plugins/basic-crawler/src/main/java/org/apache/any23/cli/Crawler.java
@@ -78,10 +78,10 @@ public class Crawler extends Rover {
public void run() throws Exception {
super.configure();
- if (inputURIs.size() != 1) {
+ if (inputIRIs.size() != 1) {
throw new IllegalArgumentException("Expected just one seed.");
}
- final URL seed = new URL(inputURIs.get( 0 ));
+ final URL seed = new URL(inputIRIs.get( 0 ));
if ( storageFolder.isFile() ) {
throw new IllegalStateException( format( "Storage folder %s can not be a file, must be a directory",
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
----------------------------------------------------------------------
diff --git a/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java b/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
index 2439336..eac75f3 100644
--- a/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
+++ b/plugins/basic-crawler/src/test/java/org/apache/any23/cli/CrawlerTest.java
@@ -22,10 +22,10 @@ import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.util.FileUtils;
import org.junit.Ignore;
import org.junit.Test;
-import org.openrdf.model.Statement;
-import org.openrdf.rio.RDFFormat;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
+import org.eclipse.rdf4j.model.Statement;
+import org.eclipse.rdf4j.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFHandlerException;
+import org.eclipse.rdf4j.rio.RDFParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/pom.xml b/plugins/html-scraper/pom.xml
index 04340cf..e04651a 100644
--- a/plugins/html-scraper/pom.xml
+++ b/plugins/html-scraper/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@@ -35,8 +35,8 @@
<dependencies>
<!-- Sesame. -->
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-model</artifactId>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-model</artifactId>
<scope>provided</scope>
</dependency>
@@ -44,7 +44,7 @@
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
index 0605f62..ab7d34a 100644
--- a/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
+++ b/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
@@ -30,8 +30,8 @@ import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.vocab.SINDICE;
-import org.openrdf.model.URI;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import java.io.IOException;
import java.io.InputStream;
@@ -46,14 +46,14 @@ import java.util.List;
*/
public class HTMLScraperExtractor implements Extractor.ContentExtractor {
- public final static URI PAGE_CONTENT_DE_PROPERTY =
- ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/de");
- public final static URI PAGE_CONTENT_AE_PROPERTY =
- ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/ae");
- public final static URI PAGE_CONTENT_LCE_PROPERTY =
- ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/lce");
- public final static URI PAGE_CONTENT_CE_PROPERTY =
- ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/ce");
+ public final static IRI PAGE_CONTENT_DE_PROPERTY =
+ SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/de");
+ public final static IRI PAGE_CONTENT_AE_PROPERTY =
+ SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ae");
+ public final static IRI PAGE_CONTENT_LCE_PROPERTY =
+ SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/lce");
+ public final static IRI PAGE_CONTENT_CE_PROPERTY =
+ SimpleValueFactory.getInstance().createIRI(SINDICE.NS + "pagecontent/ce");
private final List<ExtractionRule> extractionRules = new ArrayList<ExtractionRule>();
@@ -61,7 +61,7 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor {
loadDefaultRules();
}
- public void addTextExtractor(String name, URI property, BoilerpipeExtractor extractor) {
+ public void addTextExtractor(String name, IRI property, BoilerpipeExtractor extractor) {
extractionRules.add( new ExtractionRule(name, property, extractor) );
}
@@ -81,13 +81,13 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor {
ExtractionResult extractionResult
) throws IOException, ExtractionException {
try {
- final URI documentURI = extractionContext.getDocumentURI();
+ final IRI documentIRI = extractionContext.getDocumentIRI();
for (ExtractionRule extractionRule : extractionRules) {
final String content = extractionRule.boilerpipeExtractor.getText(new InputStreamReader(inputStream));
extractionResult.writeTriple(
- documentURI,
+ documentIRI,
extractionRule.property,
- ValueFactoryImpl.getInstance().createLiteral(content)
+ SimpleValueFactory.getInstance().createLiteral(content)
);
}
} catch (BoilerpipeProcessingException bpe) {
@@ -118,10 +118,10 @@ public class HTMLScraperExtractor implements Extractor.ContentExtractor {
class ExtractionRule {
public final String name;
- public final URI property;
+ public final IRI property;
public final BoilerpipeExtractor boilerpipeExtractor;
- ExtractionRule(String name, URI property, BoilerpipeExtractor boilerpipeExtractor) {
+ ExtractionRule(String name, IRI property, BoilerpipeExtractor boilerpipeExtractor) {
if(name == null) {
throw new NullPointerException("name cannot be null.");
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java b/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
index 1420362..7465ade 100644
--- a/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
+++ b/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
@@ -26,9 +26,9 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Matchers;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.impl.ValueFactoryImpl;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import java.io.IOException;
import java.io.InputStream;
@@ -68,24 +68,24 @@ public class HTMLScraperExtractorTest {
public void testRun() throws IOException, ExtractionException {
final InputStream is = this.getClass().getResourceAsStream("html-scraper-extractor-test.html");
final ExtractionResult extractionResult = mock(ExtractionResult.class);
- final URI pageURI = ValueFactoryImpl.getInstance().createURI("http://fake/test/page/testrun");
+ final IRI pageIRI = SimpleValueFactory.getInstance().createIRI("http://fake/test/page/testrun");
final ExtractionContext extractionContext = new ExtractionContext(
extractor.getDescription().getExtractorName(),
- pageURI
+ pageIRI
);
extractor.run(ExtractionParameters.newDefault(), extractionContext, is, extractionResult);
verify(extractionResult).writeTriple(
- eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY) , (Value) Matchers.anyObject())
+ eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_DE_PROPERTY) , (Value) Matchers.anyObject())
;
verify(extractionResult).writeTriple(
- eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY) , (Value) Matchers.anyObject())
+ eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_AE_PROPERTY) , (Value) Matchers.anyObject())
;
verify(extractionResult).writeTriple(
- eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY) , (Value) Matchers.anyObject())
+ eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_LCE_PROPERTY) , (Value) Matchers.anyObject())
;
verify(extractionResult).writeTriple(
- eq(pageURI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY) , (Value) Matchers.anyObject())
+ eq(pageIRI), eq(HTMLScraperExtractor.PAGE_CONTENT_CE_PROPERTY) , (Value) Matchers.anyObject())
;
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
----------------------------------------------------------------------
diff --git a/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html b/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
index 1e416ac..6ec92fb 100644
--- a/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
+++ b/plugins/html-scraper/src/test/resources/org/apache/any23/plugin/htmlscraper/html-scraper-extractor-test.html
@@ -138,15 +138,15 @@
</div>
<div class="search-types" id="search-type-term">
<form action="/search" method="get">
- <input onfocus="if (this.value=='Type one or more keywords or URI') {this.value=''} else {this.select()}; return true;"
- type="text" name="q" size="45" value="Type one or more keywords or URI"/>
+ <input onfocus="if (this.value=='Type one or more keywords or IRI') {this.value=''} else {this.select()}; return true;"
+ type="text" name="q" size="45" value="Type one or more keywords or IRI"/>
<button type="submit" class="inspectButton">
SEARCH
</button>
</form>
<span class="tip">Examples:
<a href="/search?q=tim%20berners%20lee">tim berners lee</a>
- (by <a href="/search?q=http%3A%2F%2Fwww.w3.org%2FPeople%2FBerners-Lee%2Fcard">URI</a>),
+ (by <a href="/search?q=http%3A%2F%2Fwww.w3.org%2FPeople%2FBerners-Lee%2Fcard">IRI</a>),
<a href="/search?q=michele">michele</a>, <a href="/search?q=deri">deri</a></span>
<div style="height:1em;">
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/integration-test/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/integration-test/pom.xml b/plugins/integration-test/pom.xml
index d80f5fc..c1d0723 100644
--- a/plugins/integration-test/pom.xml
+++ b/plugins/integration-test/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/pom.xml
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/pom.xml b/plugins/office-scraper/pom.xml
index 80cb238..db5e3ef 100644
--- a/plugins/office-scraper/pom.xml
+++ b/plugins/office-scraper/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
@@ -35,8 +35,8 @@
<dependencies>
<!-- Sesame. -->
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-model</artifactId>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-model</artifactId>
<scope>provided</scope>
</dependency>
@@ -44,7 +44,7 @@
<dependency>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23-core</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<scope>provided</scope>
</dependency>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
index c4e5284..4c8826c 100644
--- a/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
+++ b/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
@@ -31,8 +31,8 @@ import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.openrdf.model.URI;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import java.io.IOException;
import java.io.InputStream;
@@ -76,90 +76,88 @@ public class ExcelExtractor implements Extractor.ContentExtractor {
ExtractionResult er
) throws IOException, ExtractionException {
try {
- final URI documentURI = context.getDocumentURI();
- final Workbook workbook = createWorkbook(documentURI, in);
- processWorkbook(documentURI, workbook, er);
+ final IRI documentIRI = context.getDocumentIRI();
+ final Workbook workbook = createWorkbook(documentIRI, in);
+ processWorkbook(documentIRI, workbook, er);
} catch (Exception e) {
throw new ExtractionException("An error occurred while extracting MS Excel content.", e);
}
}
// TODO: this should be done by Tika, the extractors should be split.
- private Workbook createWorkbook(URI document, InputStream is) throws IOException {
- final String documentURI = document.toString();
- if(documentURI.endsWith(".xlsx")) {
+ private Workbook createWorkbook(IRI document, InputStream is) throws IOException {
+ final String documentIRI = document.toString();
+ if(documentIRI.endsWith(".xlsx")) {
return new XSSFWorkbook(is);
- } else if(documentURI.endsWith("xls")) {
+ } else if(documentIRI.endsWith("xls")) {
return new HSSFWorkbook(is);
} else {
- throw new IllegalArgumentException("Unsupported extension for resource [" + documentURI + "]");
+ throw new IllegalArgumentException("Unsupported extension for resource [" + documentIRI + "]");
}
}
- private void processWorkbook(URI documentURI, Workbook wb, ExtractionResult er) {
+ private void processWorkbook(IRI documentIRI, Workbook wb, ExtractionResult er) {
for (int sheetIndex = 0; sheetIndex < wb.getNumberOfSheets(); sheetIndex++) {
final Sheet sheet = wb.getSheetAt(sheetIndex);
- final URI sheetURI = getSheetURI(documentURI, sheet);
- er.writeTriple(documentURI, excel.containsSheet, sheetURI);
- er.writeTriple(sheetURI, RDF.TYPE, excel.sheet);
- writeSheetMetadata(sheetURI, sheet, er);
+ final IRI sheetIRI = getSheetIRI(documentIRI, sheet);
+ er.writeTriple(documentIRI, excel.containsSheet, sheetIRI);
+ er.writeTriple(sheetIRI, RDF.TYPE, excel.sheet);
+ writeSheetMetadata(sheetIRI, sheet, er);
for (Row row : sheet) {
- final URI rowURI = getRowURI(sheetURI, row);
- er.writeTriple(sheetURI, excel.containsRow, rowURI);
- er.writeTriple(rowURI, RDF.TYPE, excel.row);
- writeRowMetadata(rowURI, row, er);
+ final IRI rowIRI = getRowIRI(sheetIRI, row);
+ er.writeTriple(sheetIRI, excel.containsRow, rowIRI);
+ er.writeTriple(rowIRI, RDF.TYPE, excel.row);
+ writeRowMetadata(rowIRI, row, er);
for (Cell cell : row) {
- writeCell(rowURI, cell, er);
+ writeCell(rowIRI, cell, er);
}
}
}
}
- private void writeSheetMetadata(URI sheetURI, Sheet sheet, ExtractionResult er) {
+ private void writeSheetMetadata(IRI sheetIRI, Sheet sheet, ExtractionResult er) {
final String sheetName = sheet.getSheetName();
final int firstRowNum = sheet.getFirstRowNum();
final int lastRowNum = sheet.getLastRowNum();
- er.writeTriple(sheetURI, excel.sheetName, RDFUtils.literal(sheetName));
- er.writeTriple(sheetURI, excel.firstRow, RDFUtils.literal(firstRowNum));
- er.writeTriple(sheetURI, excel.lastRow , RDFUtils.literal(lastRowNum ));
+ er.writeTriple(sheetIRI, excel.sheetName, RDFUtils.literal(sheetName));
+ er.writeTriple(sheetIRI, excel.firstRow, RDFUtils.literal(firstRowNum));
+ er.writeTriple(sheetIRI, excel.lastRow , RDFUtils.literal(lastRowNum ));
}
- private void writeRowMetadata(URI rowURI, Row row, ExtractionResult er) {
+ private void writeRowMetadata(IRI rowIRI, Row row, ExtractionResult er) {
final int firstCellNum = row.getFirstCellNum();
final int lastCellNum = row.getLastCellNum();
- er.writeTriple(rowURI, excel.firstCell , RDFUtils.literal(firstCellNum));
- er.writeTriple(rowURI, excel.lastCell , RDFUtils.literal(lastCellNum ));
+ er.writeTriple(rowIRI, excel.firstCell , RDFUtils.literal(firstCellNum));
+ er.writeTriple(rowIRI, excel.lastCell , RDFUtils.literal(lastCellNum ));
}
- private void writeCell(URI rowURI, Cell cell, ExtractionResult er) {
- final URI cellType = cellTypeToType(cell.getCellType());
+ private void writeCell(IRI rowIRI, Cell cell, ExtractionResult er) {
+ final IRI cellType = cellTypeToType(cell.getCellType());
if(cellType == null) return; // Skip unsupported cells.
- final URI cellURI = getCellURI(rowURI, cell);
- er.writeTriple(rowURI, excel.containsCell, cellURI);
- er.writeTriple(cellURI, RDF.TYPE, excel.cell);
+ final IRI cellIRI = getCellIRI(rowIRI, cell);
+ er.writeTriple(rowIRI, excel.containsCell, cellIRI);
+ er.writeTriple(cellIRI, RDF.TYPE, excel.cell);
er.writeTriple(
- cellURI,
+ cellIRI,
excel.cellValue,
RDFUtils.literal(cell.getStringCellValue(), cellType)
);
}
- private URI getSheetURI(URI documentURI, Sheet sheet) {
- return RDFUtils.uri( documentURI.toString() + "/sheet/" + sheet.getSheetName() );
+ private IRI getSheetIRI(IRI documentIRI, Sheet sheet) {
+ return RDFUtils.iri(documentIRI.toString() + "/sheet/" + sheet.getSheetName());
}
- private URI getRowURI(URI sheetURI, Row row) {
- return RDFUtils.uri( sheetURI.toString() + "/" + row.getRowNum() );
+ private IRI getRowIRI(IRI sheetIRI, Row row) {
+ return RDFUtils.iri(sheetIRI.toString() + "/" + row.getRowNum());
}
- private URI getCellURI(URI rowURI, Cell cell) {
- return RDFUtils.uri(
- rowURI +
- String.format("/%d/", cell.getColumnIndex())
- );
+ private IRI getCellIRI(IRI rowIRI, Cell cell) {
+ return RDFUtils.iri(rowIRI +
+ String.format("/%d/", cell.getColumnIndex()));
}
- private URI cellTypeToType(int cellType) {
+ private IRI cellTypeToType(int cellType) {
final String postfix;
switch (cellType) {
case Cell.CELL_TYPE_STRING:
@@ -174,7 +172,7 @@ public class ExcelExtractor implements Extractor.ContentExtractor {
default:
postfix = null;
}
- return postfix == null ? null : RDFUtils.uri(excel.getNamespace().toString() + postfix);
+ return postfix == null ? null : RDFUtils.iri(excel.getNamespace().toString() + postfix);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java b/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
index de21ff8..64ca060 100644
--- a/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
+++ b/plugins/office-scraper/src/main/java/org/apache/any23/vocab/Excel.java
@@ -17,7 +17,7 @@
package org.apache.any23.vocab;
-import org.openrdf.model.URI;
+import org.eclipse.rdf4j.model.IRI;
/**
* The <i>MS Excel</i> extractor vocabulary.
@@ -46,63 +46,63 @@ public class Excel extends Vocabulary {
/**
* This property links the identifier of a <i>document</i> to the identifier of a <i>sheet</i>.
*/
- public final URI containsSheet = createProperty(CONTAINS_SHEET);
+ public final IRI containsSheet = createProperty(CONTAINS_SHEET);
/**
* This property links the identifier of a <i>sheet</i> to the identifier of a <i>row</i>.
*/
- public final URI containsRow = createProperty(CONTAINS_ROW);
+ public final IRI containsRow = createProperty(CONTAINS_ROW);
/**
* This property links the identifier of a <i>row</i> to the identifier of a <i>cell</i>.
*/
- public final URI containsCell = createProperty(CONTAINS_CELL);
+ public final IRI containsCell = createProperty(CONTAINS_CELL);
/**
* This property links the identifier of a <i>Sheet</i> to the name of the sheet.
*/
- public final URI sheetName = createProperty(SHEET_NAME);
+ public final IRI sheetName = createProperty(SHEET_NAME);
/**
* This property links the identifier of a <i>Sheet</i> to the index of the first declared row.
*/
- public final URI firstRow = createProperty(FIRST_ROW);
+ public final IRI firstRow = createProperty(FIRST_ROW);
/**
* This property links the identifier of a <i>Sheet</i> to the index of the last declared row.
*/
- public final URI lastRow = createProperty(LAST_ROW);
+ public final IRI lastRow = createProperty(LAST_ROW);
/**
* This property links the identifier of a <i>Row</i> to the index of the first declared cell.
*/
- public final URI firstCell = createProperty(FIRST_CELL);
+ public final IRI firstCell = createProperty(FIRST_CELL);
/**
* This property links the identifier of a <i>Row</i> to the index of the last declared cell.
*/
- public final URI lastCell = createProperty(LAST_CELL);
+ public final IRI lastCell = createProperty(LAST_CELL);
/**
* This property links the identifier of a <i>cell</i> to the content of the cell.
*/
- public final URI cellValue = createProperty(CELL_VALUE);
+ public final IRI cellValue = createProperty(CELL_VALUE);
/**
* This resource identifies a <i>Sheet</i>.
*/
- public final URI sheet = createResource(SHEET);
+ public final IRI sheet = createResource(SHEET);
/**
* This resource identifies a <i>row</i>.
*/
- public final URI row = createResource(ROW);
+ public final IRI row = createResource(ROW);
/**
* This resource identifies a <i>cell</i>.
*/
- public final URI cell = createResource(CELL);
+ public final IRI cell = createResource(CELL);
/**
* The namespace of the vocabulary as a string.
@@ -118,16 +118,16 @@ public class Excel extends Vocabulary {
return instance;
}
- public URI createResource(String localName) {
+ public IRI createResource(String localName) {
return createProperty(NS, localName);
}
/**
*
* @param localName
- * @return the new URI instance.
+ * @return the new IRI instance.
*/
- public URI createProperty(String localName) {
+ public IRI createProperty(String localName) {
return createProperty(NS, localName);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
----------------------------------------------------------------------
diff --git a/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java b/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
index 3650790..f970889 100644
--- a/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
+++ b/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
@@ -33,10 +33,10 @@ import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
-import org.openrdf.model.Resource;
-import org.openrdf.model.URI;
-import org.openrdf.model.Value;
-import org.openrdf.model.vocabulary.RDF;
+import org.eclipse.rdf4j.model.Resource;
+import org.eclipse.rdf4j.model.IRI;
+import org.eclipse.rdf4j.model.Value;
+import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -81,7 +81,7 @@ public class ExcelExtractorTest {
final ExtractionParameters extractionParameters = ExtractionParameters.newDefault();
final ExtractionContext extractionContext = new ExtractionContext(
extractor.getDescription().getExtractorName(),
- RDFUtils.uri("file://" + resource)
+ RDFUtils.iri("file://" + resource)
);
final InputStream is = this.getClass().getResourceAsStream(resource);
final CompositeTripleHandler compositeTripleHandler = new CompositeTripleHandler();
@@ -107,24 +107,24 @@ public class ExcelExtractorTest {
verifyTypeOccurrence(verifierTripleHandler, Excel.getInstance().cell , 18);
}
- private void verifyPredicateOccurrence(TripleHandler mock, URI predicate, int occurrence)
+ private void verifyPredicateOccurrence(TripleHandler mock, IRI predicate, int occurrence)
throws TripleHandlerException {
Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
Mockito.<Resource>anyObject(),
Mockito.eq(predicate),
Mockito.<Value>anyObject(),
- Mockito.<URI>any(),
+ Mockito.<IRI>any(),
Mockito.<ExtractionContext>anyObject()
);
}
- private void verifyTypeOccurrence(TripleHandler mock, URI type, int occurrence)
+ private void verifyTypeOccurrence(TripleHandler mock, IRI type, int occurrence)
throws TripleHandlerException {
Mockito.verify( mock, Mockito.times(occurrence)).receiveTriple(
Mockito.<Resource>anyObject(),
Mockito.eq(RDF.TYPE),
Mockito.eq(type),
- Mockito.<URI>any(),
+ Mockito.<IRI>any(),
Mockito.<ExtractionContext>anyObject()
);
}
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index e761507..0a8c69b 100644
--- a/pom.xml
+++ b/pom.xml
@@ -26,7 +26,7 @@
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<packaging>pom</packaging>
<name>Apache Any23</name>
@@ -199,7 +199,6 @@
<modules>
<module>api</module>
<module>test-resources</module>
- <module>nquads</module>
<module>csvutils</module>
<module>mime</module>
<module>encoding</module>
@@ -236,9 +235,9 @@
<maven.build.timestamp.format>yyyy-MM-dd HH:mm:ssZ</maven.build.timestamp.format>
<implementation.build>${scmBranch}@r${buildNumber}</implementation.build>
<implementation.build.tstamp>${maven.build.timestamp}</implementation.build.tstamp>
- <slf4j.logger.version>1.7.5</slf4j.logger.version>
- <sesame.version>2.7.14</sesame.version>
- <semargl.version>0.6.1</semargl.version>
+ <slf4j.logger.version>1.7.21</slf4j.logger.version>
+ <rdf4j.version>2.1.3</rdf4j.version>
+ <semargl.version>0.7</semargl.version>
<latest.stable.released>1.1</latest.stable.released>
<form.tracker.id>UA-59636188-1</form.tracker.id>
@@ -343,77 +342,77 @@
<!-- BEGIN: Sesame -->
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-model</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-model</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-api</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-api</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-turtle</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-turtle</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-rdfxml</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-rdfxml</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-ntriples</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-ntriples</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-nquads</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-nquads</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-n3</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-n3</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-trix</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-trix</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-rio-rdfjson</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-rdfjson</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-repository-sail</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-repository-sail</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-sail-memory</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-sail-memory</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
- <groupId>org.openrdf.sesame</groupId>
- <artifactId>sesame-repository-api</artifactId>
- <version>${sesame.version}</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-repository-api</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<dependency>
<groupId>org.semarglproject</groupId>
- <artifactId>semargl-sesame</artifactId>
+ <artifactId>semargl-rdf4j</artifactId>
<version>${semargl.version}</version>
</dependency>
<!-- END: Sesame -->
<!-- BEGIN: Misc -->
<dependency>
- <groupId>com.github.jsonld-java</groupId>
- <artifactId>jsonld-java-sesame</artifactId>
- <version>0.5.0</version>
+ <groupId>org.eclipse.rdf4j</groupId>
+ <artifactId>rdf4j-rio-jsonld</artifactId>
+ <version>${rdf4j.version}</version>
</dependency>
<!-- END: Misc -->
@@ -493,7 +492,7 @@
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
- <version>4.11</version>
+ <version>4.12</version>
<scope>test</scope>
</dependency>
<dependency>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/pom.xml
----------------------------------------------------------------------
diff --git a/service/pom.xml b/service/pom.xml
index 4276412..3264290 100644
--- a/service/pom.xml
+++ b/service/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.any23</groupId>
<artifactId>apache-any23</artifactId>
- <version>1.2-SNAPSHOT</version>
+ <version>2.0-SNAPSHOT</version>
<relativePath>../pom.xml</relativePath>
</parent>
@@ -43,11 +43,6 @@
<artifactId>apache-any23-core</artifactId>
<version>${project.version}</version>
</dependency>
- <dependency>
- <groupId>org.apache.any23</groupId>
- <artifactId>apache-any23-nquads</artifactId>
- <version>${project.version}</version>
- </dependency>
<!-- Logging -->
<dependency>
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/src/main/java/org/apache/any23/servlet/Servlet.java
----------------------------------------------------------------------
diff --git a/service/src/main/java/org/apache/any23/servlet/Servlet.java b/service/src/main/java/org/apache/any23/servlet/Servlet.java
index 1c13c3c..b60ad5f 100644
--- a/service/src/main/java/org/apache/any23/servlet/Servlet.java
+++ b/service/src/main/java/org/apache/any23/servlet/Servlet.java
@@ -27,7 +27,7 @@ import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.StringDocumentSource;
import org.apache.commons.httpclient.URI;
-import org.openrdf.rio.RDFFormat;
+import org.eclipse.rdf4j.rio.RDFFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -42,7 +42,7 @@ import java.util.regex.Pattern;
import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
/**
- * A <i>Servlet</i> that fetches a client-specified <i>URI</i>,
+ * A <i>Servlet</i> that fetches a client-specified <i>IRI</i>,
* RDFizes the content, and returns it in a format chosen by the client.
*
* @author Gabriele Renzi
@@ -52,7 +52,7 @@ public class Servlet extends HttpServlet {
private static final Logger LOG = LoggerFactory.getLogger(Servlet.class);
- public static final String DEFAULT_BASE_URI = "http://any23.org/tmp/";
+ public static final String DEFAULT_BASE_IRI = "http://any23.org/tmp/";
private static final long serialVersionUID = 8207685628715421336L;
@@ -70,9 +70,9 @@ public class Servlet extends HttpServlet {
responder.sendError(406, "Client accept header does not include a supported output format", report);
return;
}
- final String uri = getInputURIFromRequest(req);
+ final String uri = getInputIRIFromRequest(req);
if (uri == null) {
- responder.sendError(404, "Missing URI in GET request. Try /format/http://example.com/myfile", report);
+ responder.sendError(404, "Missing IRI in GET request. Try /format/http://example.com/myfile", report);
return;
}
final ExtractionParameters eps = getExtractionParameters(req);
@@ -88,7 +88,7 @@ public class Servlet extends HttpServlet {
responder.sendError(400, "Invalid POST request, no Content-Type for the message body specified", report);
return;
}
- final String uri = getInputURIFromRequest(req);
+ final String uri = getInputIRIFromRequest(req);
final String format = getFormatFromRequestOrNegotiation(req);
if (format == null) {
responder.sendError(406, "Client accept header does not include a supported output format", report);
@@ -97,7 +97,7 @@ public class Servlet extends HttpServlet {
final ExtractionParameters eps = getExtractionParameters(req);
if ("application/x-www-form-urlencoded".equals(getContentTypeHeader(req))) {
if (uri != null) {
- log("Attempting conversion to '" + format + "' from URI <" + uri + ">");
+ log("Attempting conversion to '" + format + "' from IRI <" + uri + ">");
responder.runExtraction(createHTTPDocumentSource(responder, uri, report), eps, format, report, annotate);
return;
}
@@ -111,7 +111,7 @@ public class Servlet extends HttpServlet {
}
log("Attempting conversion to '" + format + "' from body parameter");
responder.runExtraction(
- new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_URI, type),
+ new StringDocumentSource(req.getParameter("body"), Servlet.DEFAULT_BASE_IRI, type),
eps,
format,
report, annotate
@@ -122,7 +122,7 @@ public class Servlet extends HttpServlet {
responder.runExtraction(
new ByteArrayDocumentSource(
req.getInputStream(),
- Servlet.DEFAULT_BASE_URI,
+ Servlet.DEFAULT_BASE_IRI,
getContentTypeHeader(req)
),
eps,
@@ -169,7 +169,7 @@ public class Servlet extends HttpServlet {
return args[1];
}
- private String getInputURIFromRequest(HttpServletRequest request) {
+ private String getInputIRIFromRequest(HttpServletRequest request) {
if (request.getPathInfo() == null) return null;
String[] args = request.getPathInfo().split("/", 3);
if (args.length < 3) {
@@ -222,13 +222,13 @@ public class Servlet extends HttpServlet {
private DocumentSource createHTTPDocumentSource(WebResponder responder, String uri, boolean report)
throws IOException {
try {
- if (!isValidURI(uri)) {
+ if (!isValidIRI(uri)) {
throw new URISyntaxException(uri, "@@@");
}
return createHTTPDocumentSource(responder.getRunner().getHTTPClient(), uri);
} catch (URISyntaxException ex) {
- LOG.error("Invalid URI detected", ex);
- responder.sendError(400, "Invalid input URI " + uri, report);
+ LOG.error("Invalid IRI detected", ex);
+ responder.sendError(400, "Invalid input IRI " + uri, report);
return null;
}
}
@@ -238,7 +238,7 @@ public class Servlet extends HttpServlet {
return new HTTPDocumentSource(httpClient, uri);
}
- private boolean isValidURI(String s) {
+ private boolean isValidIRI(String s) {
try {
URI uri = new URI(s, false);
if (!"http".equals(uri.getScheme()) && !"https".equals(uri.getScheme())) {
http://git-wip-us.apache.org/repos/asf/any23/blob/445d13ab/service/src/main/resources/form.html
----------------------------------------------------------------------
diff --git a/service/src/main/resources/form.html b/service/src/main/resources/form.html
index 8a53d2c..e11a019 100644
--- a/service/src/main/resources/form.html
+++ b/service/src/main/resources/form.html
@@ -61,9 +61,9 @@ function showModal( id )
<p>Parses Microformats, RDFa, Microdata, RDF/XML, Turtle, N-Triples, JSON-LD and NQuads.</p>
<p>Download and install Any23: visit the <a href="http://any23.apache.org/" target="_blank">Developers Site</a> and the <a href="http://any23.apache.org/getting-started.html" target="_blank">Documentation</a>.
<hr />
- <h2>Convert document at URI</h2>
+ <h2>Convert document at IRI</h2>
<form class="well form-horizontal" method="get" action="any23/">
- <label>Pick an output format and enter the URI of a web document:</label>
+ <label>Pick an output format and enter the IRI of a web document:</label>
<div class="control-group">
<label class="control-label app-base-uri" for="format">http://.../</label>
@@ -204,18 +204,18 @@ function showModal( id )
</ul>
<h3>Compact API</h3>
<p>HTTP GET requests can be made
- to URIs of the shape</p>
+ to IRIs of the shape</p>
<pre><span class="app-base-uri">http://.../</span><em>format</em>/<em>input-uri</em></pre>
<p>The response is the input document converted to the desired output format.</p>
<h3>Form-style GET API</h3>
<p>HTTP GET requests can be made to
- the URI
+ the IRI
<code class="app-base-uri">http://.../</code> with the following
query parameters:
</p>
<table class="table">
- <tr><th>uri</th><td>URI of an input document.</td></tr>
+ <tr><th>uri</th><td>IRI of an input document.</td></tr>
<tr><th>format</th><td>Desired output format, defaults to <code>best</code>.</td></tr>
<tr><th>validation-mode</th><td>The validation level to be applied on the input. Possible values:<br/>
<code>none</code> (no validation applied);<br/>
@@ -308,7 +308,7 @@ Content-Length: 174
<tbody>
<tr><th>200 OK</th><td>Success</td></tr>
<tr><th>400 Bad Request</th><td>Missing or malformed input parameter</td></tr>
- <tr><th>404 Not Found</th><td>Malformed request URI</td></tr>
+ <tr><th>404 Not Found</th><td>Malformed request IRI</td></tr>
<tr><th>406 Not Acceptable</th><td>None of the media types specified in the <code>Accept</code> header are supported</td></tr>
<tr><th>415 Unsupported Media Type</th><td>Document body with unsupported media type was POSTed</td></tr>
<tr><th>501 Not Implemented</th><td>Extraction from input was successful, but yielded zero triples</td></tr>