You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2018/01/08 13:14:19 UTC
[3/6] any23 git commit: ANY23-320 Address @Ignore tests in Any23 and
ANY23-131 Nested Microdata are not extracted
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
index 8886e31..ed300af 100644
--- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
@@ -21,11 +21,9 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.html.AbstractExtractorTestCase;
import org.apache.any23.vocab.CSV;
import org.junit.Test;
-import org.eclipse.rdf4j.model.impl.LiteralImpl;
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
import org.eclipse.rdf4j.model.vocabulary.RDF;
import org.eclipse.rdf4j.model.vocabulary.XMLSchema;
-import org.eclipse.rdf4j.repository.RepositoryException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -36,93 +34,93 @@ import org.slf4j.LoggerFactory;
*/
public class CSVExtractorTest extends AbstractExtractorTestCase {
- private static final Logger logger = LoggerFactory
- .getLogger(CSVExtractorTest.class);
-
- @Override
- protected ExtractorFactory<?> getExtractorFactory() {
- return new CSVExtractorFactory();
- }
-
- @Test
- public void testExtractionCommaSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionSemicolonSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionTabSeparated() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- }
-
- @Test
- public void testTypeManagement() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 21);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionEmptyValue() throws Exception {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 25);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
- XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
- XMLSchema.INTEGER));
- assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
- assertContains(null, null,
- SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
- }
+ private static final Logger logger = LoggerFactory
+ .getLogger(CSVExtractorTest.class);
+
+ @Override
+ protected ExtractorFactory<?> getExtractorFactory() {
+ return new CSVExtractorFactory();
+ }
+
+ @Test
+ public void testExtractionCommaSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionSemicolonSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionTabSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testTypeManagement() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 21);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("2",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("5.2", XMLSchema.FLOAT));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("7.9", XMLSchema.FLOAT));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("10", XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionEmptyValue() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 25);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, SimpleValueFactory.getInstance().createLiteral("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, SimpleValueFactory.getInstance().createLiteral("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, SimpleValueFactory.getInstance().createLiteral("Michele", XMLSchema.STRING));
+ assertContains(null, null,
+ SimpleValueFactory.getInstance().createLiteral("Giovanni", XMLSchema.STRING));
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/60e93a76/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
index 855a88c..5354924 100644
--- a/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
+++ b/core/src/test/java/org/apache/any23/extractor/html/AbstractExtractorTestCase.java
@@ -31,6 +31,7 @@ import org.apache.any23.writer.RepositoryWriter;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
+import org.eclipse.rdf4j.common.iteration.Iterations;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
@@ -62,802 +63,799 @@ import java.util.Map;
*/
public abstract class AbstractExtractorTestCase extends AbstractAny23TestBase {
- /**
- * Base test document.
- */
- protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/"); // TODO:
- // change
- // base
- // IRI
- // string.
-
- /**
- * Internal connection used to collect extraction results.
- */
- protected RepositoryConnection conn;
-
- /**
- * The latest generated report.
- */
- private SingleDocumentExtractionReport report;
-
- private Sail store;
-
- private SailRepository repository;
-
- /**
- * Constructor.
- */
- public AbstractExtractorTestCase() {
- super();
- }
-
- /**
- * @return the factory of the extractor to be tested.
- */
- protected abstract ExtractorFactory<?> getExtractorFactory();
-
- /**
- * Test case initialization.
- *
- * @throws Exception
- */
- @Before
- public void setUp() throws Exception {
- super.setUp();
- store = new MemoryStore();
- repository = new SailRepository(store);
- repository.initialize();
- conn = repository.getConnection();
- }
-
- /**
- * Test case resources release.
- *
- * @throws RepositoryException
- */
- @After
- public void tearDown() throws RepositoryException {
- try {
- conn.close();
- } finally {
- repository.shutDown();
- }
- conn = null;
- report = null;
- store = null;
- repository = null;
- }
-
- /**
- * @return the connection to the memory repository.
- */
- protected RepositoryConnection getConnection() {
- return conn;
- }
-
- /**
- * @return the last generated report.
- */
- protected SingleDocumentExtractionReport getReport() {
- return report;
- }
-
- /**
- * Returns the list of issues raised by a given extractor.
- *
- * @param extractorName
- * name of the extractor.
- * @return collection of issues.
- */
- protected Collection<IssueReport.Issue> getIssues(String extractorName) {
- for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report
- .getExtractorToIssues().entrySet()) {
- if (issueEntry.getKey().equals(extractorName)) {
- return issueEntry.getValue();
- }
- }
- return Collections.emptyList();
- }
-
- /**
- * Returns the list of issues raised by the extractor under testing.
- *
- * @return collection of issues.
- */
- protected Collection<IssueReport.Issue> getIssues() {
- return getIssues(getExtractorFactory().getExtractorName());
- }
-
- /**
- * Applies the extractor provided by the {@link #getExtractorFactory()} to
- * the specified resource.
- *
- * @param resource
- * resource name.
- * @throws org.apache.any23.extractor.ExtractionException
- * @throws IOException
- */
- // TODO: MimeType detector to null forces the execution of all extractors,
- // but extraction
- // tests should be based on mimetype detection.
- protected void extract(String resource) throws ExtractionException,
- IOException {
- SingleDocumentExtraction ex = new SingleDocumentExtraction(
- new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI
- .toString()), getExtractorFactory(),
- new RepositoryWriter(conn));
- ex.setMIMETypeDetector(null);
- report = ex.run();
- }
-
- /**
- * Performs data extraction over the content of a resource and assert that
- * the extraction was fine.
- *
- * @param resource
- * resource name.
- * @param assertNoIssues
- * if <code>true</code>invokes {@link #assertNoIssues()} after
- * the extraction.
- */
- protected void assertExtract(String resource, boolean assertNoIssues) {
- try {
- extract(resource);
- if (assertNoIssues)
- assertNoIssues();
- } catch (ExtractionException ex) {
- throw new RuntimeException(ex);
- } catch (IOException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Performs data extraction over the content of a resource and assert that
- * the extraction was fine and raised no issues.
- *
- * @param resource
- */
- protected void assertExtract(String resource) {
- assertExtract(resource, true);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(IRI p, Resource o) throws RepositoryException {
- assertContains(null, p, o);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(IRI p, String o) throws RepositoryException {
- assertContains(null, p, RDFUtils.literal(o));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(_ p o)</code>.
- *
- * @param p
- * predicate
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(IRI p, Resource o)
- throws RepositoryException {
- assertNotContains(null, p, o);
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, Value o)
- throws RepositoryException {
- Assert.assertTrue(
- getFailedExtractionMessage()
- + String.format("Cannot find triple (%s %s %s)", s, p,
- o), conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, String o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, RDFUtils.literal(o), false));
- }
-
- /**
- * Asserts that the extracted triples contain the pattern
- * <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, Resource o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the model contains at least a statement.
- *
- * @throws RepositoryException
- */
- protected void assertModelNotEmpty() throws RepositoryException {
- Assert.assertFalse("The model is expected to not be empty."
- + getFailedExtractionMessage(), conn.isEmpty());
- }
-
- /**
- * Asserts that the model doesn't contain the pattern <code>(s p o)</code>
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @throws RepositoryException
- */
- protected void assertNotContains(Resource s, IRI p, Literal o)
- throws RepositoryException {
- Assert.assertFalse(getFailedExtractionMessage(),
- conn.hasStatement(s, p, o, false));
- }
-
- /**
- * Asserts that the model is expected to contains no statements.
- *
- * @throws RepositoryException
- */
- protected void assertModelEmpty() throws RepositoryException {
- Assert.assertTrue(getFailedExtractionMessage(), conn.isEmpty());
- }
-
- /**
- * Asserts that the extraction generated no issues.
- */
- protected void assertNoIssues() {
- for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
- .getExtractorToIssues().entrySet()) {
- if (entry.getValue().size() > 0) {
- System.out.println("Unexpected issue for extractor " + entry.getKey()
- + " : " + entry.getValue());
- }
- for(Issue nextIssue : entry.getValue()) {
- if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
- Assert.fail("Unexpected issue for extractor " + entry.getKey()
- + " : " + entry.getValue());
- }
- }
- }
- }
-
- /**
- * Asserts that an issue has been produced by the processed
- * {@link org.apache.any23.extractor.Extractor}.
- *
- * @param level
- * expected issue level
- * @param issueRegex
- * regex matching the expected human readable issue message.
- */
- protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
- final Collection<IssueReport.Issue> issues = getIssues(getExtractorFactory()
- .getExtractorName());
- boolean found = false;
- for (IssueReport.Issue issue : issues) {
- if (issue.getLevel() == level
- && issue.getMessage().matches(issueRegex)) {
- found = true;
- break;
- }
- }
- Assert.assertTrue(String.format(
- "Cannot find issue with level %s matching expression '%s'",
- level, issueRegex), found);
- }
-
- /**
- * Verifies that the current model contains all the given statements.
- *
- * @param statements
- * list of statements to be verified.
- * @throws RepositoryException
- */
- public void assertContainsModel(Statement[] statements)
- throws RepositoryException {
- for (Statement statement : statements) {
- assertContains(statement);
- }
- }
-
- /**
- * Verifies that the current model contains all the statements declared in
- * the specified <code>modelFile</code>.
- *
- * @param modelResource
- * the resource containing the model.
- * @throws RDFHandlerException
- * @throws IOException
- * @throws RDFParseException
- * @throws RepositoryException
- */
- public void assertContainsModel(String modelResource)
- throws RDFHandlerException, IOException, RDFParseException,
- RepositoryException {
- getConnection().remove(null, SINDICE.getInstance().date, (Value) null,
- (Resource) null);
- getConnection().remove(null, SINDICE.getInstance().size, (Value) null,
- (Resource) null);
- assertContainsModel(RDFUtils.parseRDF(modelResource));
- }
-
- /**
- * Asserts that the given pattern <code>(s p o)</code> satisfies the
- * expected number of statements.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
- throws RDFHandlerException, RepositoryException {
- int statementsSize = getStatementsSize(s, p, o);
- if (statementsSize != expected) {
- getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
- }
-
- Assert.assertEquals("Unexpected number of matching statements.",
- expected, statementsSize);
- }
-
- /**
- * Asserts that the given pattern <code>(_ p o)</code> satisfies the
- * expected number of statements.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(IRI p, Value o, int expected)
- throws RDFHandlerException, RepositoryException {
- assertStatementsSize(null, p, o, expected);
- }
-
- /**
- * Asserts that the given pattern <code>(_ p o)</code> satisfies the
- * expected number of statements.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @param expected
- * expected matches.
- * @throws RepositoryException
- */
- protected void assertStatementsSize(IRI p, String o, int expected)
- throws RDFHandlerException, RepositoryException {
- assertStatementsSize(p, o == null ? null : RDFUtils.literal(o),
- expected);
- }
-
- /**
- * Asserts that the given pattern <code>(s p _)</code> is not present.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @throws RepositoryException
- */
- protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
- RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
- true);
- try {
- Assert.assertFalse("Expected no statements.", statements.hasNext());
- } finally {
- statements.close();
- }
- }
-
- /**
- * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it
- * is expected to exists and be just one.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @return the matching blank subject.
- * @throws RepositoryException
- */
- protected Resource findExactlyOneBlankSubject(IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
- try {
- Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
- Statement stmt = it.next();
- Resource result = stmt.getSubject();
- Assert.assertTrue(getFailedExtractionMessage(),
- result instanceof BNode);
- Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
- return result;
- } finally {
- it.close();
- }
- }
-
- /**
- * Returns the object matching the pattern <code>(s p o)</code>, it is
- * expected to exists and be just one.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return the matching object.
- * @throws RepositoryException
- */
- protected Value findExactlyOneObject(Resource s, IRI p)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
- try {
- Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
- return it.next().getObject();
- } finally {
- it.close();
- }
- }
-
- /**
- * Returns all the subjects matching the pattern <code>(s? p o)</code>.
- *
- * @param p
- * predicate.
- * @param o
- * object.
- * @return list of matching subjects.
- * @throws RepositoryException
- */
- protected List<Resource> findSubjects(IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
- List<Resource> subjects = new ArrayList<Resource>();
- try {
- Statement statement;
- while (it.hasNext()) {
- statement = it.next();
- subjects.add(statement.getSubject());
- }
- } finally {
- it.close();
- }
- return subjects;
- }
-
- /**
- * Returns all the objects matching the pattern <code>(s p _)</code>.
- *
- * @param s
- * predicate.
- * @param p
- * predicate.
- * @return list of matching objects.
- * @throws RepositoryException
- */
- protected List<Value> findObjects(Resource s, IRI p)
- throws RepositoryException {
- RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
- List<Value> objects = new ArrayList<Value>();
- try {
- Statement statement;
- while (it.hasNext()) {
- statement = it.next();
- objects.add(statement.getObject());
- }
- } finally {
- it.close();
- }
- return objects;
- }
-
- /**
- * Finds the object matching the pattern <code>(s p _)</code>, asserts to
- * find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate
- * @return matching object.
- * @throws org.openrdf.repository.RepositoryException
- */
- protected Value findObject(Resource s, IRI p) throws RepositoryException {
- RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
- true);
- try {
- Assert.assertTrue("Expected at least a statement.",
- statements.hasNext());
- return (statements.next().getObject());
- } finally {
- statements.close();
- }
- }
-
- /**
- * Finds the resource object matching the pattern <code>(s p _)</code>,
- * asserts to find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return matching object.
- * @throws RepositoryException
- */
- protected Resource findObjectAsResource(Resource s, IRI p)
- throws RepositoryException {
- final Value v = findObject(s, p);
- try {
- return (Resource) v;
- } catch (ClassCastException cce) {
- Assert.fail("Expected resource object, found: "
- + v.getClass().getSimpleName());
- throw new IllegalStateException();
- }
- }
-
- /**
- * Finds the literal object matching the pattern <code>(s p _)</code>,
- * asserts to find exactly one result.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @return matching object.
- * @throws RepositoryException
- */
- protected String findObjectAsLiteral(Resource s, IRI p)
- throws RepositoryException {
- return findObject(s, p).stringValue();
- }
-
- /**
- * Dumps the extracted model in <i>Turtle</i> format.
- *
- * @return a string containing the model in Turtle.
- * @throws RepositoryException
- */
- protected String dumpModelToTurtle() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the extracted model in <i>NQuads</i> format.
- *
- * @return a string containing the model in NQuads.
- * @throws RepositoryException
- */
- protected String dumpModelToNQuads() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the extracted model in <i>RDFXML</i> format.
- *
- * @return a string containing the model in RDFXML.
- * @throws RepositoryException
- */
- protected String dumpModelToRDFXML() throws RepositoryException {
- StringWriter w = new StringWriter();
- try {
- conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
- return w.toString();
- } catch (RDFHandlerException ex) {
- throw new RuntimeException(ex);
- }
- }
-
- /**
- * Dumps the list of statements contained in the extracted model.
- *
- * @return list of extracted statements.
- * @throws RepositoryException
- */
- protected List<Statement> dumpAsListOfStatements()
- throws RepositoryException {
- return conn.getStatements(null, null, null, false).asList();
- }
-
- /**
- * @return string containing human readable statements.
- * @throws RepositoryException
- */
- protected String dumpHumanReadableTriples() throws RepositoryException {
- StringBuilder sb = new StringBuilder();
- RepositoryResult<Statement> result = conn.getStatements(null, null,
- null, false);
- while (result.hasNext()) {
- Statement statement = result.next();
- sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
- statement.getPredicate(), statement.getObject(),
- statement.getContext()));
-
- }
- return sb.toString();
- }
-
- /**
- * Checks that a statement is contained in the extracted model. If the
- * statement declares bnodes, they are replaced with <code>_</code>
- * patterns.
- *
- * @param statement
- * @throws RepositoryException
- */
- // TODO: bnode check is too weak, introduce graph omomorphism check.
- protected void assertContains(Statement statement)
- throws RepositoryException {
- Assert.assertTrue("Cannot find statement " + statement + " in model.",
- conn.hasStatement(
- statement.getSubject() instanceof BNode ? null
- : statement.getSubject(), statement
- .getPredicate(),
- statement.getObject() instanceof BNode ? null
- : statement.getObject(), false));
- }
-
- /**
- * Assert that the model contains the statement <code>(s p l)</code> where
- * <code>l</code> is a literal.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param l
- * literal content.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, String l)
- throws RepositoryException {
- assertContains(s, p, RDFUtils.literal(l));
- }
-
- /**
- * Assert that the model contains the statement <code>(s p l)</code> where
- * <code>l</code> is a language literal.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param l
- * literal content.
- * @param lang
- * literal language.
- * @throws RepositoryException
- */
- protected void assertContains(Resource s, IRI p, String l, String lang)
- throws RepositoryException {
- assertContains(s, p, RDFUtils.literal(l, lang));
- }
-
- /**
- * Returns all statements matching the pattern <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @return list of statements.
- * @throws RepositoryException
- */
- protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
- Value o) throws RepositoryException {
- return conn.getStatements(s, p, o, false);
- }
-
- /**
- * Counts all statements matching the pattern <code>(s p o)</code>.
- *
- * @param s
- * subject.
- * @param p
- * predicate.
- * @param o
- * object.
- * @return number of matches.
- * @throws RepositoryException
- */
- protected int getStatementsSize(Resource s, IRI p, Value o)
- throws RepositoryException {
- RepositoryResult<Statement> result = getStatements(s, p, o);
- int count = 0;
- try {
- while (result.hasNext()) {
- result.next();
- count++;
- }
- } finally {
- result.close();
- }
- return count;
- }
-
- private String getFailedExtractionMessage() throws RepositoryException {
- return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
- }
+ /**
+ * Base test document.
+ */
+ //TODO: change base IRI string.
+ protected static IRI baseIRI = RDFUtils.iri("http://bob.example.com/");
+
+ /**
+ * Internal connection used to collect extraction results.
+ */
+ protected RepositoryConnection conn;
+
+ /**
+ * The latest generated report.
+ */
+ private SingleDocumentExtractionReport report;
+
+ private Sail store;
+
+ private SailRepository repository;
+
+ /**
+ * Constructor.
+ */
+ public AbstractExtractorTestCase() {
+ super();
+ }
+
+ /**
+ * @return the factory of the extractor to be tested.
+ */
+ protected abstract ExtractorFactory<?> getExtractorFactory();
+
+ /**
+ * Test case initialization.
+ *
+ * @throws Exception
+ */
+ @Before
+ public void setUp() throws Exception {
+ super.setUp();
+ store = new MemoryStore();
+ repository = new SailRepository(store);
+ repository.initialize();
+ conn = repository.getConnection();
+ }
+
+ /**
+ * Test case resources release.
+ *
+ * @throws RepositoryException
+ */
+ @After
+ public void tearDown() throws RepositoryException {
+ try {
+ conn.close();
+ } finally {
+ repository.shutDown();
+ }
+ conn = null;
+ report = null;
+ store = null;
+ repository = null;
+ }
+
+ /**
+ * @return the connection to the memory repository.
+ */
+ protected RepositoryConnection getConnection() {
+ return conn;
+ }
+
+ /**
+ * @return the last generated report.
+ */
+ protected SingleDocumentExtractionReport getReport() {
+ return report;
+ }
+
+ /**
+ * Returns the list of issues raised by a given extractor.
+ *
+ * @param extractorName
+ * name of the extractor.
+ * @return collection of issues.
+ */
+ protected Collection<IssueReport.Issue> getIssues(String extractorName) {
+ for (Map.Entry<String, Collection<IssueReport.Issue>> issueEntry : report
+ .getExtractorToIssues().entrySet()) {
+ if (issueEntry.getKey().equals(extractorName)) {
+ return issueEntry.getValue();
+ }
+ }
+ return Collections.emptyList();
+ }
+
+ /**
+ * Returns the list of issues raised by the extractor under testing.
+ *
+ * @return collection of issues.
+ */
+ protected Collection<IssueReport.Issue> getIssues() {
+ return getIssues(getExtractorFactory().getExtractorName());
+ }
+
+ /**
+ * Applies the extractor provided by the {@link #getExtractorFactory()} to
+ * the specified resource.
+ *
+ * @param resource
+ * resource name.
+ * @throws org.apache.any23.extractor.ExtractionException
+ * @throws IOException
+ */
+ // TODO: MimeType detector to null forces the execution of all extractors,
+ // but extraction
+ // tests should be based on mimetype detection.
+ protected void extract(String resource) throws ExtractionException,
+ IOException {
+ SingleDocumentExtraction ex = new SingleDocumentExtraction(
+ new HTMLFixture(copyResourceToTempFile(resource)).getOpener(baseIRI
+ .toString()), getExtractorFactory(),
+ new RepositoryWriter(conn));
+ ex.setMIMETypeDetector(null);
+ report = ex.run();
+ }
+
+    /**
+     * Extracts the content of a resource, turning any extraction or I/O
+     * failure into a {@link RuntimeException}, and optionally verifies that
+     * no issue was raised.
+     *
+     * @param resource
+     *            resource name.
+     * @param assertNoIssues
+     *            if <code>true</code> invokes {@link #assertNoIssues()} after
+     *            the extraction.
+     */
+    protected void assertExtract(String resource, boolean assertNoIssues) {
+        try {
+            extract(resource);
+        } catch (ExtractionException extractionFailure) {
+            throw new RuntimeException(extractionFailure);
+        } catch (IOException ioFailure) {
+            throw new RuntimeException(ioFailure);
+        }
+
+        // The issue check throws no checked exception, so it can live
+        // outside the try block.
+        if (assertNoIssues) {
+            assertNoIssues();
+        }
+    }
+
+ /**
+ * Performs data extraction over the content of a resource and assert that
+ * the extraction was fine and raised no issues.
+ *
+ * @param resource
+ */
+ protected void assertExtract(String resource) {
+ assertExtract(resource, true);
+ }
+
+ /**
+ * Asserts that the extracted triples contain the pattern
+ * <code>(_ p o)</code>.
+ *
+ * @param p
+ * predicate
+ * @param o
+ * object.
+ * @throws RepositoryException
+ */
+ protected void assertContains(IRI p, Resource o) throws RepositoryException {
+ assertContains(null, p, o);
+ }
+
+ /**
+ * Asserts that the extracted triples contain the pattern
+ * <code>(_ p o)</code>.
+ *
+ * @param p
+ * predicate
+ * @param o
+ * object.
+ * @throws RepositoryException
+ */
+ protected void assertContains(IRI p, String o) throws RepositoryException {
+ assertContains(null, p, RDFUtils.literal(o));
+ }
+
+ /**
+ * Asserts that the extracted triples contain the pattern
+ * <code>(_ p o)</code>.
+ *
+ * @param p
+ * predicate
+ * @param o
+ * object.
+ * @throws RepositoryException
+ */
+ protected void assertNotContains(IRI p, Resource o)
+ throws RepositoryException {
+ assertNotContains(null, p, o);
+ }
+
+    /**
+     * Asserts that the extracted triples contain the pattern
+     * <code>(s p o)</code>. A <code>null</code> component acts as a wildcard.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertContains(Resource s, IRI p, Value o)
+            throws RepositoryException {
+        if (conn.hasStatement(s, p, o, false)) {
+            return;
+        }
+        // Serializing the model is expensive: do it on the failure path only.
+        Assert.fail(getFailedExtractionMessage()
+                + String.format("Cannot find triple (%s %s %s)", s, p, o));
+    }
+
+    /**
+     * Asserts that the extracted triples do NOT contain the pattern
+     * <code>(s p o)</code>, where <code>o</code> is the literal built from
+     * the given string.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            literal content of the object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertNotContains(Resource s, IRI p, String o)
+            throws RepositoryException {
+        if (conn.hasStatement(s, p, RDFUtils.literal(o), false)) {
+            Assert.fail(getFailedExtractionMessage());
+        }
+    }
+
+    /**
+     * Asserts that the extracted triples do NOT contain the pattern
+     * <code>(s p o)</code>.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertNotContains(Resource s, IRI p, Resource o)
+            throws RepositoryException {
+        if (conn.hasStatement(s, p, o, false)) {
+            Assert.fail(getFailedExtractionMessage());
+        }
+    }
+
+    /**
+     * Asserts that the model contains at least a statement.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertModelNotEmpty() throws RepositoryException {
+        if (conn.isEmpty()) {
+            Assert.fail("The model is expected to not be empty."
+                    + getFailedExtractionMessage());
+        }
+    }
+
+    /**
+     * Asserts that the model doesn't contain the pattern
+     * <code>(s p o)</code>, where <code>o</code> is a literal.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @param o
+     *            object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertNotContains(Resource s, IRI p, Literal o)
+            throws RepositoryException {
+        if (conn.hasStatement(s, p, o, false)) {
+            Assert.fail(getFailedExtractionMessage());
+        }
+    }
+
+    /**
+     * Asserts that the model contains no statements.
+     * <p>
+     * The failure message embeds a dump of the whole model, so it is built
+     * only when the assertion actually fails.
+     *
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected void assertModelEmpty() throws RepositoryException {
+        if (!conn.isEmpty()) {
+            Assert.fail(getFailedExtractionMessage());
+        }
+    }
+
+ /**
+ * Asserts that the extraction generated no issues.
+ */
+ protected void assertNoIssues() {
+ for (Map.Entry<String, Collection<IssueReport.Issue>> entry : report
+ .getExtractorToIssues().entrySet()) {
+ if (entry.getValue().size() > 0) {
+ System.out.println("Unexpected issue for extractor " + entry.getKey()
+ + " : " + entry.getValue());
+ }
+ for(Issue nextIssue : entry.getValue()) {
+ if(nextIssue.getLevel() == IssueLevel.ERROR || nextIssue.getLevel() == IssueLevel.FATAL) {
+ Assert.fail("Unexpected issue for extractor " + entry.getKey()
+ + " : " + entry.getValue());
+ }
+ }
+ }
+ }
+
+    /**
+     * Asserts that the extractor under test produced at least one issue with
+     * the given level whose message fully matches the given regular
+     * expression.
+     *
+     * @param level
+     *            expected issue level
+     * @param issueRegex
+     *            regex matching the expected human readable issue message.
+     */
+    protected void assertIssue(IssueReport.IssueLevel level, String issueRegex) {
+        final String extractorName = getExtractorFactory().getExtractorName();
+        boolean matched = false;
+        for (IssueReport.Issue candidate : getIssues(extractorName)) {
+            if (candidate.getLevel() != level) {
+                continue;
+            }
+            if (!candidate.getMessage().matches(issueRegex)) {
+                continue;
+            }
+            matched = true;
+            break;
+        }
+        final String failure = String.format(
+                "Cannot find issue with level %s matching expression '%s'",
+                level, issueRegex);
+        Assert.assertTrue(failure, matched);
+    }
+
+    /**
+     * Verifies that the current model contains every one of the given
+     * statements, checking them in order and failing on the first miss.
+     *
+     * @param statements
+     *            list of statements to be verified.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    public void assertContainsModel(Statement[] statements)
+            throws RepositoryException {
+        for (int i = 0; i < statements.length; i++) {
+            assertContains(statements[i]);
+        }
+    }
+
+    /**
+     * Verifies that the current model contains all the statements declared in
+     * the specified <code>modelResource</code>.
+     * <p>
+     * Before the comparison, the statements having the SINDICE
+     * <code>date</code> and <code>size</code> predicates are removed from the
+     * test repository, so this method modifies the extracted model.
+     * NOTE(review): presumably those values vary between runs - confirm.
+     *
+     * @param modelResource
+     *            the resource containing the model.
+     * @throws RDFHandlerException
+     *             if the expected model cannot be handled.
+     * @throws IOException
+     *             if the expected model cannot be read.
+     * @throws RDFParseException
+     *             if the expected model cannot be parsed.
+     * @throws RepositoryException
+     *             if the repository cannot be accessed.
+     */
+    public void assertContainsModel(String modelResource)
+            throws RDFHandlerException, IOException, RDFParseException,
+            RepositoryException {
+        final SINDICE vocabulary = SINDICE.getInstance();
+        final RepositoryConnection connection = getConnection();
+
+        connection.remove(null, vocabulary.date, (Value) null,
+                (Resource) null);
+        connection.remove(null, vocabulary.size, (Value) null,
+                (Resource) null);
+
+        assertContainsModel(RDFUtils.parseRDF(modelResource));
+    }
+
+ /**
+ * Asserts that the given pattern <code>(s p o)</code> satisfies the
+ * expected number of statements.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @param expected
+ * expected matches.
+ * @throws RepositoryException
+ */
+ protected void assertStatementsSize(Resource s, IRI p, Value o, int expected)
+ throws RDFHandlerException, RepositoryException {
+ int statementsSize = getStatementsSize(s, p, o);
+ if (statementsSize != expected) {
+ getConnection().exportStatements(s, p, o, true, Rio.createWriter(RDFFormat.NQUADS, System.out));
+ }
+
+ Assert.assertEquals("Unexpected number of matching statements.",
+ expected, statementsSize);
+ }
+
+ /**
+ * Asserts that the given pattern <code>(_ p o)</code> satisfies the
+ * expected number of statements.
+ *
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @param expected
+ * expected matches.
+ * @throws RepositoryException
+ */
+ protected void assertStatementsSize(IRI p, Value o, int expected)
+ throws RDFHandlerException, RepositoryException {
+ assertStatementsSize(null, p, o, expected);
+ }
+
+ /**
+ * Asserts that the given pattern <code>(_ p o)</code> satisfies the
+ * expected number of statements.
+ *
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @param expected
+ * expected matches.
+ * @throws RepositoryException
+ */
+ protected void assertStatementsSize(IRI p, String o, int expected)
+ throws RDFHandlerException, RepositoryException {
+ assertStatementsSize(p, o == null ? null : RDFUtils.literal(o),
+ expected);
+ }
+
+ /**
+ * Asserts that the given pattern <code>(s p _)</code> is not present.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @throws RepositoryException
+ */
+ protected void assertNotFound(Resource s, IRI p) throws RepositoryException {
+ RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
+ true);
+ try {
+ Assert.assertFalse("Expected no statements.", statements.hasNext());
+ } finally {
+ statements.close();
+ }
+ }
+
+ /**
+ * Returns the blank subject matching the pattern <code>(_:b p o)</code>, it
+ * is expected to exists and be just one.
+ *
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @return the matching blank subject.
+ * @throws RepositoryException
+ */
+ protected Resource findExactlyOneBlankSubject(IRI p, Value o)
+ throws RepositoryException {
+ RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
+ try {
+ Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
+ Statement stmt = it.next();
+ Resource result = stmt.getSubject();
+ Assert.assertTrue(getFailedExtractionMessage(),
+ result instanceof BNode);
+ Assert.assertFalse(getFailedExtractionMessage(), it.hasNext());
+ return result;
+ } finally {
+ it.close();
+ }
+ }
+
+ /**
+ * Returns the object matching the pattern <code>(s p o)</code>, it is
+ * expected to exists and be just one.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @return the matching object.
+ * @throws RepositoryException
+ */
+ protected Value findExactlyOneObject(Resource s, IRI p)
+ throws RepositoryException {
+ RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
+ try {
+ Assert.assertTrue(getFailedExtractionMessage(), it.hasNext());
+ return it.next().getObject();
+ } finally {
+ it.close();
+ }
+ }
+
+ /**
+ * Returns all the subjects matching the pattern <code>(s? p o)</code>.
+ *
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @return list of matching subjects.
+ * @throws RepositoryException
+ */
+ protected List<Resource> findSubjects(IRI p, Value o)
+ throws RepositoryException {
+ RepositoryResult<Statement> it = conn.getStatements(null, p, o, false);
+ List<Resource> subjects = new ArrayList<Resource>();
+ try {
+ Statement statement;
+ while (it.hasNext()) {
+ statement = it.next();
+ subjects.add(statement.getSubject());
+ }
+ } finally {
+ it.close();
+ }
+ return subjects;
+ }
+
+ /**
+ * Returns all the objects matching the pattern <code>(s p _)</code>.
+ *
+ * @param s
+ * predicate.
+ * @param p
+ * predicate.
+ * @return list of matching objects.
+ * @throws RepositoryException
+ */
+ protected List<Value> findObjects(Resource s, IRI p)
+ throws RepositoryException {
+ RepositoryResult<Statement> it = conn.getStatements(s, p, null, false);
+ List<Value> objects = new ArrayList<Value>();
+ try {
+ Statement statement;
+ while (it.hasNext()) {
+ statement = it.next();
+ objects.add(statement.getObject());
+ }
+ } finally {
+ it.close();
+ }
+ return objects;
+ }
+
+ /**
+ * Finds the object matching the pattern <code>(s p _)</code>, asserts to
+ * find exactly one result.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate
+ * @return matching object.
+ * @throws org.openrdf.repository.RepositoryException
+ */
+ protected Value findObject(Resource s, IRI p) throws RepositoryException {
+ RepositoryResult<Statement> statements = conn.getStatements(s, p, null,
+ true);
+ try {
+ Assert.assertTrue("Expected at least a statement.",
+ statements.hasNext());
+ return (statements.next().getObject());
+ } finally {
+ statements.close();
+ }
+ }
+
+    /**
+     * Finds the object returned by {@link #findObject(Resource, IRI)} for
+     * the pattern <code>(s p _)</code> and asserts that it is a
+     * {@link Resource}.
+     *
+     * @param s
+     *            subject.
+     * @param p
+     *            predicate.
+     * @return matching object.
+     * @throws RepositoryException
+     *             if the repository cannot be queried.
+     */
+    protected Resource findObjectAsResource(Resource s, IRI p)
+            throws RepositoryException {
+        final Value v = findObject(s, p);
+        // Check the type explicitly instead of relying on a
+        // ClassCastException for control flow.
+        if (v instanceof Resource) {
+            return (Resource) v;
+        }
+        Assert.fail("Expected resource object, found: "
+                + v.getClass().getSimpleName());
+        // Assert.fail() always throws; this only satisfies the compiler.
+        throw new IllegalStateException();
+    }
+
+ /**
+ * Finds the literal object matching the pattern <code>(s p _)</code>,
+ * asserts to find exactly one result.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @return matching object.
+ * @throws RepositoryException
+ */
+ protected String findObjectAsLiteral(Resource s, IRI p)
+ throws RepositoryException {
+ return findObject(s, p).stringValue();
+ }
+
+ /**
+ * Dumps the extracted model in <i>Turtle</i> format.
+ *
+ * @return a string containing the model in Turtle.
+ * @throws RepositoryException
+ */
+ protected String dumpModelToTurtle() throws RepositoryException {
+ StringWriter w = new StringWriter();
+ try {
+ conn.export(Rio.createWriter(RDFFormat.TURTLE, w));
+ return w.toString();
+ } catch (RDFHandlerException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ /**
+ * Dumps the extracted model in <i>NQuads</i> format.
+ *
+ * @return a string containing the model in NQuads.
+ * @throws RepositoryException
+ */
+ protected String dumpModelToNQuads() throws RepositoryException {
+ StringWriter w = new StringWriter();
+ try {
+ conn.export(Rio.createWriter(RDFFormat.NQUADS, w));
+ return w.toString();
+ } catch (RDFHandlerException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ /**
+ * Dumps the extracted model in <i>RDFXML</i> format.
+ *
+ * @return a string containing the model in RDFXML.
+ * @throws RepositoryException
+ */
+ protected String dumpModelToRDFXML() throws RepositoryException {
+ StringWriter w = new StringWriter();
+ try {
+ conn.export(Rio.createWriter(RDFFormat.RDFXML, w));
+ return w.toString();
+ } catch (RDFHandlerException ex) {
+ throw new RuntimeException(ex);
+ }
+ }
+
+ /**
+ * Dumps the list of statements contained in the extracted model.
+ *
+ * @return list of extracted statements.
+ * @throws RepositoryException
+ */
+ protected List<Statement> dumpAsListOfStatements()
+ throws RepositoryException {
+ return Iterations.asList(conn.getStatements(null, null, null, false));
+ }
+
+ /**
+ * @return string containing human readable statements.
+ * @throws RepositoryException
+ */
+ protected String dumpHumanReadableTriples() throws RepositoryException {
+ StringBuilder sb = new StringBuilder();
+ RepositoryResult<Statement> result = conn.getStatements(null, null,
+ null, false);
+ while (result.hasNext()) {
+ Statement statement = result.next();
+ sb.append(String.format("%s %s %s %s\n", statement.getSubject(),
+ statement.getPredicate(), statement.getObject(),
+ statement.getContext()));
+
+ }
+ return sb.toString();
+ }
+
+ /**
+ * Checks that a statement is contained in the extracted model. If the
+ * statement declares bnodes, they are replaced with <code>_</code>
+ * patterns.
+ *
+ * @param statement
+ * @throws RepositoryException
+ */
+ // TODO: bnode check is too weak, introduce graph omomorphism check.
+ protected void assertContains(Statement statement)
+ throws RepositoryException {
+ Assert.assertTrue("Cannot find statement " + statement + " in model.",
+ conn.hasStatement(
+ statement.getSubject() instanceof BNode ? null
+ : statement.getSubject(), statement
+ .getPredicate(),
+ statement.getObject() instanceof BNode ? null
+ : statement.getObject(), false));
+ }
+
+ /**
+ * Assert that the model contains the statement <code>(s p l)</code> where
+ * <code>l</code> is a literal.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @param l
+ * literal content.
+ * @throws RepositoryException
+ */
+ protected void assertContains(Resource s, IRI p, String l)
+ throws RepositoryException {
+ assertContains(s, p, RDFUtils.literal(l));
+ }
+
+ /**
+ * Assert that the model contains the statement <code>(s p l)</code> where
+ * <code>l</code> is a language literal.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @param l
+ * literal content.
+ * @param lang
+ * literal language.
+ * @throws RepositoryException
+ */
+ protected void assertContains(Resource s, IRI p, String l, String lang)
+ throws RepositoryException {
+ assertContains(s, p, RDFUtils.literal(l, lang));
+ }
+
+ /**
+ * Returns all statements matching the pattern <code>(s p o)</code>.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @return list of statements.
+ * @throws RepositoryException
+ */
+ protected RepositoryResult<Statement> getStatements(Resource s, IRI p,
+ Value o) throws RepositoryException {
+ return conn.getStatements(s, p, o, false);
+ }
+
+ /**
+ * Counts all statements matching the pattern <code>(s p o)</code>.
+ *
+ * @param s
+ * subject.
+ * @param p
+ * predicate.
+ * @param o
+ * object.
+ * @return number of matches.
+ * @throws RepositoryException
+ */
+ protected int getStatementsSize(Resource s, IRI p, Value o)
+ throws RepositoryException {
+ RepositoryResult<Statement> result = getStatements(s, p, o);
+ int count = 0;
+ try {
+ while (result.hasNext()) {
+ result.next();
+ count++;
+ }
+ } finally {
+ result.close();
+ }
+ return count;
+ }
+
+ private String getFailedExtractionMessage() throws RepositoryException {
+ return "Assertion failed! Extracted triples:\n" + dumpModelToNQuads();
+ }
}
\ No newline at end of file