You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2013/09/02 01:52:23 UTC
git commit: ANY23-137 : Initial replacement of Any23 RDFA with Semargl
Updated Branches:
refs/heads/ANY23-137 [created] 9f60d3252
ANY23-137 : Initial replacement of Any23 RDFA with Semargl
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/9f60d325
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/9f60d325
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/9f60d325
Branch: refs/heads/ANY23-137
Commit: 9f60d3252fbd39cd6ea7670b43deeff0045d2b18
Parents: 43743fd
Author: Peter Ansell <p_...@yahoo.com>
Authored: Mon Sep 2 09:48:24 2013 +1000
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Mon Sep 2 09:48:24 2013 +1000
----------------------------------------------------------------------
core/pom.xml | 4 +
.../any23/extractor/rdf/RDFParserFactory.java | 54 ++-
.../any23/extractor/rdfa/RDFa11Extractor.java | 78 +---
.../any23/extractor/rdfa/RDFaExtractor.java | 133 +-----
.../any23/filter/IgnoreAccidentalRDFa.java | 3 +-
.../test/java/org/apache/any23/Any23Test.java | 454 ++++++++++---------
.../extractor/rdfa/XSLTStylesheetTest.java | 84 ----
pom.xml | 6 +
8 files changed, 314 insertions(+), 502 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 41978f9..7e83f5b 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -133,6 +133,10 @@
<groupId>org.openrdf.sesame</groupId>
<artifactId>sesame-repository-api</artifactId>
</dependency>
+ <dependency>
+ <groupId>org.semarglproject</groupId>
+ <artifactId>semargl-sesame</artifactId>
+ </dependency>
<!-- END: Sesame -->
<!-- BEGIN: Apache Commons, this version is hosted in the
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
index 606364b..423f64f 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFParserFactory.java
@@ -28,6 +28,8 @@ import org.openrdf.rio.RDFHandlerException;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;
+import org.openrdf.rio.helpers.RDFaParserSettings;
+import org.openrdf.rio.helpers.RDFaVersion;
import org.openrdf.rio.turtle.TurtleParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -56,7 +58,7 @@ public class RDFParserFactory {
}
/**
- * Returns a new instance of a configured {@link org.openrdf.rio.turtle.TurtleParser}.
+ * Returns a new instance of a configured TurtleParser.
*
* @param verifyDataType data verification enable if <code>true</code>.
* @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -79,7 +81,49 @@ public class RDFParserFactory {
}
/**
- * Returns a new instance of a configured {@link org.openrdf.rio.rdfxml.RDFXMLParser}.
+ * Returns a new instance of a configured RDFaParser, set to RDFa-1.0 compatibility mode.
+ *
+ * @param verifyDataType data verification enable if <code>true</code>.
+ * @param stopAtFirstError the parser stops at first error if <code>true</code>.
+ * @param extractionContext the extraction context where the parser is used.
+ * @param extractionResult the output extraction result.
+ * @return a new instance of a configured RDFa parser.
+ */
+ public RDFParser getRDFa10Parser(
+ final boolean verifyDataType,
+ final boolean stopAtFirstError,
+ final ExtractionContext extractionContext,
+ final ExtractionResult extractionResult
+ ) {
+ final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
+ parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_0);
+ configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
+ return parser;
+ }
+
+ /**
+ * Returns a new instance of a configured RDFaParser, set to RDFa-1.1 compatibility mode.
+ *
+ * @param verifyDataType data verification enable if <code>true</code>.
+ * @param stopAtFirstError the parser stops at first error if <code>true</code>.
+ * @param extractionContext the extraction context where the parser is used.
+ * @param extractionResult the output extraction result.
+ * @return a new instance of a configured RDFa parser.
+ */
+ public RDFParser getRDFa11Parser(
+ final boolean verifyDataType,
+ final boolean stopAtFirstError,
+ final ExtractionContext extractionContext,
+ final ExtractionResult extractionResult
+ ) {
+ final RDFParser parser = Rio.createParser(RDFFormat.RDFA);
+ parser.getParserConfig().set(RDFaParserSettings.RDFA_COMPATIBILITY, RDFaVersion.RDFA_1_1);
+ configureParser(parser, verifyDataType, stopAtFirstError, extractionContext, extractionResult);
+ return parser;
+ }
+
+ /**
+ * Returns a new instance of a configured RDFXMLParser.
*
* @param verifyDataType data verification enable if <code>true</code>.
* @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -99,7 +143,7 @@ public class RDFParserFactory {
}
/**
- * Returns a new instance of a configured {@link org.openrdf.rio.ntriples.NTriplesParser}.
+ * Returns a new instance of a configured NTriplesParser.
*
* @param verifyDataType data verification enable if <code>true</code>.
* @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -119,7 +163,7 @@ public class RDFParserFactory {
}
/**
- * Returns a new instance of a configured {@link org.apache.any23.io.nquads.NQuadsParser}.
+ * Returns a new instance of a configured NQuadsParser.
*
* @param verifyDataType data verification enable if <code>true</code>.
* @param stopAtFirstError the parser stops at first error if <code>true</code>.
@@ -139,7 +183,7 @@ public class RDFParserFactory {
}
/**
- * Returns a new instance of a configured {@link TriXParser}.
+ * Returns a new instance of a configured TriXParser.
*
* @param verifyDataType data verification enable if <code>true</code>.
* @param stopAtFirstError the parser stops at first error if <code>true</code>.
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
index 0a37adc..76d3fa3 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
@@ -18,91 +18,37 @@
package org.apache.any23.extractor.rdfa;
import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
-import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.w3c.dom.Document;
-
-import java.io.IOException;
-import java.net.URL;
+import org.apache.any23.extractor.rdf.BaseRDFExtractor;
+import org.apache.any23.extractor.rdf.RDFParserFactory;
+import org.openrdf.rio.RDFParser;
/**
* {@link org.apache.any23.extractor.Extractor} implementation for
- * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.1</a> specification.
+ * <a href="http://www.w3.org/TR/rdfa-core/">RDFa 1.1</a> specification.
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-public class RDFa11Extractor implements Extractor.TagSoupDOMExtractor {
-
- private final RDFa11Parser parser;
-
- private boolean verifyDataType;
+public class RDFa11Extractor extends BaseRDFExtractor {
- private boolean stopAtFirstError;
-
- /**
- * Constructor, allows to specify the validation and error handling
- * policies.
- *
- * @param verifyDataType
- * if <code>true</code> the data types will be verified, if
- * <code>false</code> will be ignored.
- * @param stopAtFirstError
- * if <code>true</code> the parser will stop at first parsing
- * error, if <code>false</code> will ignore non blocking errors.
- */
public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
- this.parser = new RDFa11Parser();
- this.verifyDataType = verifyDataType;
- this.stopAtFirstError = stopAtFirstError;
+ super(verifyDataType, stopAtFirstError);
}
- /**
- * Default constructor, with no verification of data types and not stop at
- * first error.
- */
public RDFa11Extractor() {
this(false, false);
}
- public boolean isVerifyDataType() {
- return verifyDataType;
- }
-
- public void setVerifyDataType(boolean verifyDataType) {
- this.verifyDataType = verifyDataType;
- }
-
- public boolean isStopAtFirstError() {
- return stopAtFirstError;
- }
-
- public void setStopAtFirstError(boolean stopAtFirstError) {
- this.stopAtFirstError = stopAtFirstError;
- }
-
- @Override
- public void run(ExtractionParameters extractionParameters,
- ExtractionContext extractionContext, Document in,
- ExtractionResult out) throws IOException, ExtractionException {
- try {
- parser.processDocument(new URL(extractionContext.getDocumentURI()
- .toString()), in, out);
- } catch (RDFa11ParserException rpe) {
- throw new ExtractionException("Error while performing extraction.",
- rpe);
- }
- }
-
- /**
- * @return the {@link org.apache.any23.extractor.ExtractorDescription} of
- * this extractor
- */
@Override
public ExtractorDescription getDescription() {
return RDFa11ExtractorFactory.getDescriptionInstance();
}
+ @Override
+ protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
+ return RDFParserFactory.getInstance().getRDFa11Parser(
+ isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult
+ );
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
index aec0866..fc11ba8 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
@@ -17,147 +17,38 @@
package org.apache.any23.extractor.rdfa;
-import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.extractor.ExtractionContext;
-import org.apache.any23.extractor.ExtractionException;
-import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.rdf.BaseRDFExtractor;
import org.apache.any23.extractor.rdf.RDFParserFactory;
-import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
-import org.openrdf.rio.RDFHandlerException;
-import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
-import org.w3c.dom.Document;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.StringReader;
-import java.io.StringWriter;
/**
- * Extractor for RDFa in HTML, based on Fabien Gadon's XSLT transform, found
- * <a href="http://ns.inria.fr/grddl/rdfa/">here</a>. It works by first
- * parsing the HTML using a tagsoup parser, then applies the XSLT to the
- * DOM tree, then parses the resulting RDF/XML.
+ * {@link org.apache.any23.extractor.Extractor} implementation for
+ * <a href="http://www.w3.org/TR/rdfa-syntax/">RDFa 1.0</a> specification.
*
- * @author Gabriele Renzi
- * @author Richard Cyganiak (richard@cyganiak.de)
+ * @author Michele Mostarda (mostarda@fbk.eu)
*/
-public class RDFaExtractor implements TagSoupDOMExtractor {
-
- public final static String NAME = "html-rdfa";
-
- public final static String xsltFilename =
- DefaultConfiguration.singleton().getPropertyOrFail("any23.rdfa.extractor.xslt");
-
- private static XSLTStylesheet xslt = null;
-
- /**
- * Returns a {@link XSLTStylesheet} able to distill RDFa from
- * HTML pages.
- *
- * @return returns a not <code>null</code> XSLT instance.
- */
- public static synchronized XSLTStylesheet getXSLT() {
- // Lazily initialized static instance, so we don't parse
- // the XSLT unless really necessary, and only once
- if (xslt == null) {
- InputStream in = RDFaExtractor.class.getResourceAsStream(xsltFilename);
- if (in == null) {
- throw new RuntimeException("Couldn't load '" + xsltFilename +
- "', maybe the file is not bundled in the jar?");
- }
- xslt = new XSLTStylesheet(in);
- }
- return xslt;
- }
-
- private boolean verifyDataType;
+public class RDFaExtractor extends BaseRDFExtractor {
- private boolean stopAtFirstError;
-
- /**
- * Constructor, allows to specify the validation and error handling policies.
- *
- * @param verifyDataType if <code>true</code> the data types will be verified,
- * if <code>false</code> will be ignored.
- * @param stopAtFirstError if <code>true</code> the parser will stop at first parsing error,
- * if <code>false</code> will ignore non blocking errors.
- */
public RDFaExtractor(boolean verifyDataType, boolean stopAtFirstError) {
- this.verifyDataType = verifyDataType;
- this.stopAtFirstError = stopAtFirstError;
+ super(verifyDataType, stopAtFirstError);
}
- /**
- * Default constructor, with no verification of data types and not stop at first error.
- */
public RDFaExtractor() {
this(false, false);
}
- public boolean isVerifyDataType() {
- return verifyDataType;
- }
-
- public void setVerifyDataType(boolean verifyDataType) {
- this.verifyDataType = verifyDataType;
- }
-
- public boolean isStopAtFirstError() {
- return stopAtFirstError;
- }
-
- public void setStopAtFirstError(boolean stopAtFirstError) {
- this.stopAtFirstError = stopAtFirstError;
- }
-
- @Override
- public void run(
- ExtractionParameters extractionParameters,
- ExtractionContext extractionContext,
- Document in,
- ExtractionResult out
- ) throws IOException, ExtractionException {
-
- StringWriter buffer = new StringWriter();
- try {
- getXSLT().applyTo(in, buffer);
- } catch (XSLTStylesheetException xslte) {
- throw new ExtractionException("An error occurred during the XSLT application.", xslte);
- }
-
- try {
- RDFParser parser
- = RDFParserFactory.getInstance().getRDFXMLParser(
- verifyDataType, stopAtFirstError, extractionContext, out
- );
- parser.parse(
- new StringReader(buffer.getBuffer().toString()),
- extractionContext.getDocumentURI().stringValue()
- );
- } catch (RDFHandlerException ex) {
- throw new IllegalStateException(
- "Should not happen, RDFHandlerAdapter does not throw RDFHandlerException", ex
- );
- } catch (RDFParseException ex) {
- throw new ExtractionException(
- "Invalid RDF/XML produced by RDFa transform.", ex, out
- );
- }
- }
-
- private String getDocType(Document in) {
- return in.getDoctype().getPublicId();
- }
-
- /**
- * @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
- */
@Override
public ExtractorDescription getDescription() {
return RDFaExtractorFactory.getDescriptionInstance();
}
+ @Override
+ protected RDFParser getParser(ExtractionContext extractionContext, ExtractionResult extractionResult) {
+ return RDFParserFactory.getInstance().getRDFa10Parser(
+ isVerifyDataType(), isStopAtFirstError(), extractionContext, extractionResult
+ );
+ }
}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java b/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
index 6fbd073..9c14744 100644
--- a/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
+++ b/core/src/main/java/org/apache/any23/filter/IgnoreAccidentalRDFa.java
@@ -19,6 +19,7 @@ package org.apache.any23.filter;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.rdfa.RDFaExtractor;
+import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
import org.apache.any23.vocab.XHTML;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
@@ -95,7 +96,7 @@ public class IgnoreAccidentalRDFa implements TripleHandler {
}
private boolean isRDFaContext(ExtractionContext context) {
- return context.getExtractorName().equals(RDFaExtractor.NAME);
+ return context.getExtractorName().equals(RDFaExtractorFactory.NAME);
}
public void endDocument(URI documentURI) throws TripleHandlerException {
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/test/java/org/apache/any23/Any23Test.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/Any23Test.java b/core/src/test/java/org/apache/any23/Any23Test.java
index 13ba903..ae6c13f 100644
--- a/core/src/test/java/org/apache/any23/Any23Test.java
+++ b/core/src/test/java/org/apache/any23/Any23Test.java
@@ -17,7 +17,7 @@
package org.apache.any23;
-import junit.framework.Assert;
+import org.junit.Assert;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.configuration.ModifiableConfiguration;
import org.apache.any23.extractor.ExtractionException;
@@ -47,6 +47,7 @@ import org.apache.commons.io.IOUtils;
import org.junit.Ignore;
import org.junit.Test;
import org.openrdf.model.Statement;
+import org.openrdf.repository.Repository;
import org.openrdf.repository.RepositoryConnection;
import org.openrdf.repository.RepositoryException;
import org.openrdf.repository.RepositoryResult;
@@ -67,6 +68,7 @@ import static org.apache.any23.extractor.ExtractionParameters.ValidationMode;
/**
* Test case for {@link Any23} facade.
+ *
* @author Davide Palmisano ( dpalmisano@gmail.com )
* @author Michele Mostarda ( michele.mostarda@gmail.com )
*/
@@ -77,7 +79,8 @@ public class Any23Test extends Any23OnlineTestBase {
private static final String PAGE_URL = "http://bob.com";
- private static final Logger logger = LoggerFactory.getLogger(Any23Test.class);
+ private static final Logger logger = LoggerFactory
+ .getLogger(Any23Test.class);
@Test
public void testTTLDetection() throws Exception {
@@ -93,8 +96,7 @@ public class Any23Test extends Any23OnlineTestBase {
public void testN3Detection2() throws Exception {
assertDetection(
"<http://example.org/path> <http://foo.com> <http://example.org/Document/foo#> .",
- "rdf-nt"
- );
+ "rdf-nt");
}
@Test
@@ -103,28 +105,25 @@ public class Any23Test extends Any23OnlineTestBase {
}
/**
- * This tests the behavior of <i>Any23</i> to execute the extraction explicitly specifying the charset
- * encoding of the input.
- *
+ * This tests the behavior of <i>Any23</i> to execute the extraction
+ * explicitly specifying the charset encoding of the input.
+ *
* @throws org.apache.any23.extractor.ExtractionException
* @throws IOException
* @throws SailException
* @throws RepositoryException
*/
@Test
- public void testExplicitEncoding()
- throws Exception {
- assertEncodingDetection(
- "UTF-8",
- "/html/encoding-test.html",
- "Knud M\u00F6ller"
- );
+ public void testExplicitEncoding() throws Exception {
+ assertEncodingDetection("UTF-8", "/html/encoding-test.html",
+ "Knud M\u00F6ller");
}
/**
- * This tests the behavior of <i>Any23</i> to perform the extraction without passing it any charset encoding.
- * The encoding is therefore guessed using {@link org.apache.any23.encoding.TikaEncodingDetector} class.
- *
+ * This tests the behavior of <i>Any23</i> to perform the extraction without
+ * passing it any charset encoding. The encoding is therefore guessed using
+ * {@link org.apache.any23.encoding.TikaEncodingDetector} class.
+ *
* @throws org.apache.any23.extractor.ExtractionException
* @throws IOException
* @throws SailException
@@ -132,26 +131,19 @@ public class Any23Test extends Any23OnlineTestBase {
* @throws org.apache.any23.writer.TripleHandlerException
*/
@Test
- public void testImplicitEncoding()
- throws Exception {
- assertEncodingDetection(
- null, // The encoding will be auto detected.
- "/html/encoding-test.html",
- "Knud M\u00F6ller"
- );
+ public void testImplicitEncoding() throws Exception {
+ assertEncodingDetection(null, // The encoding will be auto detected.
+ "/html/encoding-test.html", "Knud M\u00F6ller");
}
@Test
- public void testRDFXMLDetectionAndExtraction()
- throws Exception {
- String rdfXML =
- "<?xml version='1.0'?> " +
- "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' " +
- "xmlns:dc='http://purl.org/dc/elements/1.1/'>" +
- "<rdf:Description rdf:about='http://www.example.com'>" +
- "<dc:title>x</dc:title>" +
- "</rdf:Description>" +
- "</rdf:RDF>";
+ public void testRDFXMLDetectionAndExtraction() throws Exception {
+ String rdfXML = "<?xml version='1.0'?> "
+ + "<rdf:RDF xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#' "
+ + "xmlns:dc='http://purl.org/dc/elements/1.1/'>"
+ + "<rdf:Description rdf:about='http://www.example.com'>"
+ + "<dc:title>x</dc:title>" + "</rdf:Description>"
+ + "</rdf:RDF>";
assertDetectionAndExtraction(rdfXML);
}
@@ -163,47 +155,47 @@ public class Any23Test extends Any23OnlineTestBase {
@Test
public void testNturtleDetectionAndExtraction() throws Exception {
- String nTurtle =
- "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n" +
- "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n" +
- "@prefix ex: <http://example.org/stuff/1.0/> .\n" +
- "\n" +
- "<http://www.w3.org/TR/rdf-syntax-grammar>\n" +
- " dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n" +
- " ex:editor [\n" +
- " ex:fullname \"Dave Beckett\";\n" +
- " ex:homePage <http://purl.org/net/dajobe/>\n" +
- " ] .";
+ String nTurtle = "@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n"
+ + "@prefix dc: <http://purl.org/dc/elements/1.1/> .\n"
+ + "@prefix ex: <http://example.org/stuff/1.0/> .\n"
+ + "\n"
+ + "<http://www.w3.org/TR/rdf-syntax-grammar>\n"
+ + " dc:title \"RDF/XML Syntax Specification (Revised)\" ;\n"
+ + " ex:editor [\n"
+ + " ex:fullname \"Dave Beckett\";\n"
+ + " ex:homePage <http://purl.org/net/dajobe/>\n" + " ] .";
assertDetectionAndExtraction(nTurtle);
}
/**
* Tests out the first code snipped used in <i>Developer Manual</i>.
- *
+ *
* @throws IOException
* @throws org.apache.any23.extractor.ExtractionException
*/
@Test
public void testDemoCodeSnippet1() throws Exception {
- /*1*/ Any23 runner = new Any23();
- /*2*/ final String content = "@prefix foo: <http://example.org/ns#> . " +
- "@prefix : <http://other.example.org/ns#> ." +
- "foo:bar foo: : . " +
- ":bar : foo:bar . ";
- // The second argument of StringDocumentSource() must be a valid URI.
- /*3*/ DocumentSource source = new StringDocumentSource(content, "http://host.com/service");
- /*4*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
- /*5*/ TripleHandler handler = new NTriplesWriter(out);
- try {
- /*6*/ runner.extract(source, handler);
- } finally {
- /*7*/ handler.close();
- }
- /*8*/ String nt = out.toString("UTF-8");
+ /* 1 */Any23 runner = new Any23();
+ /* 2 */final String content = "@prefix foo: <http://example.org/ns#> . "
+ + "@prefix : <http://other.example.org/ns#> ."
+ + "foo:bar foo: : . "
+ + ":bar : foo:bar . ";
+ // The second argument of StringDocumentSource() must be a valid URI.
+ /* 3 */DocumentSource source = new StringDocumentSource(content,
+ "http://host.com/service");
+ /* 4 */ByteArrayOutputStream out = new ByteArrayOutputStream();
+ /* 5 */TripleHandler handler = new NTriplesWriter(out);
+ try {
+ /* 6 */runner.extract(source, handler);
+ } finally {
+ /* 7 */handler.close();
+ }
+ /* 8 */String nt = out.toString("UTF-8");
/*
- <http://example.org/ns#bar> <http://example.org/ns#> <http://other.example.org/ns#> .
- <http://other.example.org/ns#bar> <http://other.example.org/ns#> <http://example.org/ns#bar> .
+ * <http://example.org/ns#bar> <http://example.org/ns#>
+ * <http://other.example.org/ns#> . <http://other.example.org/ns#bar>
+ * <http://other.example.org/ns#> <http://example.org/ns#bar> .
*/
logger.debug("nt: " + nt);
Assert.assertTrue(nt.length() > 0);
@@ -211,52 +203,57 @@ public class Any23Test extends Any23OnlineTestBase {
/**
* Tests out the second code snipped used in <i>Developer Manual</i>.
- *
+ *
* @throws IOException
* @throws org.apache.any23.extractor.ExtractionException
*/
@Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
@Test
- public void testDemoCodeSnippet2() throws Exception{
+ public void testDemoCodeSnippet2() throws Exception {
assumeOnlineAllowed();
- /*1*/ Any23 runner = new Any23();
- /*2*/ runner.setHTTPUserAgent("test-user-agent");
- /*3*/ HTTPClient httpClient = runner.getHTTPClient();
- /*4*/ DocumentSource source = new HTTPDocumentSource(
- httpClient,
- "http://dbpedia.org/resource/Trento"
- );
- /*5*/ ByteArrayOutputStream out = new ByteArrayOutputStream();
- /*6*/ TripleHandler handler = new NTriplesWriter(out);
- try {
- /*7*/ runner.extract(source, handler);
- } finally {
- /*8*/ handler.close();
- }
- /*9*/ String n3 = out.toString("UTF-8");
+ /* 1 */Any23 runner = new Any23();
+ /* 2 */runner.setHTTPUserAgent("test-user-agent");
+ /* 3 */HTTPClient httpClient = runner.getHTTPClient();
+ /* 4 */DocumentSource source = new HTTPDocumentSource(httpClient,
+ "http://dbpedia.org/resource/Trento");
+ /* 5 */ByteArrayOutputStream out = new ByteArrayOutputStream();
+ /* 6 */TripleHandler handler = new NTriplesWriter(out);
+ try {
+ /* 7 */runner.extract(source, handler);
+ } finally {
+ /* 8 */handler.close();
+ }
+ /* 9 */String n3 = out.toString("UTF-8");
/*
- <http://dbpedia.org/resource/Trent> <http://dbpedia.org/ontology/wikiPageDisambiguates> <http://dbpedia.org/resource/Trento> .
- <http://dbpedia.org/resource/Andrea_Pozzo> <http://dbpedia.org/ontology/birthPlace> <http://dbpedia.org/resource/Trento> .
- <http://dbpedia.org/resource/Union_for_Trentino> <http://dbpedia.org/ontology/headquarter> <http://dbpedia.org/resource/Trento> .
- [...]
+ * <http://dbpedia.org/resource/Trent>
+ * <http://dbpedia.org/ontology/wikiPageDisambiguates>
+ * <http://dbpedia.org/resource/Trento> .
+ * <http://dbpedia.org/resource/Andrea_Pozzo>
+ * <http://dbpedia.org/ontology/birthPlace>
+ * <http://dbpedia.org/resource/Trento> .
+ * <http://dbpedia.org/resource/Union_for_Trentino>
+ * <http://dbpedia.org/ontology/headquarter>
+ * <http://dbpedia.org/resource/Trento> . [...]
*/
logger.debug("n3: " + n3);
Assert.assertTrue(n3.length() > 0);
}
/**
- * This test checks the extraction behavior when the library is used programatically.
- * This test is related to the issue #45, to verify the different behaviors between Maven and Ant.
- * The behavior was related to a 2nd-level dependency introduced by Maven.
- *
+ * This test checks the extraction behavior when the library is used
+ * programmatically. This test is related to the issue #45, to verify the
+ * different behaviors between Maven and Ant. The behavior was related to a
+ * 2nd-level dependency introduced by Maven.
+ *
* @throws org.apache.any23.extractor.ExtractionException
* @throws IOException
* @throws URISyntaxException
*/
@Test
- public void testProgrammaticExtraction() throws ExtractionException, IOException, URISyntaxException {
+ public void testProgrammaticExtraction() throws ExtractionException,
+ IOException, URISyntaxException {
Any23 any23 = new Any23();
any23.setHTTPUserAgent("Any23-Servlet");
any23.setHTTPClient(new DefaultHTTPClient() {
@@ -276,10 +273,11 @@ public class Any23Test extends Any23OnlineTestBase {
ReportingTripleHandler reporting = new ReportingTripleHandler(rdfWriter);
DocumentSource source = getDocumentSourceFromResource(
- "/html/rdfa/ansa_2010-02-26_12645863.html",
- "http://host.com/service");
+ "/html/rdfa/ansa_2010-02-26_12645863.html",
+ "http://host.com/service");
- Assert.assertTrue( any23.extract(source, reporting).hasMatchingExtractors() );
+ Assert.assertTrue(any23.extract(source, reporting)
+ .hasMatchingExtractors());
try {
handler.close();
} catch (TripleHandlerException e) {
@@ -288,30 +286,30 @@ public class Any23Test extends Any23OnlineTestBase {
final String bufferContent = byteArrayOutputStream.toString();
logger.debug(bufferContent);
- Assert.assertSame("Unexpected number of triples.", 60, StringUtils.countNL(bufferContent));
-
+ Assert.assertSame("Unexpected number of triples.", 60,
+ StringUtils.countNL(bufferContent));
+
}
/**
- * This test checks if a URL that is supposed to be GZIPPED is correctly opened and parsed with
- * the {@link Any23} facade.
- *
+ * This test checks if a URL that is supposed to be GZIPPED is correctly
+ * opened and parsed with the {@link Any23} facade.
+ *
* @throws IOException
* @throws URISyntaxException
* @throws ExtractionException
*/
@Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
@Test
- public void testGZippedContent() throws IOException, URISyntaxException, ExtractionException {
+ public void testGZippedContent() throws IOException, URISyntaxException,
+ ExtractionException {
assumeOnlineAllowed();
Any23 runner = new Any23();
runner.setHTTPUserAgent("test-user-agent");
HTTPClient httpClient = runner.getHTTPClient();
- DocumentSource source = new HTTPDocumentSource(
- httpClient,
- "http://products.semweb.bestbuy.com/y/products/7590289/"
- );
+ DocumentSource source = new HTTPDocumentSource(httpClient,
+ "http://products.semweb.bestbuy.com/y/products/7590289/");
ByteArrayOutputStream out = new ByteArrayOutputStream();
TripleHandler handler = new NTriplesWriter(out);
runner.extract(source, handler);
@@ -323,13 +321,13 @@ public class Any23Test extends Any23OnlineTestBase {
}
@Test
- public void testExtractionParameters() throws IOException, ExtractionException, TripleHandlerException {
- final int EXPECTED_TRIPLES = 6;
+ public void testExtractionParameters() throws IOException,
+ ExtractionException, TripleHandlerException {
+ final int EXPECTED_TRIPLES = 6;
Any23 runner = new Any23();
DocumentSource source = getDocumentSourceFromResource(
"/org/apache/any23/validator/missing-og-namespace.html",
- "http://www.test.com"
- );
+ "http://www.test.com");
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -340,18 +338,14 @@ public class Any23Test extends Any23OnlineTestBase {
compositeTH1.addChild(ctw1);
try {
runner.extract(
- new ExtractionParameters(
- DefaultConfiguration.singleton(),
- ValidationMode.None
- ),
- source,
- compositeTH1
- );
+ new ExtractionParameters(DefaultConfiguration.singleton(),
+ ValidationMode.None), source, compositeTH1);
} finally {
compositeTH1.close();
}
logger.info(baos.toString());
- Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES, cth1.getCount() );
+ Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES,
+ cth1.getCount());
baos.reset();
CountingTripleHandler cth2 = new CountingTripleHandler();
@@ -360,26 +354,21 @@ public class Any23Test extends Any23OnlineTestBase {
compositeTH2.addChild(cth2);
compositeTH2.addChild(ctw2);
runner.extract(
- new ExtractionParameters(
- DefaultConfiguration.singleton(),
- ValidationMode.ValidateAndFix
- ),
- source,
- compositeTH2
- );
- logger.debug( baos.toString() );
- Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES + 5, cth2.getCount() );
+ new ExtractionParameters(DefaultConfiguration.singleton(),
+ ValidationMode.ValidateAndFix), source, compositeTH2);
+ logger.debug(baos.toString());
+ Assert.assertEquals("Unexpected number of triples.",
+ EXPECTED_TRIPLES + 5, cth2.getCount());
}
@Test
public void testExtractionParametersWithNestingDisabled()
- throws IOException, ExtractionException, TripleHandlerException {
+ throws IOException, ExtractionException, TripleHandlerException {
final int EXPECTED_TRIPLES = 19;
Any23 runner = new Any23();
DocumentSource source = getDocumentSourceFromResource(
"/microformats/nested-microformats-a1.html",
- "http://www.test.com"
- );
+ "http://www.test.com");
ByteArrayOutputStream baos = new ByteArrayOutputStream();
@@ -389,16 +378,12 @@ public class Any23Test extends Any23OnlineTestBase {
compositeTH1.addChild(cth1);
compositeTH1.addChild(ctw1);
runner.extract(
- new ExtractionParameters(
- DefaultConfiguration.singleton(),
- ValidationMode.None, true
- ),
- source,
- compositeTH1
- );
+ new ExtractionParameters(DefaultConfiguration.singleton(),
+ ValidationMode.None, true), source, compositeTH1);
compositeTH1.close();
logger.debug("Out1: " + baos.toString());
- Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES + 3, cth1.getCount() );
+ Assert.assertEquals("Unexpected number of triples.",
+ EXPECTED_TRIPLES + 3, cth1.getCount());
baos.reset();
CountingTripleHandler cth2 = new CountingTripleHandler();
@@ -407,24 +392,20 @@ public class Any23Test extends Any23OnlineTestBase {
compositeTH2.addChild(cth2);
compositeTH2.addChild(ctw2);
runner.extract(
- new ExtractionParameters(
- DefaultConfiguration.singleton(),
- ValidationMode.ValidateAndFix, false),
- source,
- compositeTH2
- );
+ new ExtractionParameters(DefaultConfiguration.singleton(),
+ ValidationMode.ValidateAndFix, false), source,
+ compositeTH2);
compositeTH2.close();
logger.debug("Out2: " + baos.toString());
- Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES, cth2.getCount() );
+ Assert.assertEquals("Unexpected number of triples.", EXPECTED_TRIPLES,
+ cth2.getCount());
}
@Test
public void testExceptionPropagation() throws IOException {
Any23 any23 = new Any23();
DocumentSource source = getDocumentSourceFromResource(
- "/application/turtle/geolinkeddata.ttl",
- "http://www.test.com"
- );
+ "/application/turtle/geolinkeddata.ttl", "http://www.test.com");
CountingTripleHandler cth1 = new CountingTripleHandler();
try {
any23.extract(source, cth1);
@@ -436,16 +417,19 @@ public class Any23Test extends Any23OnlineTestBase {
/**
* Test correct management of general <i>XML</i> content.
- *
+ *
* @throws IOException
* @throws ExtractionException
*/
@Test
- public void testXMLMimeTypeManagement() throws IOException, ExtractionException {
+ public void testXMLMimeTypeManagement() throws IOException,
+ ExtractionException {
final String documentURI = "http://www.test.com/resource.xml";
final String contentType = "application/xml";
- final String in = StreamUtils.asString( this.getClass().getResourceAsStream("any23-xml-mimetype.xml") );
- final DocumentSource doc = new StringDocumentSource(in, documentURI, contentType);
+ final String in = StreamUtils.asString(this.getClass()
+ .getResourceAsStream("any23-xml-mimetype.xml"));
+ final DocumentSource doc = new StringDocumentSource(in, documentURI,
+ contentType);
final Any23 any23 = new Any23();
final CountingTripleHandler cth = new CountingTripleHandler(false);
final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
@@ -455,20 +439,23 @@ public class Any23Test extends Any23OnlineTestBase {
}
/**
- * Test correct management of general <i>XML</i> content from <i>URL</i> source.
- *
+ * Test correct management of general <i>XML</i> content from <i>URL</i>
+ * source.
+ *
* @throws IOException
* @throws ExtractionException
*/
@Ignore("ANY23-140 - Revise Any23 tests to remove fetching of web content")
@Test
- public void testXMLMimeTypeManagementViaURL() throws IOException, ExtractionException {
+ public void testXMLMimeTypeManagementViaURL() throws IOException,
+ ExtractionException {
assumeOnlineAllowed();
final Any23 any23 = new Any23();
any23.setHTTPUserAgent("test-user-agent");
final CountingTripleHandler cth = new CountingTripleHandler(false);
final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
- final ExtractionReport report = any23.extract("http://www.nativeremedies.com/XML/combos.xml", rth);
+ final ExtractionReport report = any23.extract(
+ "http://www.nativeremedies.com/XML/combos.xml", rth);
Assert.assertFalse(report.hasMatchingExtractors());
Assert.assertEquals(0, cth.getCount());
}
@@ -481,23 +468,26 @@ public class Any23Test extends Any23OnlineTestBase {
any23.setHTTPUserAgent("test-user-agent");
final CountingTripleHandler cth = new CountingTripleHandler(false);
final ReportingTripleHandler rth = new ReportingTripleHandler(cth);
- final ExtractionReport report = any23.extract("http://www.usarab.org/news/?tag=england", rth);
- Assert.assertTrue( report.hasMatchingExtractors() );
+ final ExtractionReport report = any23.extract(
+ "http://www.usarab.org/news/?tag=england", rth);
+ Assert.assertTrue(report.hasMatchingExtractors());
}
@Test
public void testMicrodataSupport() throws Exception {
- final String htmlWithMicrodata = IOUtils.toString(
- this.getClass().getResourceAsStream("/microdata/microdata-basic.html")
- );
+ final String htmlWithMicrodata = IOUtils.toString(this.getClass()
+ .getResourceAsStream("/microdata/microdata-basic.html"));
assertExtractorActivation(htmlWithMicrodata, MicrodataExtractor.class);
}
@Test
- public void testAbstractMethodErrorIssue186_1() throws IOException, ExtractionException{
+ public void testAbstractMethodErrorIssue186_1() throws IOException,
+ ExtractionException {
final Any23 runner = new Any23();
- final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
- final DocumentSource source = new StringDocumentSource(content, "http://base.com");
+ final String content = FileUtils
+ .readResourceContent("/html/rdfa/rdfa-issue186-1.xhtml");
+ final DocumentSource source = new StringDocumentSource(content,
+ "http://base.com");
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final TripleHandler handler = new NTriplesWriter(out);
runner.extract(source, handler);
@@ -506,10 +496,13 @@ public class Any23Test extends Any23OnlineTestBase {
}
@Test
- public void testAbstractMethodErrorIssue186_2() throws IOException, ExtractionException{
+ public void testAbstractMethodErrorIssue186_2() throws IOException,
+ ExtractionException {
final Any23 runner = new Any23();
- final String content = FileUtils.readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
- final DocumentSource source = new StringDocumentSource(content, "http://richard.cyganiak.de/");
+ final String content = FileUtils
+ .readResourceContent("/html/rdfa/rdfa-issue186-2.xhtml");
+ final DocumentSource source = new StringDocumentSource(content,
+ "http://richard.cyganiak.de/");
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final TripleHandler handler = new NTriplesWriter(out);
runner.extract(source, handler);
@@ -519,12 +512,15 @@ public class Any23Test extends Any23OnlineTestBase {
@Test
public void testModifiableConfiguration_issue183() throws Exception {
- final ModifiableConfiguration modifiableConf = DefaultConfiguration.copy();
+ final ModifiableConfiguration modifiableConf = DefaultConfiguration
+ .copy();
modifiableConf.setProperty("any23.extraction.metadata.timesize", "off");
final Any23 any23 = new Any23(modifiableConf);
- final String content = FileUtils.readResourceContent("/rdf/rdf-issue183.ttl");
- final DocumentSource source = new StringDocumentSource(content, "http://base.com");
+ final String content = FileUtils
+ .readResourceContent("/rdf/rdf-issue183.ttl");
+ final DocumentSource source = new StringDocumentSource(content,
+ "http://base.com");
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final TripleHandler handler = new NTriplesWriter(out);
any23.extract(source, handler);
@@ -534,19 +530,18 @@ public class Any23Test extends Any23OnlineTestBase {
logger.debug(n3);
Assert.assertFalse(
"Should not contain triple with http://vocab.sindice.net/date",
- n3.contains("http://vocab.sindice.net/date")
- );
+ n3.contains("http://vocab.sindice.net/date"));
Assert.assertFalse(
"Should not contain triple with http://vocab.sindice.net/size",
- n3.contains("http://vocab.sindice.net/size")
- );
+ n3.contains("http://vocab.sindice.net/size"));
}
/**
- * Performs detection and extraction on the given input string
- * and return the {@link ExtractionReport}.
- *
- * @param in input string.
+ * Performs detection and extraction on the given input string and returns
+ * the {@link ExtractionReport}.
+ *
+ * @param in
+ * input string.
* @return
* @throws IOException
* @throws ExtractionException
@@ -555,19 +550,17 @@ public class Any23Test extends Any23OnlineTestBase {
Any23 any23 = new Any23();
ByteArrayOutputStream out = new ByteArrayOutputStream();
ReportingTripleHandler outputHandler = new ReportingTripleHandler(
- new IgnoreAccidentalRDFa(
- new IgnoreTitlesOfEmptyDocuments(
- new NTriplesWriter(out)
- )
- )
- );
+ new IgnoreAccidentalRDFa(new IgnoreTitlesOfEmptyDocuments(
+ new NTriplesWriter(out))));
return any23.extract(in, "http://host.com/path", outputHandler);
}
/**
- * Asserts that a list an {@link Extractor} has been activated for the given input data.
- *
- * @param in input data as string.
+ * Asserts that at least one {@link Extractor} has been activated for the
+ * given input data.
+ *
+ * @param in
+ * input data as string.
* @throws IOException
* @throws ExtractionException
*/
@@ -575,36 +568,38 @@ public class Any23Test extends Any23OnlineTestBase {
final ExtractionReport extractionReport = detectAndExtract(in);
Assert.assertTrue(
"Detection and extraction failed, no matching extractors.",
- extractionReport.hasMatchingExtractors()
- );
+ extractionReport.hasMatchingExtractors());
}
/**
- * Assert the correct activation of the given list of {@link Extractor}s for the given input string.
- *
- * @param in input data as string.
+ * Assert the correct activation of the given list of {@link Extractor}s for
+ * the given input string.
+ *
+ * @param in
+ * input data as string.
* @param expectedExtractors
* @throws IOException
* @throws ExtractionException
*/
- private void assertExtractorActivation(String in, Class<? extends Extractor>... expectedExtractors)
- throws Exception {
+ private void assertExtractorActivation(String in,
+ Class<? extends Extractor>... expectedExtractors) throws Exception {
final ExtractionReport extractionReport = detectAndExtract(in);
for (Class<? extends Extractor> expectedExtractorClass : expectedExtractors) {
Assert.assertTrue(
String.format(
"Detection and extraction failed, expected extractor [%s] not found.",
- expectedExtractorClass
- ),
- containsClass( extractionReport.getMatchingExtractors(), expectedExtractorClass )
- );
+ expectedExtractorClass),
+ containsClass(extractionReport.getMatchingExtractors(),
+ expectedExtractorClass));
}
}
/**
* Asserts the correct encoding detection for a specified data.
- *
- * @param encoding the expected specified encoding, if <code>null</code> will be auto detected.
+ *
+ * @param encoding
+ * the expected specified encoding, if <code>null</code> will be
+ * auto detected.
* @param input
* @param expectedContent
* @throws Exception
@@ -613,49 +608,60 @@ public class Any23Test extends Any23OnlineTestBase {
throws Exception {
DocumentSource fileDocumentSource = getDocumentSourceFromResource(input);
Any23 any23;
- RepositoryConnection conn;
- RepositoryWriter repositoryWriter;
+ RepositoryConnection conn = null;
+ RepositoryWriter repositoryWriter = null;
any23 = new Any23();
- Sail store = new MemoryStore();
+ Repository store = new SailRepository(new MemoryStore());
store.initialize();
- conn = new SailRepository(store).getConnection();
- repositoryWriter = new RepositoryWriter(conn);
- Assert.assertTrue( any23.extract(fileDocumentSource, repositoryWriter, encoding).hasMatchingExtractors() );
-
- RepositoryResult<Statement> statements = conn.getStatements(null, vDCTERMS.title, null, false);
- try {
- while (statements.hasNext()) {
- Statement statement = statements.next();
- printStatement(statement);
- org.junit.Assert.assertTrue(statement.getObject().stringValue().contains(expectedContent));
+ try
+ {
+ conn = store.getConnection();
+ repositoryWriter = new RepositoryWriter(conn);
+ Assert.assertTrue( any23.extract(fileDocumentSource, repositoryWriter, encoding).hasMatchingExtractors() );
+
+ RepositoryResult<Statement> statements = conn.getStatements(null, vDCTERMS.title, null, false);
+ try {
+ while (statements.hasNext()) {
+ Statement statement = statements.next();
+ printStatement(statement);
+ Assert.assertTrue(statement.getObject().stringValue().contains(expectedContent));
+ }
+ } finally {
+ statements.close();
+ }
+ }
+ finally {
+ if(conn != null) {
+ conn.close();
+ }
+ if(repositoryWriter != null) {
+ repositoryWriter.close();
}
- } finally {
- statements.close();
}
-
fileDocumentSource = null;
any23 = null;
- conn.close();
- repositoryWriter.close();
}
/**
* Will try to detect the <i>content</i> trying sequentially with all
* specified parser.
- *
+ *
* @param content
* @param parsers
* @throws Exception
*/
- private void assertDetection(String content, String... parsers) throws Exception {
+ private void assertDetection(String content, String... parsers)
+ throws Exception {
ByteArrayOutputStream out = new ByteArrayOutputStream();
Any23 runner = new Any23(parsers.length == 0 ? null : parsers);
if (parsers.length != 0) {
- runner.setMIMETypeDetector(null); // Use all the provided extractors.
+ runner.setMIMETypeDetector(null); // Use all the provided
+ // extractors.
}
final NTriplesWriter tripleHandler = new NTriplesWriter(out);
- runner.extract(new StringDocumentSource(content, PAGE_URL), tripleHandler);
+ runner.extract(new StringDocumentSource(content, PAGE_URL),
+ tripleHandler);
tripleHandler.close();
String result = out.toString("us-ascii");
Assert.assertNotNull(result);
@@ -663,19 +669,17 @@ public class Any23Test extends Any23OnlineTestBase {
}
private void printStatement(Statement statement) {
- logger.debug(String.format("%s\t%s\t%s",
- statement.getSubject(),
- statement.getPredicate(),
- statement.getObject()));
+ logger.debug(String.format("%s\t%s\t%s", statement.getSubject(),
+ statement.getPredicate(), statement.getObject()));
}
private boolean containsClass(List<?> list, Class clazz) {
- for(Object o : list) {
- if(o.getClass().equals(clazz)) {
+ for (Object o : list) {
+ if (o.getClass().equals(clazz)) {
return true;
}
}
return false;
}
-
+
}
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java b/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
deleted file mode 100644
index c8052c7..0000000
--- a/core/src/test/java/org/apache/any23/extractor/rdfa/XSLTStylesheetTest.java
+++ /dev/null
@@ -1,84 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.any23.extractor.rdfa;
-
-import org.apache.any23.extractor.html.TagSoupParser;
-import org.junit.Assert;
-import org.junit.Test;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.io.StringWriter;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * Test case for {@link XSLTStylesheet} class.
- * Through this test we verify regressions on the <i>RDFa XSLT transformer</i> for <i>HTML/XHTML</i>
- *
- * @author Michele Mostarda (mostarda@fbk.eu)
- */
-public class XSLTStylesheetTest {
-
- private static final Logger logger = LoggerFactory.getLogger(XSLTStylesheetTest.class);
-
- /**
- * This test verifies the correct handling of base management for an <i>HTML</i> input.
- *
- * @throws java.io.IOException
- * @throws XSLTStylesheetException
- */
- @Test
- public void testHTMLRDFaBaseHanding() throws IOException, XSLTStylesheetException {
- final String[] vars = checkPageBaseHandling("/html/rdfa/base-handling.html");
- Assert.assertEquals("Unexpected value for this_location", "http://di2.deri.ie/people/", vars[0]);
- Assert.assertEquals("Unexpected value for this_root" , "http://di2.deri.ie/" , vars[1]);
- Assert.assertEquals("Unexpected value for html_base" , "http://di2.deri.ie/people/", vars[2]);
- }
-
- /**
- * This test verifies the correct handling of base management for an <i>XHTML</i> input.
- *
- * @throws java.io.IOException
- * @throws XSLTStylesheetException
- */
- @Test
- public void testXHTMLRDFaBaseHanding() throws IOException, XSLTStylesheetException {
- final String[] vars = checkPageBaseHandling("/html/rdfa/base-handling.xhtml");
- Assert.assertEquals("Unexpected value for this_location", "http://example.org/john-d/", vars[0]);
- Assert.assertEquals("Unexpected value for this_root" , "http://example.org/" , vars[1]);
- Assert.assertEquals("Unexpected value for html_base" , "http://example.org/john-d/", vars[2]);
- }
-
- private String[] checkPageBaseHandling(String testFile) throws IOException, XSLTStylesheetException {
- final TagSoupParser tagSoupParser = new TagSoupParser(
- this.getClass().getResourceAsStream(testFile),
- "http://test/document/uri"
- );
- final StringWriter sw = new StringWriter();
- RDFaExtractor.getXSLT().applyTo(tagSoupParser.getDOM(), sw);
- final String content = sw.toString();
- logger.debug(content);
- final Pattern pattern = Pattern.compile("<!--this_location: '(.+)' this_root: '(.+)' html_base: '(.+)'-->");
- final Matcher matcher = pattern.matcher(content);
- Assert.assertTrue("Cannot find comment matching within generated output.", matcher.find());
- return new String[]{ matcher.group(1), matcher.group(2), matcher.group(3) };
- }
-
-}
http://git-wip-us.apache.org/repos/asf/any23/blob/9f60d325/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 60ec1e7..43dfb39 100644
--- a/pom.xml
+++ b/pom.xml
@@ -228,6 +228,7 @@
<implementation.build.tstamp>${implementation.build}; ${maven.build.timestamp}</implementation.build.tstamp>
<slf4j.logger.version>1.7.5</slf4j.logger.version>
<sesame.version>2.7.5</sesame.version>
+ <semargl.version>0.6</semargl.version>
<latest.stable.released>0.8.0</latest.stable.released>
<!-- Maven Plugin Versions -->
@@ -385,6 +386,11 @@
<artifactId>sesame-repository-api</artifactId>
<version>${sesame.version}</version>
</dependency>
+ <dependency>
+ <groupId>org.semarglproject</groupId>
+ <artifactId>semargl-sesame</artifactId>
+ <version>${semargl.version}</version>
+ </dependency>
<!-- END: Sesame -->
<!-- BEGIN: Apache Commons -->