You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by an...@apache.org on 2015/03/21 06:07:14 UTC
[6/8] any23 git commit: ANY23-226 : Make JSONLD extraction work
ANY23-226 : Make JSONLD extraction work
Also make services work in Eclipse using M2E.
The META-INF/services files generated by @MetaInfServices aren't recognised by
M2E, and the M2E developers insist it isn't their issue.
Project: http://git-wip-us.apache.org/repos/asf/any23/repo
Commit: http://git-wip-us.apache.org/repos/asf/any23/commit/fd822849
Tree: http://git-wip-us.apache.org/repos/asf/any23/tree/fd822849
Diff: http://git-wip-us.apache.org/repos/asf/any23/diff/fd822849
Branch: refs/heads/master
Commit: fd822849190240b8cf981ecc7abd0b4f592381d5
Parents: 1e3eb9c
Author: Peter Ansell <p_...@yahoo.com>
Authored: Sat Mar 21 15:49:33 2015 +1100
Committer: Peter Ansell <p_...@yahoo.com>
Committed: Sat Mar 21 15:54:15 2015 +1100
----------------------------------------------------------------------
core/pom.xml | 9 -
.../any23/cli/ExtractorDocumentation.java | 2 -
.../org/apache/any23/cli/MicrodataParser.java | 2 -
.../java/org/apache/any23/cli/MimeDetector.java | 2 -
.../org/apache/any23/cli/PluginVerifier.java | 2 -
.../main/java/org/apache/any23/cli/Rover.java | 2 -
.../java/org/apache/any23/cli/VocabPrinter.java | 2 -
.../extractor/csv/CSVExtractorFactory.java | 2 -
.../extractor/html/AdrExtractorFactory.java | 2 -
.../extractor/html/EmbeddedJSONLDExtractor.java | 398 ++--
.../html/EmbeddedJSONLDExtractorFactory.java | 2 -
.../extractor/html/GeoExtractorFactory.java | 2 -
.../html/HCalendarExtractorFactory.java | 2 -
.../extractor/html/HCardExtractorFactory.java | 2 -
.../html/HListingExtractorFactory.java | 2 -
.../extractor/html/HRecipeExtractorFactory.java | 2 -
.../extractor/html/HResumeExtractorFactory.java | 2 -
.../html/HReviewAggregateExtractorFactory.java | 2 -
.../extractor/html/HReviewExtractorFactory.java | 2 -
.../html/HTMLMetaExtractorFactory.java | 2 -
.../html/HeadLinkExtractorFactory.java | 2 -
.../extractor/html/ICBMExtractorFactory.java | 2 -
.../extractor/html/LicenseExtractorFactory.java | 2 -
.../extractor/html/SpeciesExtractorFactory.java | 2 -
.../extractor/html/TitleExtractorFactory.java | 2 -
.../html/TurtleHTMLExtractorFactory.java | 2 -
.../extractor/html/XFNExtractorFactory.java | 2 -
.../microdata/MicrodataExtractorFactory.java | 2 -
.../any23/extractor/rdf/BaseRDFExtractor.java | 2 +-
.../extractor/rdf/JSONLDExtractorFactory.java | 2 -
.../extractor/rdf/NQuadsExtractorFactory.java | 2 -
.../extractor/rdf/NTriplesExtractorFactory.java | 2 -
.../extractor/rdf/RDFXMLExtractorFactory.java | 2 -
.../extractor/rdf/TriXExtractorFactory.java | 2 -
.../extractor/rdf/TurtleExtractorFactory.java | 2 -
.../extractor/rdfa/RDFa11ExtractorFactory.java | 2 -
.../extractor/rdfa/RDFaExtractorFactory.java | 2 -
.../extractor/xpath/XPathExtractorFactory.java | 2 -
.../apache/any23/writer/JSONWriterFactory.java | 2 -
.../any23/writer/NQuadsWriterFactory.java | 2 -
.../any23/writer/NTriplesWriterFactory.java | 2 -
.../any23/writer/RDFXMLWriterFactory.java | 2 -
.../apache/any23/writer/TriXWriterFactory.java | 2 -
.../any23/writer/TurtleWriterFactory.java | 2 -
.../any23/writer/URIListWriterFactory.java | 2 -
.../META-INF/services/org.apache.any23.cli.Tool | 6 +
.../org.apache.any23.extractor.ExtractorFactory | 28 +
.../org.apache.any23.writer.WriterFactory | 7 +
.../any23/extractor/csv/CSVExtractorTest.java | 164 +-
.../example/ExampleExtractorFactory.java | 2 -
.../html/AbstractExtractorTestCase.java | 1459 +++++++-------
.../html/EmbeddedJSONLDExtractorTest.java | 34 +-
.../extractor/html/HCalendarExtractorTest.java | 730 +++----
.../extractor/html/HCardExtractorTest.java | 1872 +++++++++---------
.../extractor/html/HListingExtractorTest.java | 600 +++---
.../extractor/html/HRecipeExtractorTest.java | 56 +-
.../extractor/html/HResumeExtractorTest.java | 244 ++-
.../extractor/html/HReviewExtractorTest.java | 539 ++---
.../extractor/html/HTMLMetaExtractorTest.java | 95 +-
.../any23/extractor/html/RDFMergerTest.java | 920 ++++-----
.../extractor/html/SpeciesExtractorTest.java | 2 +-
.../extractor/html/TurtleHTMLExtractorTest.java | 2 +-
.../extractor/rdf/JSONLDExtractorTest.java | 8 -
.../rdfa/AbstractRDFaExtractorTestCase.java | 243 ++-
.../any23/io/nquads/NQuadsParserFactory.java | 2 -
.../any23/io/nquads/NQuadsWriterFactory.java | 2 -
.../main/java/org/apache/any23/cli/Crawler.java | 2 -
.../META-INF/services/org.apache.any23.cli.Tool | 1 +
.../htmlscraper/HTMLScraperExtractor.java | 2 -
.../HTMLScraperExtractorFactory.java | 2 -
.../org.apache.any23.extractor.ExtractorFactory | 1 +
.../plugin/officescraper/ExcelExtractor.java | 2 -
.../officescraper/ExcelExtractorFactory.java | 2 -
.../org.apache.any23.extractor.ExtractorFactory | 1 +
src/site/apt/any23-plugins.apt | 1 -
...html-embedded-jsonld-extractor-multiple.html | 45 +
.../html/html-embedded-jsonld-extractor.html | 4 +-
77 files changed, 3833 insertions(+), 3738 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/pom.xml
----------------------------------------------------------------------
diff --git a/core/pom.xml b/core/pom.xml
index 8e38051..b6a0427 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -153,15 +153,6 @@
</dependency>
<!-- END: Apache Commons CSV -->
- <!-- BEGIN: plugins -->
- <dependency>
- <groupId>org.kohsuke.metainf-services</groupId>
- <artifactId>metainf-services</artifactId>
- <scope>compile</scope>
- <optional>true</optional>
- </dependency>
- <!-- END: plugins -->
-
<!-- BEGIN: Test Dependencies -->
<dependency>
<groupId>junit</groupId>
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
index 16d7b4f..eb5dd7e 100644
--- a/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
+++ b/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
@@ -28,7 +28,6 @@ import org.apache.any23.extractor.Extractor.ContentExtractor;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.ExtractorRegistry;
-import org.kohsuke.MetaInfServices;
import java.io.IOException;
import java.util.LinkedList;
@@ -38,7 +37,6 @@ import java.util.List;
* This class provides some command-line documentation
* about available extractors and their usage.
*/
-@MetaInfServices
@Parameters( commandNames = { "extractor" }, commandDescription= "Utility for obtaining documentation about metadata extractors.")
public class ExtractorDocumentation implements Tool {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
index 3fa6348..9a593e4 100644
--- a/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
+++ b/core/src/main/java/org/apache/any23/cli/MicrodataParser.java
@@ -27,7 +27,6 @@ import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.FileDocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.util.StreamUtils;
-import org.kohsuke.MetaInfServices;
import java.io.File;
import java.io.InputStream;
@@ -44,7 +43,6 @@ import java.util.regex.Pattern;
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-@MetaInfServices
@Parameters( commandNames = { "microdata" }, commandDescription = "Commandline Tool for extracting Microdata from file/HTTP source.")
public class MicrodataParser implements Tool {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/MimeDetector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/MimeDetector.java b/core/src/main/java/org/apache/any23/cli/MimeDetector.java
index 5684473..87148ca 100644
--- a/core/src/main/java/org/apache/any23/cli/MimeDetector.java
+++ b/core/src/main/java/org/apache/any23/cli/MimeDetector.java
@@ -30,7 +30,6 @@ import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.FileDocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.StringDocumentSource;
-import org.kohsuke.MetaInfServices;
import java.io.File;
import java.net.URISyntaxException;
@@ -44,7 +43,6 @@ import java.util.List;
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-@MetaInfServices
@Parameters(commandNames = { "mimes" }, commandDescription = "MIME Type Detector Tool.")
public class MimeDetector implements Tool{
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
index 295c86f..70b72f5 100644
--- a/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
+++ b/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
@@ -25,7 +25,6 @@ import org.apache.any23.mime.MIMEType;
import org.apache.any23.plugin.Any23PluginManager;
import org.apache.any23.plugin.Author;
import org.apache.any23.plugin.ExtractorPlugin;
-import org.kohsuke.MetaInfServices;
import java.io.File;
import java.io.PrintStream;
@@ -40,7 +39,6 @@ import java.util.List;
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-@MetaInfServices
@Parameters(commandNames = { "verify" }, commandDescription = "Utility for plugin management verification.")
public class PluginVerifier implements Tool {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/Rover.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/Rover.java b/core/src/main/java/org/apache/any23/cli/Rover.java
index 6324872..1c77860 100644
--- a/core/src/main/java/org/apache/any23/cli/Rover.java
+++ b/core/src/main/java/org/apache/any23/cli/Rover.java
@@ -36,7 +36,6 @@ import org.apache.any23.writer.ReportingTripleHandler;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.apache.any23.writer.WriterFactoryRegistry;
-import org.kohsuke.MetaInfServices;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -59,7 +58,6 @@ import static java.lang.String.format;
* @author Richard Cyganiak (richard@cyganiak.de)
* @author Gabriele Renzi
*/
-@MetaInfServices
@Parameters(commandNames = { "rover" }, commandDescription = "Any23 Command Line Tool.")
public class Rover implements Tool {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
index 411b67a..70bf16f 100644
--- a/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
+++ b/core/src/main/java/org/apache/any23/cli/VocabPrinter.java
@@ -18,7 +18,6 @@
package org.apache.any23.cli;
import org.apache.any23.vocab.RDFSchemaUtils;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
import com.beust.jcommander.IStringConverter;
@@ -30,7 +29,6 @@ import com.beust.jcommander.Parameters;
*
* @author Michele Mostarda (mostarda@fbk.eu)
*/
-@MetaInfServices
@Parameters(commandNames = { "vocab" }, commandDescription = "Prints out the RDF Schema of the vocabularies used by Any23.")
public class VocabPrinter implements Tool {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
index 5f1dc8f..822cfd2 100644
--- a/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class CSVExtractorFactory extends SimpleExtractorFactory<CSVExtractor> implements
ExtractorFactory<CSVExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
index be7bb7a..6584e0c 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class AdrExtractorFactory extends SimpleExtractorFactory<AdrExtractor> implements
ExtractorFactory<AdrExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
index 5506a10..fbf2832 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractor.java
@@ -42,201 +42,215 @@ import java.util.Map;
import java.util.Set;
/**
- * This extractor represents the HTML script tags used to embed blocks of data in documents.
- * This way, JSON-LD content can be easily embedded in HTML by placing it in a script element
- * with the type attribute set to application/ld+json
- * according the <a href="http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents">JSON-LD specification</a>.
+ * This extractor represents the HTML script tags used to embed blocks of data
+ * in documents. This way, JSON-LD content can be easily embedded in HTML by
+ * placing it in a script element with the type attribute set to
+ * application/ld+json according to the <a
+ * href="http://www.w3.org/TR/json-ld/#embedding-json-ld-in-html-documents"
+ * >JSON-LD specification</a>.
*
*/
public class EmbeddedJSONLDExtractor implements Extractor.TagSoupDOMExtractor {
- private static final SINDICE vSINDICE = SINDICE.getInstance();
-
- private URI profile;
-
- private Map<String, URI> prefixes = new HashMap<String, URI>();
-
- private String documentLang;
-
- private JSONLDExtractor extractor;
-
- /**
- * {@inheritDoc}
- */
- @Override
- public void run(
- ExtractionParameters extractionParameters,
- ExtractionContext extractionContext,
- Document in,
- ExtractionResult out
- ) throws IOException, ExtractionException {
- profile = extractProfile(in);
- documentLang = getDocumentLanguage(in);
- extractLinkDefinedPrefixes(in);
-
- String baseProfile = vSINDICE.NS;
- if(profile != null) {
- baseProfile = profile.toString();
- }
-
- final URI documentURI = extractionContext.getDocumentURI();
- Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile, extractionParameters, extractionContext, out);
- for(JSONLDScript jsonldScript : jsonldScripts) {
- String lang = documentLang;
- if(jsonldScript.getLang() != null) {
- lang = jsonldScript.getLang();
- }
- out.writeTriple(
- documentURI,
- jsonldScript.getName(),
- new LiteralImpl(jsonldScript.getContent(), lang)
- );
- }
- }
-
- /**
- * Returns the {@link Document} language if declared, <code>null</code> otherwise.
- *
- * @param in a instance of {@link Document}.
- * @return the language declared, could be <code>null</code>.
- */
- private String getDocumentLanguage(Document in) {
- String lang = DomUtils.find(in, "string(/HTML/@lang)");
- if (lang.equals("")) {
- return null;
- }
- return lang;
- }
-
- private URI extractProfile(Document in) {
- String profile = DomUtils.find(in, "string(/HTML/@profile)");
- if (profile.equals("")) {
- return null;
- }
- return new URIImpl(profile);
- }
-
- /**
- * It extracts prefixes defined in the <i>LINK</i> meta tags.
- *
- * @param in
- */
- private void extractLinkDefinedPrefixes(Document in) {
- List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
- for(Node linkNode : linkNodes) {
- NamedNodeMap attributes = linkNode.getAttributes();
- String rel = attributes.getNamedItem("rel").getTextContent();
- String href = attributes.getNamedItem("href").getTextContent();
- if(rel != null && href !=null && RDFUtils.isAbsoluteURI(href)) {
- prefixes.put(rel, new URIImpl(href));
- }
- }
- }
-
- private Set<JSONLDScript> extractJSONLDScript(Document in, String baseProfile, ExtractionParameters extractionParameters,
- ExtractionContext extractionContext, ExtractionResult out) throws IOException, ExtractionException {
- List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
- Set<JSONLDScript> result = new HashSet<JSONLDScript>();
- extractor = new JSONLDExtractorFactory().createExtractor();
- for (Node jsonldNode : scriptNodes) {
- NamedNodeMap attributes = jsonldNode.getAttributes();
- for (int i = 0; i < attributes.getLength(); i++) {
- if (attributes.item(i).getTextContent().equalsIgnoreCase("application/ld+json")) {
- extractor.run(extractionParameters, extractionContext, DomUtils.nodeToInputStream(jsonldNode), out);
- }
- }
- Node nameAttribute = attributes.getNamedItem("name");
- Node contentAttribute = attributes.getNamedItem("content");
- if (nameAttribute == null || contentAttribute == null) {
- continue;
- }
- String name = nameAttribute.getTextContent();
- String content = contentAttribute.getTextContent();
- String xpath = DomUtils.getXPathForNode(jsonldNode);
- URI nameAsURI = getPrefixIfExists(name);
- if (nameAsURI == null) {
- nameAsURI = new URIImpl(baseProfile + name);
- }
- JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI, content);
- result.add(jsonldScript);
- }
- return result;
- }
-
- private URI getPrefixIfExists(String name) {
- String[] split = name.split("\\.");
- if(split.length == 2 && prefixes.containsKey(split[0])) {
- return new URIImpl(prefixes.get(split[0]) + split[1]);
- }
- return null;
- }
-
- @Override
- public ExtractorDescription getDescription() {
- return HTMLMetaExtractorFactory.getDescriptionInstance();
- }
-
- private class JSONLDScript {
-
- private String xpath;
-
- private URI name;
-
- private String lang;
-
- private String content;
-
- public JSONLDScript(String xpath, URI name, String content) {
- this.xpath = xpath;
- this.name = name;
- this.content = content;
- }
-
- public JSONLDScript(String xpath, URI name, String content, String lang) {
- this(xpath, name, content);
- this.lang = lang;
- }
-
- public URI getName() {
- return name;
- }
-
- public void setName(URI name) {
- this.name = name;
- }
-
- public String getLang() {
- return lang;
- }
-
- public void setLang(String lang) {
- this.lang = lang;
- }
-
- public String getContent() {
- return content;
- }
-
- public void setContent(String content) {
- this.content = content;
- }
-
- @Override
- public boolean equals(Object o) {
- if (this == o) return true;
- if (o == null || getClass() != o.getClass()) return false;
-
- JSONLDScript meta = (JSONLDScript) o;
-
- if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) return false;
-
- return true;
- }
-
- @Override
- public int hashCode() {
- return xpath != null ? xpath.hashCode() : 0;
- }
- }
+ private static final SINDICE vSINDICE = SINDICE.getInstance();
+
+ private URI profile;
+
+ private Map<String, URI> prefixes = new HashMap<String, URI>();
+
+ private String documentLang;
+
+ private JSONLDExtractor extractor;
+
+ /**
+ * {@inheritDoc}
+ */
+ @Override
+ public void run(ExtractionParameters extractionParameters,
+ ExtractionContext extractionContext, Document in,
+ ExtractionResult out) throws IOException, ExtractionException {
+ profile = extractProfile(in);
+ documentLang = getDocumentLanguage(in);
+ extractLinkDefinedPrefixes(in);
+
+ String baseProfile = vSINDICE.NS;
+ if (profile != null) {
+ baseProfile = profile.toString();
+ }
+
+ final URI documentURI = extractionContext.getDocumentURI();
+ Set<JSONLDScript> jsonldScripts = extractJSONLDScript(in, baseProfile,
+ extractionParameters, extractionContext, out);
+ for (JSONLDScript jsonldScript : jsonldScripts) {
+ //String lang = documentLang;
+ //if (jsonldScript.getLang() != null) {
+ // lang = jsonldScript.getLang();
+ //}
+ //out.writeTriple(documentURI, jsonldScript.getName(),
+ // new LiteralImpl(jsonldScript.getContent(), lang));
+ }
+ }
+
+ /**
+ * Returns the {@link Document} language if declared, <code>null</code>
+ * otherwise.
+ *
+ * @param in
+ * an instance of {@link Document}.
+ * @return the language declared, could be <code>null</code>.
+ */
+ private String getDocumentLanguage(Document in) {
+ String lang = DomUtils.find(in, "string(/HTML/@lang)");
+ if (lang.equals("")) {
+ return null;
+ }
+ return lang;
+ }
+
+ private URI extractProfile(Document in) {
+ String profile = DomUtils.find(in, "string(/HTML/@profile)");
+ if (profile.equals("")) {
+ return null;
+ }
+ return new URIImpl(profile);
+ }
+
+ /**
+ * It extracts prefixes defined in the <i>LINK</i> meta tags.
+ *
+ * @param in
+ */
+ private void extractLinkDefinedPrefixes(Document in) {
+ List<Node> linkNodes = DomUtils.findAll(in, "/HTML/HEAD/LINK");
+ for (Node linkNode : linkNodes) {
+ NamedNodeMap attributes = linkNode.getAttributes();
+ String rel = attributes.getNamedItem("rel").getTextContent();
+ String href = attributes.getNamedItem("href").getTextContent();
+ if (rel != null && href != null && RDFUtils.isAbsoluteURI(href)) {
+ prefixes.put(rel, new URIImpl(href));
+ }
+ }
+ }
+
+ private Set<JSONLDScript> extractJSONLDScript(Document in,
+ String baseProfile, ExtractionParameters extractionParameters,
+ ExtractionContext extractionContext, ExtractionResult out)
+ throws IOException, ExtractionException {
+ List<Node> scriptNodes = DomUtils.findAll(in, "/HTML/HEAD/SCRIPT");
+ Set<JSONLDScript> result = new HashSet<JSONLDScript>();
+ extractor = new JSONLDExtractorFactory().createExtractor();
+ for (Node jsonldNode : scriptNodes) {
+ NamedNodeMap attributes = jsonldNode.getAttributes();
+ for (int i = 0; i < attributes.getLength(); i++) {
+ if (attributes.item(i).getTextContent()
+ .equalsIgnoreCase("application/ld+json")) {
+ extractor.run(extractionParameters, extractionContext,
+ DomUtils.nodeToInputStream(jsonldNode
+ .getFirstChild()), out);
+ }
+ }
+ Node nameAttribute = attributes.getNamedItem("name");
+ Node contentAttribute = attributes.getNamedItem("content");
+ if (nameAttribute == null || contentAttribute == null) {
+ continue;
+ }
+ String name = nameAttribute.getTextContent();
+ String content = contentAttribute.getTextContent();
+ String xpath = DomUtils.getXPathForNode(jsonldNode);
+ URI nameAsURI = getPrefixIfExists(name);
+ if (nameAsURI == null) {
+ nameAsURI = new URIImpl(baseProfile + name);
+ }
+ JSONLDScript jsonldScript = new JSONLDScript(xpath, nameAsURI,
+ content);
+ result.add(jsonldScript);
+ }
+ return result;
+ }
+
+ private URI getPrefixIfExists(String name) {
+ String[] split = name.split("\\.");
+ if (split.length == 2 && prefixes.containsKey(split[0])) {
+ return new URIImpl(prefixes.get(split[0]) + split[1]);
+ }
+ return null;
+ }
+
+ @Override
+ public ExtractorDescription getDescription() {
+ return HTMLMetaExtractorFactory.getDescriptionInstance();
+ }
+
+ private class JSONLDScript {
+
+ private String xpath;
+
+ private URI name;
+
+ private String lang;
+
+ private String content;
+
+ public JSONLDScript(String xpath, URI name, String content) {
+ this.xpath = xpath;
+ this.name = name;
+ this.content = content;
+ }
+
+ public JSONLDScript(String xpath, URI name, String content, String lang) {
+ this(xpath, name, content);
+ this.lang = lang;
+ }
+
+ public URI getName() {
+ return name;
+ }
+
+ public void setName(URI name) {
+ this.name = name;
+ }
+
+ public String getLang() {
+ return lang;
+ }
+
+ public void setLang(String lang) {
+ this.lang = lang;
+ }
+
+ public String getContent() {
+ return content;
+ }
+
+ public void setContent(String content) {
+ this.content = content;
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) {
+ return true;
+ }
+ if (o == null) {
+ return false;
+ }
+ if (!(o instanceof JSONLDScript)) {
+ return false;
+ }
+
+ JSONLDScript meta = (JSONLDScript) o;
+
+ if (xpath != null ? !xpath.equals(meta.xpath) : meta.xpath != null) {
+ return false;
+ }
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ return xpath != null ? xpath.hashCode() : 0;
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java
index 2e7810f..714a227 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/EmbeddedJSONLDExtractorFactory.java
@@ -24,12 +24,10 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class EmbeddedJSONLDExtractorFactory extends SimpleExtractorFactory<EmbeddedJSONLDExtractor> implements
ExtractorFactory<EmbeddedJSONLDExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
index eefe764..3fe1204 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class GeoExtractorFactory extends SimpleExtractorFactory<GeoExtractor> implements
ExtractorFactory<GeoExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
index d0bce6a..239edf7 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HCalendarExtractorFactory extends SimpleExtractorFactory<HCalendarExtractor> implements
ExtractorFactory<HCalendarExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
index e28c83f..70c8480 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HCardExtractorFactory extends SimpleExtractorFactory<HCardExtractor> implements
ExtractorFactory<HCardExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
index 0a8d1bc..b060290 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HListingExtractorFactory extends SimpleExtractorFactory<HListingExtractor> implements
ExtractorFactory<HListingExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
index 3c1376d..9a64b49 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HRecipeExtractorFactory extends SimpleExtractorFactory<HRecipeExtractor> implements
ExtractorFactory<HRecipeExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
index 876ee1e..d8f5b48 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HResumeExtractorFactory extends SimpleExtractorFactory<HResumeExtractor> implements
ExtractorFactory<HResumeExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
index a37ce7e..8022a66 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewAggregateExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
*
* @author Peter Ansell p_ansell@yahoo.com
*/
-@MetaInfServices(ExtractorFactory.class)
public class HReviewAggregateExtractorFactory extends SimpleExtractorFactory<HReviewAggregateExtractor> implements
ExtractorFactory<HReviewAggregateExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
index 7fcf4c7..c943800 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HReviewExtractorFactory extends SimpleExtractorFactory<HReviewExtractor> implements
ExtractorFactory<HReviewExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
index 3a010ba..0d24ebd 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HTMLMetaExtractorFactory extends SimpleExtractorFactory<HTMLMetaExtractor> implements
ExtractorFactory<HTMLMetaExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
index 3586049..b9dc280 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class HeadLinkExtractorFactory extends SimpleExtractorFactory<HeadLinkExtractor> implements
ExtractorFactory<HeadLinkExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
index 45066f1..7f4dd80 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class ICBMExtractorFactory extends SimpleExtractorFactory<ICBMExtractor> implements
ExtractorFactory<ICBMExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
index a83aace..d8ed293 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class LicenseExtractorFactory extends SimpleExtractorFactory<LicenseExtractor> implements
ExtractorFactory<LicenseExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
index ebbe45c..14b7e08 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class SpeciesExtractorFactory extends SimpleExtractorFactory<SpeciesExtractor> implements
ExtractorFactory<SpeciesExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
index 1585659..ca96dd1 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class TitleExtractorFactory extends SimpleExtractorFactory<TitleExtractor> implements
ExtractorFactory<TitleExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
index 0445941..0810eb9 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class TurtleHTMLExtractorFactory extends SimpleExtractorFactory<TurtleHTMLExtractor> implements
ExtractorFactory<TurtleHTMLExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
index 7a7c5b1..33de0b3 100644
--- a/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class XFNExtractorFactory extends SimpleExtractorFactory<XFNExtractor> implements
ExtractorFactory<XFNExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
index 95cf208..1dca82e 100644
--- a/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
@@ -24,13 +24,11 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class MicrodataExtractorFactory extends SimpleExtractorFactory<MicrodataExtractor> implements
ExtractorFactory<MicrodataExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
index 052bfa9..e32ec51 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/BaseRDFExtractor.java
@@ -106,7 +106,7 @@ public abstract class BaseRDFExtractor implements Extractor.ContentExtractor {
} catch (RDFHandlerException ex) {
throw new IllegalStateException("Unexpected exception.", ex);
} catch (RDFParseException ex) {
-// throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult);
+ throw new ExtractionException("Error while parsing RDF document.", ex, extractionResult);
}
}
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
index bedd200..0c19919 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/JSONLDExtractorFactory.java
@@ -23,12 +23,10 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class JSONLDExtractorFactory extends SimpleExtractorFactory<JSONLDExtractor> implements
ExtractorFactory<JSONLDExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
index 3231b0d..4a6c6ab 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class NQuadsExtractorFactory extends SimpleExtractorFactory<NQuadsExtractor> implements
ExtractorFactory<NQuadsExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
index 40fa269..8a886f5 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class NTriplesExtractorFactory extends SimpleExtractorFactory<NTriplesExtractor> implements
ExtractorFactory<NTriplesExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
index 8dc1b1f..eed9cc4 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class RDFXMLExtractorFactory extends SimpleExtractorFactory<RDFXMLExtractor> implements
ExtractorFactory<RDFXMLExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
index 2bfc908..cbdb45a 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class TriXExtractorFactory extends SimpleExtractorFactory<TriXExtractor> implements
ExtractorFactory<TriXExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
index e31bba1..204c2f1 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class TurtleExtractorFactory extends SimpleExtractorFactory<TurtleExtractor> implements
ExtractorFactory<TurtleExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
index 9f51864..4c2ffe4 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class RDFa11ExtractorFactory extends SimpleExtractorFactory<RDFa11Extractor> implements
ExtractorFactory<RDFa11Extractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
index 4523bc9..75caafb 100644
--- a/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class RDFaExtractorFactory extends SimpleExtractorFactory<RDFaExtractor> implements
ExtractorFactory<RDFaExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
index b4e20e2..e240d30 100644
--- a/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
+++ b/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
@@ -23,13 +23,11 @@ import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices(ExtractorFactory.class)
public class XPathExtractorFactory extends SimpleExtractorFactory<XPathExtractor> implements
ExtractorFactory<XPathExtractor> {
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java b/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
index d5a6d03..2b08552 100644
--- a/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/JSONWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class JSONWriterFactory implements WriterFactory {
public static final String MIME_TYPE = "text/json";
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java b/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java
index 85adc82..8bd0b53 100644
--- a/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/NQuadsWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class NQuadsWriterFactory implements WriterFactory {
public static final String MIME_TYPE = RDFFormat.NQUADS.getDefaultMIMEType();
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java b/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java
index 8032174..059c91e 100644
--- a/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/NTriplesWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class NTriplesWriterFactory implements WriterFactory {
public static final String MIME_TYPE = RDFFormat.NTRIPLES.getDefaultMIMEType();
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java b/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java
index 39c3b27..f7c2340 100644
--- a/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/RDFXMLWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class RDFXMLWriterFactory implements WriterFactory {
public static final String MIME_TYPE = RDFFormat.RDFXML.getDefaultMIMEType();
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java b/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java
index 29de9cf..5b34869 100644
--- a/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/TriXWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class TriXWriterFactory implements WriterFactory {
public static final String MIME_TYPE = RDFFormat.TRIX.getDefaultMIMEType();
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java b/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java
index aa4b7b5..df6f9fa 100644
--- a/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/TurtleWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class TurtleWriterFactory implements WriterFactory {
public static final String MIME_TYPE = RDFFormat.TURTLE.getDefaultMIMEType();
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java b/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java
index ebdc494..9fd0c75 100644
--- a/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java
+++ b/core/src/main/java/org/apache/any23/writer/URIListWriterFactory.java
@@ -19,14 +19,12 @@ package org.apache.any23.writer;
import java.io.OutputStream;
-import org.kohsuke.MetaInfServices;
import org.openrdf.rio.RDFFormat;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
-@MetaInfServices
public class URIListWriterFactory implements WriterFactory {
public static final String MIME_TYPE = "text/plain";
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool b/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool
new file mode 100644
index 0000000..6e5a533
--- /dev/null
+++ b/core/src/main/resources/META-INF/services/org.apache.any23.cli.Tool
@@ -0,0 +1,6 @@
+org.apache.any23.cli.ExtractorDocumentation
+org.apache.any23.cli.MicrodataParser
+org.apache.any23.cli.MimeDetector
+org.apache.any23.cli.PluginVerifier
+org.apache.any23.cli.Rover
+org.apache.any23.cli.VocabPrinter
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory b/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
new file mode 100644
index 0000000..cb350f2
--- /dev/null
+++ b/core/src/main/resources/META-INF/services/org.apache.any23.extractor.ExtractorFactory
@@ -0,0 +1,28 @@
+org.apache.any23.extractor.csv.CSVExtractorFactory
+org.apache.any23.extractor.html.AdrExtractorFactory
+org.apache.any23.extractor.html.EmbeddedJSONLDExtractorFactory
+org.apache.any23.extractor.html.GeoExtractorFactory
+org.apache.any23.extractor.html.HCalendarExtractorFactory
+org.apache.any23.extractor.html.HCardExtractorFactory
+org.apache.any23.extractor.html.HeadLinkExtractorFactory
+org.apache.any23.extractor.html.HListingExtractorFactory
+org.apache.any23.extractor.html.HRecipeExtractorFactory
+org.apache.any23.extractor.html.HResumeExtractorFactory
+org.apache.any23.extractor.html.HReviewAggregateExtractorFactory
+org.apache.any23.extractor.html.HReviewExtractorFactory
+org.apache.any23.extractor.html.HTMLMetaExtractorFactory
+org.apache.any23.extractor.html.ICBMExtractorFactory
+org.apache.any23.extractor.html.LicenseExtractorFactory
+org.apache.any23.extractor.html.SpeciesExtractorFactory
+org.apache.any23.extractor.html.TitleExtractorFactory
+org.apache.any23.extractor.html.XFNExtractorFactory
+org.apache.any23.extractor.microdata.MicrodataExtractorFactory
+org.apache.any23.extractor.rdf.JSONLDExtractorFactory
+org.apache.any23.extractor.rdf.NQuadsExtractorFactory
+org.apache.any23.extractor.rdf.NTriplesExtractorFactory
+org.apache.any23.extractor.rdf.RDFXMLExtractorFactory
+org.apache.any23.extractor.rdf.TriXExtractorFactory
+org.apache.any23.extractor.rdf.TurtleExtractorFactory
+org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory
+org.apache.any23.extractor.rdfa.RDFaExtractorFactory
+org.apache.any23.extractor.xpath.XPathExtractorFactory
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory
----------------------------------------------------------------------
diff --git a/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory b/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory
new file mode 100644
index 0000000..03f32cd
--- /dev/null
+++ b/core/src/main/resources/META-INF/services/org.apache.any23.writer.WriterFactory
@@ -0,0 +1,7 @@
+org.apache.any23.writer.JSONWriterFactory
+org.apache.any23.writer.NQuadsWriterFactory
+org.apache.any23.writer.NTriplesWriterFactory
+org.apache.any23.writer.RDFXMLWriterFactory
+org.apache.any23.writer.TriXWriterFactory
+org.apache.any23.writer.TurtleWriterFactory
+org.apache.any23.writer.URIListWriterFactory
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
index 271b498..b985b90 100644
--- a/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
+++ b/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
@@ -35,81 +35,93 @@ import org.slf4j.LoggerFactory;
*/
public class CSVExtractorTest extends AbstractExtractorTestCase {
- private static final Logger logger = LoggerFactory.getLogger(CSVExtractorTest.class);
-
- @Override
- protected ExtractorFactory<?> getExtractorFactory() {
- return new CSVExtractorFactory();
- }
-
- @Test
- public void testExtractionCommaSeparated() throws RepositoryException {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionSemicolonSeparated() throws RepositoryException {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionTabSeparated() throws RepositoryException {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 28);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
- }
-
- @Test
- public void testTypeManagement() throws RepositoryException {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 21);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, new LiteralImpl("2", XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
- assertContains(null, null, new LiteralImpl("5.2", XMLSchema.FLOAT));
- assertContains(null, null, new LiteralImpl("7.9", XMLSchema.FLOAT));
- assertContains(null, null, new LiteralImpl("10" , XMLSchema.INTEGER));
- }
-
- @Test
- public void testExtractionEmptyValue() throws RepositoryException {
- CSV csv = CSV.getInstance();
- assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
- logger.debug(dumpModelToRDFXML());
-
- assertModelNotEmpty();
- assertStatementsSize(null, null, null, 25);
- assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
- assertContains(null, csv.numberOfColumns, new LiteralImpl("4", XMLSchema.INTEGER));
- assertContains(null, csv.numberOfRows, new LiteralImpl("3", XMLSchema.INTEGER));
- assertContains(null, null, new LiteralImpl("Michele", XMLSchema.STRING));
- assertContains(null, null, new LiteralImpl("Giovanni", XMLSchema.STRING));
- }
+ private static final Logger logger = LoggerFactory
+ .getLogger(CSVExtractorTest.class);
+
+ @Override
+ protected ExtractorFactory<?> getExtractorFactory() {
+ return new CSVExtractorFactory();
+ }
+
+ @Test
+ public void testExtractionCommaSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-comma.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, new LiteralImpl("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, new LiteralImpl("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionSemicolonSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-semicolon.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, new LiteralImpl("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, new LiteralImpl("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionTabSeparated() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-tab.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 28);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, new LiteralImpl("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, new LiteralImpl("3",
+ XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testTypeManagement() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-type.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 21);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, new LiteralImpl("2",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, new LiteralImpl("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, new LiteralImpl("5.2", XMLSchema.FLOAT));
+ assertContains(null, null, new LiteralImpl("7.9", XMLSchema.FLOAT));
+ assertContains(null, null, new LiteralImpl("10", XMLSchema.INTEGER));
+ }
+
+ @Test
+ public void testExtractionEmptyValue() throws Exception {
+ CSV csv = CSV.getInstance();
+ assertExtract("/org/apache/any23/extractor/csv/test-missing.csv");
+ logger.debug(dumpModelToRDFXML());
+
+ assertModelNotEmpty();
+ assertStatementsSize(null, null, null, 25);
+ assertStatementsSize(null, RDF.TYPE, csv.rowType, 3);
+ assertContains(null, csv.numberOfColumns, new LiteralImpl("4",
+ XMLSchema.INTEGER));
+ assertContains(null, csv.numberOfRows, new LiteralImpl("3",
+ XMLSchema.INTEGER));
+ assertContains(null, null, new LiteralImpl("Michele", XMLSchema.STRING));
+ assertContains(null, null,
+ new LiteralImpl("Giovanni", XMLSchema.STRING));
+ }
}
http://git-wip-us.apache.org/repos/asf/any23/blob/fd822849/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java b/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
index 24da686..04a6ecc 100644
--- a/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
+++ b/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
@@ -24,14 +24,12 @@ import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.Prefixes;
-import org.kohsuke.MetaInfServices;
/**
* @author Peter Ansell p_ansell@yahoo.com
*
*/
// NOTE: Not enabling this in META-INF/services
-//@MetaInfServices(ExtractorFactory.class)
public class ExampleExtractorFactory extends SimpleExtractorFactory<ExampleExtractor> implements
ExtractorFactory<ExampleExtractor> {