You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2013/01/19 22:47:24 UTC
svn commit: r1435720 [2/3] - in /any23/trunk:
api/src/main/java/org/apache/any23/extractor/
api/src/main/java/org/apache/any23/plugin/ core/src/main/assembly/
core/src/main/java/org/apache/any23/cli/
core/src/main/java/org/apache/any23/extractor/ core/...
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,10 +20,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.WO;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
@@ -31,8 +28,6 @@ import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
-
/**
* Extractor able to extract the <a href="http://microformats.org/wiki/species">Species Microformat</a>.
* The data are represented using the
@@ -56,15 +51,6 @@ public class SpeciesExtractor extends En
"class",
};
- public final static ExtractorFactory<SpeciesExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-species",
- PopularPrefixes.createSubset("rdf", "wo"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-species.html",
- SpeciesExtractor.class
- );
-
/**
* Returns the description of this extractor.
*
@@ -72,7 +58,7 @@ public class SpeciesExtractor extends En
*/
@Override
public ExtractorDescription getDescription() {
- return factory;
+ return SpeciesExtractorFactory.getDescriptionInstance();
}
/**
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link SpeciesExtractor} instances under the name "html-mf-species"; one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class SpeciesExtractorFactory extends SimpleExtractorFactory<SpeciesExtractor> implements
+ ExtractorFactory<SpeciesExtractor> {
+
+ public static final String NAME = "html-mf-species";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "wo");
+
+ private static final ExtractorDescription descriptionInstance = new SpeciesExtractorFactory();
+
+ public SpeciesExtractorFactory() {
+ super(
+ SpeciesExtractorFactory.NAME,
+ SpeciesExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+ "example-mf-species.html");
+ }
+
+ @Override
+ public SpeciesExtractor createExtractor() {
+ return new SpeciesExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,17 +22,13 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.DCTERMS;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.openrdf.model.impl.ValueFactoryImpl;
import org.w3c.dom.Document;
import java.io.IOException;
-import java.util.Arrays;
/**
* Extracts the value of the <title> element of an
@@ -42,19 +38,9 @@ import java.util.Arrays;
*/
public class TitleExtractor implements TagSoupDOMExtractor {
- public static final String NAME = "html-head-title";
-
private static final DCTERMS vDCTERMS = DCTERMS.getInstance();
- public final static ExtractorFactory<TitleExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- PopularPrefixes.createSubset("dcterms"),
- Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
- "example-title.html",
- TitleExtractor.class
- );
-
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -75,8 +61,9 @@ public class TitleExtractor implements T
}
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return TitleExtractorFactory.getDescriptionInstance();
}
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link TitleExtractor} instances under the name "html-head-title"; one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class TitleExtractorFactory extends SimpleExtractorFactory<TitleExtractor> implements
+ ExtractorFactory<TitleExtractor> {
+
+ public static final String NAME = "html-head-title";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("dcterms");
+
+ private static final ExtractorDescription descriptionInstance = new TitleExtractorFactory();
+
+ public TitleExtractorFactory() {
+ super(
+ TitleExtractorFactory.NAME,
+ TitleExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+ "example-title.html");
+ }
+
+ @Override
+ public TitleExtractor createExtractor() {
+ return new TitleExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java Sat Jan 19 21:47:22 2013
@@ -24,14 +24,10 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.rdf.RDFParserFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.openrdf.model.URI;
import org.openrdf.rio.RDFParseException;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.turtle.TurtleParser;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
@@ -50,19 +46,9 @@ import java.util.List;
*/
public class TurtleHTMLExtractor implements Extractor.TagSoupDOMExtractor {
- public final static String NAME = "html-script-turtle";
-
- public final static ExtractorFactory<TurtleHTMLExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- PopularPrefixes.get(),
- Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
- "example-script-turtle.html",
- TurtleHTMLExtractor.class
- );
-
private RDFParser turtleParser;
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -83,8 +69,9 @@ public class TurtleHTMLExtractor impleme
processScriptNodes(documentURI, extractionContext,out, scriptNodes);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return TurtleHTMLExtractorFactory.getDescriptionInstance();
}
/**
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link TurtleHTMLExtractor} instances under the name "html-script-turtle"; one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class TurtleHTMLExtractorFactory extends SimpleExtractorFactory<TurtleHTMLExtractor> implements
+ ExtractorFactory<TurtleHTMLExtractor> {
+
+ public static final String NAME = "html-script-turtle";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.get();
+
+ private static final ExtractorDescription descriptionInstance = new TurtleHTMLExtractorFactory();
+
+ public TurtleHTMLExtractorFactory() {
+ super(
+ TurtleHTMLExtractorFactory.NAME,
+ TurtleHTMLExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+ "example-script-turtle.html");
+ }
+
+ @Override
+ public TurtleHTMLExtractor createExtractor() {
+ return new TurtleHTMLExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,10 +22,7 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.FOAF;
import org.apache.any23.vocab.XFN;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
@@ -37,7 +34,6 @@ import org.w3c.dom.Document;
import org.w3c.dom.Node;
import java.io.IOException;
-import java.util.Arrays;
/**
* Extractor for the <a href="http://microformats.org/wiki/xfn">XFN</a>
@@ -56,19 +52,12 @@ public class XFNExtractor implements Tag
private HTMLDocument document;
private ExtractionResult out;
- public final static ExtractorFactory<XFNExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-xfn",
- PopularPrefixes.createSubset("rdf", "foaf", "xfn"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-xfn.html",
- XFNExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return XFNExtractorFactory.getDescriptionInstance();
}
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link XFNExtractor} instances under the name "html-mf-xfn"; one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class XFNExtractorFactory extends SimpleExtractorFactory<XFNExtractor> implements
+ ExtractorFactory<XFNExtractor> {
+
+ public static final String NAME = "html-mf-xfn";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "foaf", "xfn");
+
+ private static final ExtractorDescription descriptionInstance = new XFNExtractorFactory();
+
+ public XFNExtractorFactory() {
+ super(
+ XFNExtractorFactory.NAME,
+ XFNExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+ "example-mf-xfn.html");
+ }
+
+ @Override
+ public XFNExtractor createExtractor() {
+ return new XFNExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java Sat Jan 19 21:47:22 2013
@@ -24,10 +24,7 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.html.DomUtils;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.DCTERMS;
import org.apache.any23.vocab.XHTML;
@@ -44,7 +41,6 @@ import org.w3c.dom.NodeList;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
-import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@@ -64,23 +60,15 @@ public class MicrodataExtractor implemen
private static final URI MICRODATA_ITEM
= RDFUtils.uri("http://www.w3.org/1999/xhtml/microdata#item");
- public final static ExtractorFactory<MicrodataExtractor> factory =
- SimpleExtractorFactory.create(
- "html-microdata",
- PopularPrefixes.createSubset("rdf", "doac", "foaf"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-microdata.html",
- MicrodataExtractor.class
- );
-
private String documentLanguage;
private boolean isStrict;
private String defaultNamespace;
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return MicrodataExtractorFactory.getDescriptionInstance();
}
/**
@@ -90,6 +78,7 @@ public class MicrodataExtractor implemen
* to avoid performing actions 5.2.1, 5.2.2, 5.2.3, 5.2.4 if step 5.2.6 doesn't detect any
* Microdata.
*/
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.microdata;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link MicrodataExtractor} instances under the name "html-microdata"; one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class MicrodataExtractorFactory extends SimpleExtractorFactory<MicrodataExtractor> implements
+ ExtractorFactory<MicrodataExtractor> {
+
+ public static final String NAME = "html-microdata";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "doac", "foaf");
+
+ private static final ExtractorDescription descriptionInstance = new MicrodataExtractorFactory();
+
+ public MicrodataExtractorFactory() {
+ super(
+ MicrodataExtractorFactory.NAME,
+ MicrodataExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+ "example-microdata.html");
+ }
+
+ @Override
+ public MicrodataExtractor createExtractor() {
+ return new MicrodataExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,12 +20,7 @@ package org.apache.any23.extractor.rdf;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.helpers.RDFParserBase;
-
-import java.util.Arrays;
/**
* Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -35,21 +30,6 @@ import java.util.Arrays;
*/
public class NQuadsExtractor extends BaseRDFExtractor {
- public final static ExtractorFactory<NQuadsExtractor> factory =
- SimpleExtractorFactory.create(
- "rdf-nq",
- null,
- Arrays.asList(
- "text/x-nquads;q=0.1",
- "text/rdf+nq;q=0.1",
- "text/nq;q=0.1",
- "text/nquads;q=0.1",
- "text/n-quads;q=0.1"
- ),
- "example-nquads.nq",
- NQuadsExtractor.class
- );
-
public NQuadsExtractor(boolean verifyDataType, boolean stopAtFirstError) {
super(verifyDataType, stopAtFirstError);
}
@@ -58,8 +38,9 @@ public class NQuadsExtractor extends Bas
this(false, false);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return NQuadsExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,50 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link NQuadsExtractor} instances under the name "rdf-nq" (PREFIXES is null); one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class NQuadsExtractorFactory extends SimpleExtractorFactory<NQuadsExtractor> implements
+ ExtractorFactory<NQuadsExtractor> {
+
+ public static final String NAME = "rdf-nq";
+
+ public static final Prefixes PREFIXES = null;
+
+ private static final ExtractorDescription descriptionInstance = new NQuadsExtractorFactory();
+
+ public NQuadsExtractorFactory() {
+ super(
+ NQuadsExtractorFactory.NAME,
+ NQuadsExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "text/x-nquads;q=0.1",
+ "text/rdf+nq;q=0.1",
+ "text/nq;q=0.1",
+ "text/nquads;q=0.1",
+ "text/n-quads;q=0.1"
+ ),
+ "example-nquads.nq");
+ }
+
+ @Override
+ public NQuadsExtractor createExtractor() {
+ return new NQuadsExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,12 +20,7 @@ package org.apache.any23.extractor.rdf;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.helpers.RDFParserBase;
-
-import java.util.Arrays;
/**
* Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -33,19 +28,6 @@ import java.util.Arrays;
*/
public class NTriplesExtractor extends BaseRDFExtractor {
- public final static ExtractorFactory<NTriplesExtractor> factory =
- SimpleExtractorFactory.create(
- "rdf-nt",
- null,
- Arrays.asList(
- "text/nt;q=0.1",
- "text/ntriples;q=0.1",
- "text/plain;q=0.1"
- ),
- "example-ntriples.nt",
- NTriplesExtractor.class
- );
-
public NTriplesExtractor(boolean verifyDataType, boolean stopAtFirstError) {
super(verifyDataType, stopAtFirstError);
}
@@ -57,8 +39,9 @@ public class NTriplesExtractor extends B
this(false, false);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return NTriplesExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,48 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link NTriplesExtractor} instances under the name "rdf-nt" (PREFIXES is null); one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class NTriplesExtractorFactory extends SimpleExtractorFactory<NTriplesExtractor> implements
+ ExtractorFactory<NTriplesExtractor> {
+
+ public static final String NAME = "rdf-nt";
+
+ public static final Prefixes PREFIXES = null;
+
+ private static final ExtractorDescription descriptionInstance = new NTriplesExtractorFactory();
+
+ public NTriplesExtractorFactory() {
+ super(
+ NTriplesExtractorFactory.NAME,
+ NTriplesExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "text/nt;q=0.1",
+ "text/ntriples;q=0.1",
+ "text/plain;q=0.1"
+ ),
+ "example-ntriples.nt");
+ }
+
+ @Override
+ public NTriplesExtractor createExtractor() {
+ return new NTriplesExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,12 +20,7 @@ package org.apache.any23.extractor.rdf;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.helpers.RDFParserBase;
-
-import java.util.Arrays;
/**
* Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -34,22 +29,6 @@ import java.util.Arrays;
*/
public class RDFXMLExtractor extends BaseRDFExtractor {
- public final static ExtractorFactory<RDFXMLExtractor> factory =
- SimpleExtractorFactory.create(
- "rdf-xml",
- null,
- Arrays.asList(
- "application/rdf+xml",
- "text/rdf",
- "text/rdf+xml",
- "application/rdf"
- // "application/xml;q=0.2",
- // "text/xml;q=0.2"
- ),
- "example-rdfxml.rdf",
- RDFXMLExtractor.class
- );
-
/**
* Constructor, allows to specify the validation and error handling policies.
*
@@ -69,8 +48,9 @@ public class RDFXMLExtractor extends Bas
this(true, true);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return RDFXMLExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,51 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory that creates {@link RDFXMLExtractor} instances under the name "rdf-xml" (PREFIXES is null); one shared instance is handed out as the extractor description.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class RDFXMLExtractorFactory extends SimpleExtractorFactory<RDFXMLExtractor> implements
+ ExtractorFactory<RDFXMLExtractor> {
+
+ public static final String NAME = "rdf-xml";
+
+ public static final Prefixes PREFIXES = null;
+
+ private static final ExtractorDescription descriptionInstance = new RDFXMLExtractorFactory();
+
+ public RDFXMLExtractorFactory() {
+ super(
+ RDFXMLExtractorFactory.NAME,
+ RDFXMLExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "application/rdf+xml",
+ "text/rdf",
+ "text/rdf+xml",
+ "application/rdf"
+ // "application/xml;q=0.2",
+ // "text/xml;q=0.2"
+ ),
+ "example-rdfxml.rdf");
+ }
+
+ @Override
+ public RDFXMLExtractor createExtractor() {
+ return new RDFXMLExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,12 +20,7 @@ package org.apache.any23.extractor.rdf;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.helpers.RDFParserBase;
-
-import java.util.Arrays;
/**
* Concrete implementation of {@link org.apache.any23.extractor.Extractor.ContentExtractor}
@@ -35,17 +30,6 @@ import java.util.Arrays;
*/
public class TriXExtractor extends BaseRDFExtractor {
- public final static ExtractorFactory<TriXExtractor> factory =
- SimpleExtractorFactory.create(
- "rdf-trix",
- null,
- Arrays.asList(
- "application/trix"
- ),
- "example-trix.trx",
- TriXExtractor.class
- );
-
/**
* Constructor, allows to specify the validation and error handling policies.
*
@@ -65,8 +49,9 @@ public class TriXExtractor extends BaseR
this(true, true);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return TriXExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,46 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link TriXExtractor}, registered as a service via {@link MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class TriXExtractorFactory extends SimpleExtractorFactory<TriXExtractor> implements
+ ExtractorFactory<TriXExtractor> {
+
+ public static final String NAME = "rdf-trix";
+
+ public static final Prefixes PREFIXES = null;
+ // Shared description instance returned by getDescriptionInstance().
+ private static final ExtractorDescription descriptionInstance = new TriXExtractorFactory();
+
+ public TriXExtractorFactory() {
+ super(
+ TriXExtractorFactory.NAME,
+ TriXExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "application/trix"
+ ),
+ "example-trix.trx");
+ }
+
+ @Override
+ public TriXExtractor createExtractor() {
+ return new TriXExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,12 +20,7 @@ package org.apache.any23.extractor.rdf;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.rio.RDFParser;
-import org.openrdf.rio.helpers.RDFParserBase;
-
-import java.util.Arrays;
/**
*
@@ -35,22 +30,6 @@ import java.util.Arrays;
*/
public class TurtleExtractor extends BaseRDFExtractor {
- public static final ExtractorFactory<TurtleExtractor> factory =
- SimpleExtractorFactory.create(
- "rdf-turtle",
- null,
- Arrays.asList(
- "text/rdf+n3",
- "text/n3",
- "application/n3",
- "application/x-turtle",
- "application/turtle",
- "text/turtle"
- ),
- "example-turtle.ttl",
- TurtleExtractor.class
- );
-
/**
* Constructor, allows to specify the validation and error handling policies.
*
@@ -70,8 +49,9 @@ public class TurtleExtractor extends Bas
this(false, false);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return TurtleExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,51 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdf;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link TurtleExtractor}, registered as a service via {@link MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class TurtleExtractorFactory extends SimpleExtractorFactory<TurtleExtractor> implements
+ ExtractorFactory<TurtleExtractor> {
+
+ public static final String NAME = "rdf-turtle";
+
+ public static final Prefixes PREFIXES = null;
+ // Shared description instance returned by getDescriptionInstance().
+ private static final ExtractorDescription descriptionInstance = new TurtleExtractorFactory();
+
+ public TurtleExtractorFactory() {
+ super(
+ TurtleExtractorFactory.NAME,
+ TurtleExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "text/rdf+n3",
+ "text/n3",
+ "application/n3",
+ "application/x-turtle",
+ "application/turtle",
+ "text/turtle"
+ ),
+ "example-turtle.ttl");
+ }
+
+ @Override
+ public TurtleExtractor createExtractor() {
+ return new TurtleExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java Sat Jan 19 21:47:22 2013
@@ -23,13 +23,10 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.w3c.dom.Document;
import java.io.IOException;
import java.net.URL;
-import java.util.Arrays;
/**
* {@link org.apache.any23.extractor.Extractor} implementation for
@@ -39,78 +36,73 @@ import java.util.Arrays;
*/
public class RDFa11Extractor implements Extractor.TagSoupDOMExtractor {
- public final static String NAME = "html-rdfa11";
-
- public final static ExtractorFactory<RDFa11Extractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- null,
- Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
- "example-rdfa11.html",
- RDFa11Extractor.class
- );
-
private final RDFa11Parser parser;
- private boolean verifyDataType;
+ private boolean verifyDataType;
- private boolean stopAtFirstError;
+ private boolean stopAtFirstError;
- /**
- * Constructor, allows to specify the validation and error handling policies.
- *
- * @param verifyDataType if <code>true</code> the data types will be verified,
- * if <code>false</code> will be ignored.
- * @param stopAtFirstError if <code>true</code> the parser will stop at first parsing error,
- * if <code>false</code> will ignore non blocking errors.
- */
- public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
- this.parser = new RDFa11Parser();
- this.verifyDataType = verifyDataType;
- this.stopAtFirstError = stopAtFirstError;
- }
-
- /**
- * Default constructor, with no verification of data types and not stop at first error.
- */
- public RDFa11Extractor() {
- this(false, false);
- }
-
- public boolean isVerifyDataType() {
- return verifyDataType;
- }
-
- public void setVerifyDataType(boolean verifyDataType) {
- this.verifyDataType = verifyDataType;
- }
-
- public boolean isStopAtFirstError() {
- return stopAtFirstError;
- }
-
- public void setStopAtFirstError(boolean stopAtFirstError) {
- this.stopAtFirstError = stopAtFirstError;
- }
-
- public void run(
- ExtractionParameters extractionParameters,
- ExtractionContext extractionContext,
- Document in,
- ExtractionResult out
- ) throws IOException, ExtractionException {
- try {
- parser.processDocument( new URL(extractionContext.getDocumentURI().toString() ), in, out );
- } catch (RDFa11ParserException rpe) {
- throw new ExtractionException("Error while performing extraction.", rpe);
- }
- }
-
- /**
- * @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
- */
- public ExtractorDescription getDescription() {
- return factory;
- }
+ /**
+ * Constructor, allows to specify the validation and error handling
+ * policies.
+ *
+ * @param verifyDataType
+ * if <code>true</code> the data types will be verified, if
+ * <code>false</code> will be ignored.
+ * @param stopAtFirstError
+ * if <code>true</code> the parser will stop at first parsing
+ * error, if <code>false</code> will ignore non blocking errors.
+ */
+ public RDFa11Extractor(boolean verifyDataType, boolean stopAtFirstError) {
+ this.parser = new RDFa11Parser();
+ this.verifyDataType = verifyDataType;
+ this.stopAtFirstError = stopAtFirstError;
+ }
+
+ /**
+ * Default constructor, with no verification of data types and not stop at
+ * first error.
+ */
+ public RDFa11Extractor() {
+ this(false, false);
+ }
+
+ public boolean isVerifyDataType() {
+ return verifyDataType;
+ }
+
+ public void setVerifyDataType(boolean verifyDataType) {
+ this.verifyDataType = verifyDataType;
+ }
+
+ public boolean isStopAtFirstError() {
+ return stopAtFirstError;
+ }
+
+ public void setStopAtFirstError(boolean stopAtFirstError) {
+ this.stopAtFirstError = stopAtFirstError;
+ }
+
+ @Override
+ public void run(ExtractionParameters extractionParameters,
+ ExtractionContext extractionContext, Document in,
+ ExtractionResult out) throws IOException, ExtractionException {
+ try {
+ parser.processDocument(new URL(extractionContext.getDocumentURI()
+ .toString()), in, out);
+ } catch (RDFa11ParserException rpe) {
+ throw new ExtractionException("Error while performing extraction.",
+ rpe);
+ }
+ }
+
+ /**
+ * @return the {@link org.apache.any23.extractor.ExtractorDescription} of
+ * this extractor
+ */
+ @Override
+ public ExtractorDescription getDescription() {
+ return RDFa11ExtractorFactory.getDescriptionInstance();
+ }
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,44 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdfa;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link RDFa11Extractor}, registered as a service via {@link MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class RDFa11ExtractorFactory extends SimpleExtractorFactory<RDFa11Extractor> implements
+ ExtractorFactory<RDFa11Extractor> {
+
+ public static final String NAME = "html-rdfa11";
+
+ public static final Prefixes PREFIXES = null;
+ // Shared description instance returned by getDescriptionInstance().
+ private static final ExtractorDescription descriptionInstance = new RDFa11ExtractorFactory();
+
+ public RDFa11ExtractorFactory() {
+ super(
+ RDFa11ExtractorFactory.NAME,
+ RDFa11ExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
+ "example-rdfa11.html");
+ }
+
+ @Override
+ public RDFa11Extractor createExtractor() {
+ return new RDFa11Extractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java Sat Jan 19 21:47:22 2013
@@ -23,8 +23,6 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.rdf.RDFParserFactory;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.openrdf.rio.RDFHandlerException;
@@ -36,7 +34,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.io.StringWriter;
-import java.util.Arrays;
/**
* Extractor for RDFa in HTML, based on Fabien Gadon's XSLT transform, found
@@ -56,15 +53,6 @@ public class RDFaExtractor implements Ta
private static XSLTStylesheet xslt = null;
- public final static ExtractorFactory<RDFaExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- null,
- Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
- null,
- RDFaExtractor.class
- );
-
/**
* Returns a {@link XSLTStylesheet} able to distill RDFa from
* HTML pages.
@@ -125,6 +113,7 @@ public class RDFaExtractor implements Ta
this.stopAtFirstError = stopAtFirstError;
}
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -166,8 +155,9 @@ public class RDFaExtractor implements Ta
/**
* @return the {@link org.apache.any23.extractor.ExtractorDescription} of this extractor
*/
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return RDFaExtractorFactory.getDescriptionInstance();
}
}
\ No newline at end of file
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,44 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.rdfa;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link RDFaExtractor}, registered as a service via {@link MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class RDFaExtractorFactory extends SimpleExtractorFactory<RDFaExtractor> implements
+ ExtractorFactory<RDFaExtractor> {
+
+ public static final String NAME = "html-rdfa";
+
+ public static final Prefixes PREFIXES = null;
+ // Shared description instance returned by getDescriptionInstance().
+ private static final ExtractorDescription descriptionInstance = new RDFaExtractorFactory();
+
+ public RDFaExtractorFactory() {
+ super(
+ RDFaExtractorFactory.NAME,
+ RDFaExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.3", "application/xhtml+xml;q=0.3"),
+ null);
+ }
+
+ @Override
+ public RDFaExtractor createExtractor() {
+ return new RDFaExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java Sat Jan 19 21:47:22 2013
@@ -23,14 +23,11 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.openrdf.model.URI;
import org.w3c.dom.Document;
import java.io.IOException;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
/**
@@ -42,19 +39,11 @@ import java.util.List;
*/
public class XPathExtractor implements Extractor.TagSoupDOMExtractor {
- public final static String NAME = "html-xpath";
-
- public final static ExtractorFactory<XPathExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- null,
- Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
- null,
- XPathExtractor.class
- );
-
private final List<XPathExtractionRule> xPathExtractionRules = new ArrayList<XPathExtractionRule>();
+ public XPathExtractor() {
+ }
+
public XPathExtractor(List<XPathExtractionRule> rules) {
xPathExtractionRules.addAll(rules);
}
@@ -71,6 +60,7 @@ public class XPathExtractor implements E
return xPathExtractionRules.contains(rule);
}
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -86,8 +76,9 @@ public class XPathExtractor implements E
}
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return XPathExtractorFactory.getDescriptionInstance();
}
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.xpath;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link XPathExtractor}, registered as a service via {@link MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class XPathExtractorFactory extends SimpleExtractorFactory<XPathExtractor> implements
+ ExtractorFactory<XPathExtractor> {
+
+ public static final String NAME = "html-xpath";
+
+ public static final Prefixes PREFIXES = null;
+ // Shared description instance returned by getDescriptionInstance().
+ private static final ExtractorDescription descriptionInstance = new XPathExtractorFactory();
+
+ public XPathExtractorFactory() {
+ super(
+ XPathExtractorFactory.NAME,
+ XPathExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+ null);
+ }
+
+ @Override
+ public XPathExtractor createExtractor() {
+ return new XPathExtractor(); // no-argument constructor: the extractor starts with an empty rule list
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.java Sat Jan 19 21:47:22 2013
@@ -19,6 +19,7 @@ package org.apache.any23.filter;
import org.apache.any23.extractor.ExtractionContext;
import org.apache.any23.extractor.html.TitleExtractor;
+import org.apache.any23.extractor.html.TitleExtractorFactory;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
import org.openrdf.model.Resource;
@@ -75,7 +76,7 @@ public class IgnoreTitlesOfEmptyDocument
}
private boolean isTitleContext(ExtractionContext context) {
- return context.getExtractorName().equals(TitleExtractor.NAME);
+ return context.getExtractorName().equals(TitleExtractorFactory.NAME);
}
public void endDocument(URI documentURI) throws TripleHandlerException {
Modified: any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java Sat Jan 19 21:47:22 2013
@@ -43,6 +43,7 @@ import org.apache.any23.writer.Reporting
import org.apache.any23.writer.RepositoryWriter;
import org.apache.any23.writer.TripleHandler;
import org.apache.any23.writer.TripleHandlerException;
+import org.apache.commons.io.IOUtils;
import org.junit.Test;
import org.openrdf.model.Statement;
import org.openrdf.repository.RepositoryConnection;
@@ -481,7 +482,7 @@ public class Any23Test extends Any23Onli
@Test
public void testMicrodataSupport() throws Exception {
- final String htmlWithMicrodata = StreamUtils.asString(
+ final String htmlWithMicrodata = IOUtils.toString(
this.getClass().getResourceAsStream("/microdata/microdata-basic.html")
);
assertExtractorActivation(htmlWithMicrodata, MicrodataExtractor.class);
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -39,7 +39,7 @@ public class CSVExtractorTest extends Ab
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return CSVExtractor.factory;
+ return new CSVExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractor.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,16 +22,12 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.FOAF;
import org.apache.any23.extractor.Extractor.BlindExtractor;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.RDF;
import java.io.IOException;
-import java.util.Collections;
/**
* Example concrete implementation of {@link org.apache.any23.extractor.Extractor.BlindExtractor}.
@@ -40,6 +36,7 @@ public class ExampleExtractor implements
private static final FOAF vFOAF = FOAF.getInstance();
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -50,16 +47,8 @@ public class ExampleExtractor implements
out.writeTriple(documentURI, RDF.TYPE, vFOAF.Document);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return ExampleExtractorFactory.getDescriptionInstance();
}
-
- public static final ExtractorFactory<ExampleExtractor> factory =
- SimpleExtractorFactory.create(
- "example",
- PopularPrefixes.createSubset("rdf", "foaf"),
- Collections.singleton("*/*;q=0.01"),
- "http://example.com/",
- ExampleExtractor.class
- );
}
Added: any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java (added)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,46 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.example;
+
+import java.util.Collections;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Test-only {@link ExtractorFactory} for {@link ExampleExtractor}; deliberately not registered as a service.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+// NOTE: Not enabling this in META-INF/services
+//@MetaInfServices(ExtractorFactory.class)
+public class ExampleExtractorFactory extends SimpleExtractorFactory<ExampleExtractor> implements
+ ExtractorFactory<ExampleExtractor> {
+
+ public static final String NAME = "example";
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "foaf");
+ // Shared description instance returned by getDescriptionInstance(); declared after PREFIXES, which its constructor reads.
+ private static final ExtractorDescription descriptionInstance = new ExampleExtractorFactory();
+
+ public ExampleExtractorFactory() {
+ super(
+ ExampleExtractorFactory.NAME,
+ ExampleExtractorFactory.PREFIXES,
+ Collections.singleton("*/*;q=0.01"),
+ "http://example.com/");
+ }
+
+ @Override
+ public ExampleExtractor createExtractor() {
+ return new ExampleExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/AdrExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/AdrExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/AdrExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/AdrExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -38,7 +38,7 @@ public class AdrExtractorTest extends Ab
private static final VCARD vVCARD = VCARD.getInstance();
protected ExtractorFactory<?> getExtractorFactory() {
- return AdrExtractor.factory;
+ return new AdrExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -52,7 +52,7 @@ public class HCalendarExtractorTest exte
private final static URI vtodo = vICAL.Vtodo;
protected ExtractorFactory<?> getExtractorFactory() {
- return HCalendarExtractor.factory;
+ return new HCalendarExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -44,7 +44,7 @@ public class HCardExtractorTest extends
private static final VCARD vVCARD = VCARD.getInstance();
protected ExtractorFactory<?> getExtractorFactory() {
- return HCardExtractor.factory;
+ return new HCardExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -46,7 +46,7 @@ public class HListingExtractorTest exten
private static final Logger logger = LoggerFactory.getLogger(HListingExtractorTest.class);
protected ExtractorFactory<?> getExtractorFactory() {
- return HListingExtractor.factory;
+ return new HListingExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -36,7 +36,7 @@ public class HRecipeExtractorTest extend
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return HRecipeExtractor.factory;
+ return new HRecipeExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -51,7 +51,7 @@ public class HResumeExtractorTest extend
private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
protected ExtractorFactory<?> getExtractorFactory() {
- return HResumeExtractor.factory;
+ return new HResumeExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -48,7 +48,7 @@ public class HReviewExtractorTest extend
private static final Logger logger = LoggerFactory.getLogger(HReviewExtractorTest.class);
protected ExtractorFactory<?> getExtractorFactory() {
- return HReviewExtractor.factory;
+ return new HReviewExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -33,7 +33,7 @@ public class HTMLMetaExtractorTest exten
private static final SINDICE vSINDICE = SINDICE.getInstance();
protected ExtractorFactory<?> getExtractorFactory() {
- return HTMLMetaExtractor.factory;
+ return new HTMLMetaExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HeadLinkExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HeadLinkExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HeadLinkExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HeadLinkExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -33,7 +33,7 @@ public class HeadLinkExtractorTest exten
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return HeadLinkExtractor.factory;
+ return new HeadLinkExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/LicenseExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/LicenseExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/LicenseExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/LicenseExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -44,7 +44,7 @@ public class LicenseExtractorTest extend
private URI apache = RDFUtils.uri("http://www.apache.org/licenses/LICENSE-2.0");
public ExtractorFactory<?> getExtractorFactory() {
- return LicenseExtractor.factory;
+ return new LicenseExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/RDFMergerTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/RDFMergerTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/RDFMergerTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/RDFMergerTest.java Sat Jan 19 21:47:22 2013
@@ -436,7 +436,7 @@ public class RDFMergerTest extends Abstr
InputStream input = new BufferedInputStream(this.getClass().getResourceAsStream(filename));
Document document = new TagSoupParser(input, baseURI.stringValue()).getDOM();
- HCardExtractor hCardExtractor = HCardExtractor.factory.createExtractor();
+ HCardExtractor hCardExtractor = new HCardExtractorFactory().createExtractor();
ExtractionContext hcExtractionContext = new ExtractionContext(
hCardExtractor.getDescription().getExtractorName(),
baseURI
@@ -451,7 +451,7 @@ public class RDFMergerTest extends Abstr
new RepositoryWriter(getConnection())
)
);
- XFNExtractor xfnExtractor = XFNExtractor.factory.createExtractor();
+ XFNExtractor xfnExtractor = new XFNExtractorFactory().createExtractor();
ExtractionContext xfnExtractionContext = new ExtractionContext(
xfnExtractor.getDescription().getExtractorName(),
baseURI
@@ -473,7 +473,7 @@ public class RDFMergerTest extends Abstr
InputStream input = new BufferedInputStream(this.getClass().getResourceAsStream(filename));
Document document = new TagSoupParser(input, baseURI.stringValue()).getDOM();
- HCardExtractor hCardExtractor = HCardExtractor.factory.createExtractor();
+ HCardExtractor hCardExtractor = new HCardExtractorFactory().createExtractor();
ExtractionContext hCardExtractionContext = new ExtractionContext(
hCardExtractor.getDescription().getExtractorName(), baseURI
);
@@ -487,7 +487,7 @@ public class RDFMergerTest extends Abstr
)
);
- GeoExtractor geoExtractor = GeoExtractor.factory.createExtractor();
+ GeoExtractor geoExtractor = new GeoExtractorFactory().createExtractor();
ExtractionContext geoExtractionContext = new ExtractionContext(
geoExtractor.getDescription().getExtractorName(), baseURI
);
@@ -502,7 +502,7 @@ public class RDFMergerTest extends Abstr
)
);
- AdrExtractor adrExtractor = AdrExtractor.factory.createExtractor();
+ AdrExtractor adrExtractor = new AdrExtractorFactory().createExtractor();
ExtractionContext adrExtractionContext = new ExtractionContext(
adrExtractor.getDescription().getExtractorName(), baseURI
);
@@ -523,7 +523,7 @@ public class RDFMergerTest extends Abstr
extractHCardAndRelated(filename);
InputStream input = new BufferedInputStream(this.getClass().getResourceAsStream(filename));
Document document = new TagSoupParser(input, baseURI.stringValue()).getDOM();
- HReviewExtractor hReviewExtractor = HReviewExtractor.factory.createExtractor();
+ HReviewExtractor hReviewExtractor = new HReviewExtractorFactory().createExtractor();
ExtractionContext hreviewExtractionContext = new ExtractionContext(
hReviewExtractor.getDescription().getExtractorName(), baseURI
);
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/SpeciesExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/SpeciesExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/SpeciesExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/SpeciesExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -36,8 +36,9 @@ public class SpeciesExtractorTest extend
private static final Logger logger = LoggerFactory.getLogger(SpeciesExtractorTest.class);
+ @Override
protected ExtractorFactory<?> getExtractorFactory() {
- return SpeciesExtractor.factory;
+ return new SpeciesExtractorFactory();
}
/**
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TitleExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TitleExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TitleExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TitleExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -36,8 +36,9 @@ public class TitleExtractorTest extends
private Literal helloLiteral = RDFUtils.literal("Hello World!");
+ @Override
protected ExtractorFactory<?> getExtractorFactory() {
- return TitleExtractor.factory;
+ return new TitleExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TurtleHTMLExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TurtleHTMLExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TurtleHTMLExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TurtleHTMLExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -38,7 +38,7 @@ public class TurtleHTMLExtractorTest ext
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return TurtleHTMLExtractor.factory;
+ return new TurtleHTMLExtractorFactory();
}
/**
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -47,7 +47,7 @@ public class XFNExtractorTest extends Ab
private final static URI charliesHomepage = RDFUtils.uri("http://charlie.example.com/");
protected ExtractorFactory<?> getExtractorFactory() {
- return XFNExtractor.factory;
+ return new XFNExtractorFactory();
}
@Test
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -52,7 +52,7 @@ public class MicrodataExtractorTest exte
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return MicrodataExtractor.factory;
+ return new MicrodataExtractorFactory();
}
/**
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -231,7 +231,7 @@ public class RDFa11ExtractorTest extends
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return RDFa11Extractor.factory;
+ return new RDFa11ExtractorFactory();
}
}
Modified: any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -74,7 +74,7 @@ public class RDFaExtractorTest extends A
@Override
protected ExtractorFactory<?> getExtractorFactory() {
- return RDFaExtractor.factory;
+ return new RDFaExtractorFactory();
}
}
Modified: any23/trunk/core/src/test/java/org/apache/any23/plugin/Any23PluginManagerTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/test/java/org/apache/any23/plugin/Any23PluginManagerTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/test/java/org/apache/any23/plugin/Any23PluginManagerTest.java (original)
+++ any23/trunk/core/src/test/java/org/apache/any23/plugin/Any23PluginManagerTest.java Sat Jan 19 21:47:22 2013
@@ -31,6 +31,7 @@ import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.any23.cli.Tool;
+import org.apache.any23.extractor.ExtractorFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
@@ -62,8 +63,8 @@ public class Any23PluginManagerTest {
@Test
public void testGetPlugins() throws IOException {
- Iterator<ExtractorPlugin> extractorPlugins = manager.getExtractors();
- assertFalse(extractorPlugins.hasNext());
+ Iterator<ExtractorFactory> extractorPlugins = manager.getExtractors();
+ assertTrue(extractorPlugins.hasNext());
}
// TODO: move in FileUtils
Modified: any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java (original)
+++ any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java Sat Jan 19 21:47:22 2013
@@ -28,8 +28,7 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.vocab.SINDICE;
import org.kohsuke.MetaInfServices;
import org.openrdf.model.URI;
@@ -39,7 +38,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.List;
/**
@@ -51,8 +49,6 @@ import java.util.List;
@MetaInfServices( value = Extractor.class )
public class HTMLScraperExtractor implements Extractor.ContentExtractor {
- public final static String NAME = "html-scraper";
-
public final static URI PAGE_CONTENT_DE_PROPERTY =
ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/de");
public final static URI PAGE_CONTENT_AE_PROPERTY =
@@ -62,15 +58,6 @@ public class HTMLScraperExtractor implem
public final static URI PAGE_CONTENT_CE_PROPERTY =
ValueFactoryImpl.getInstance().createURI(SINDICE.NS + "pagecontent/ce");
- protected final static ExtractorFactory<HTMLScraperExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- null,
- Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
- null,
- HTMLScraperExtractor.class
- );
-
private final List<ExtractionRule> extractionRules = new ArrayList<ExtractionRule>();
public HTMLScraperExtractor() {
@@ -89,6 +76,7 @@ public class HTMLScraperExtractor implem
return extractors.toArray( new String[extractors.size()] );
}
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -110,10 +98,12 @@ public class HTMLScraperExtractor implem
}
}
- public ExtractorFactory getDescription() {
- return factory;
+ @Override
+ public ExtractorDescription getDescription() {
+ return HTMLScraperExtractorFactory.getDescriptionInstance();
}
+ @Override
public void setStopAtFirstError(boolean b) {
// Ignored.
}