You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2013/01/19 22:47:24 UTC
svn commit: r1435720 [1/3] - in /any23/trunk:
api/src/main/java/org/apache/any23/extractor/
api/src/main/java/org/apache/any23/plugin/ core/src/main/assembly/
core/src/main/java/org/apache/any23/cli/
core/src/main/java/org/apache/any23/extractor/ core/...
Author: lewismc
Date: Sat Jan 19 21:47:22 2013
New Revision: 1435720
URL: http://svn.apache.org/viewvc?rev=1435720&view=rev
Log:
ANY23-26 part1 - Improvement to spi-extractors
Added:
any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractorFactory.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractorFactory.java
any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java
any23/trunk/plugins/integration-test/src/test/resources/log4j.properties
any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java
Modified:
any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java
any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java
any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java
any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java
any23/trunk/core/src/main/assembly/bin.xml
any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/SpeciesExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TitleExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/TurtleHTMLExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/html/XFNExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/microdata/MicrodataExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NQuadsExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/NTriplesExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/RDFXMLExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TriXExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdf/TurtleExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFa11Extractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/rdfa/RDFaExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/extractor/xpath/XPathExtractor.java
any23/trunk/core/src/main/java/org/apache/any23/filter/IgnoreTitlesOfEmptyDocuments.java
any23/trunk/core/src/test/java/org/apache/any23/Any23Test.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/csv/CSVExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/example/ExampleExtractor.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/AdrExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCalendarExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HCardExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HListingExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HRecipeExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HResumeExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HReviewExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HTMLMetaExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/HeadLinkExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/LicenseExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/RDFMergerTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/SpeciesExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TitleExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/TurtleHTMLExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/html/XFNExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/microdata/MicrodataExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFa11ExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/extractor/rdfa/RDFaExtractorTest.java
any23/trunk/core/src/test/java/org/apache/any23/plugin/Any23PluginManagerTest.java
any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractor.java
any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
any23/trunk/plugins/integration-test/pom.xml
any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
Modified: any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java (original)
+++ any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorDescription.java Sat Jan 19 21:47:22 2013
@@ -35,6 +35,13 @@ public interface ExtractorDescription {
String getExtractorName();
/**
+ * Returns the label for extractors created from this factory.
+ *
+ * @return A string label describing the type of extractors created from this factory.
+ */
+ String getExtractorLabel();
+
+ /**
* An instance defining the prefixes supported by this extractor.
*
* @return prefixes instance.
Modified: any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java (original)
+++ any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -29,13 +29,6 @@ import java.util.Collection;
public interface ExtractorFactory<T extends Extractor<?>> extends ExtractorDescription {
/**
- * Returns the extractor type.
- *
- * @return the not <code>null</code> extractor class.
- */
- Class<T> getExtractorType();
-
- /**
* Creates an extractor instance.
*
* @return an instance of the extractor associated to this factory.
Modified: any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java (original)
+++ any23/trunk/api/src/main/java/org/apache/any23/extractor/ExtractorRegistry.java Sat Jan 19 21:47:22 2013
@@ -81,4 +81,6 @@ public interface ExtractorRegistry {
*/
List<String> getAllNames();
+ void unregister(String name);
+
}
\ No newline at end of file
Modified: any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java (original)
+++ any23/trunk/api/src/main/java/org/apache/any23/plugin/Any23PluginManager.java Sat Jan 19 21:47:22 2013
@@ -52,11 +52,6 @@ public class Any23PluginManager {
public static final String CLI_PACKAGE = Tool.class.getPackage().getName();
/**
- * Any23 Plugins package.
- */
- public static final String PLUGINS_PACKAGE = ExtractorPlugin.class.getPackage().getName();
-
- /**
* Property where look for plugins.
*/
public static final String PLUGIN_DIRS_PROPERTY = "any23.plugin.dirs";
@@ -263,8 +258,8 @@ public class Any23PluginManager {
* @return not <code>null</code> list of plugin classes.
* @throws IOException
*/
- public synchronized Iterator<ExtractorPlugin> getExtractors() throws IOException {
- return getPlugins(ExtractorPlugin.class);
+ public synchronized Iterator<ExtractorFactory> getExtractors() throws IOException {
+ return getPlugins(ExtractorFactory.class);
}
/**
@@ -297,7 +292,6 @@ public class Any23PluginManager {
* Configures a new list of extractors containing the extractors declared in <code>initialExtractorGroup</code>
* and also the extractors detected in classpath specified by <code>pluginLocations</code>.
*
- * @param initialExtractorGroup initial list of extractors.
* @param pluginLocations
* @return full list of extractors.
* @throws java.io.IOException
@@ -305,10 +299,10 @@ public class Any23PluginManager {
* @throws InstantiationException
*/
public synchronized ExtractorGroup configureExtractors(
- final ExtractorGroup initialExtractorGroup,
+ //final ExtractorGroup initialExtractorGroup,
final File... pluginLocations
) throws IOException, IllegalAccessException, InstantiationException {
- if (initialExtractorGroup == null) throw new NullPointerException("inExtractorGroup cannot be null");
+ //if (initialExtractorGroup == null) throw new NullPointerException("inExtractorGroup cannot be null");
final String pluginsReport = loadPlugins(pluginLocations);
logger.info(pluginsReport);
@@ -316,9 +310,9 @@ public class Any23PluginManager {
final StringBuilder report = new StringBuilder();
try {
final List<ExtractorFactory<?>> newFactoryList = new ArrayList<ExtractorFactory<?>>();
- Iterator<ExtractorPlugin> extractors = getExtractors();
+ Iterator<ExtractorFactory> extractors = getExtractors();
while (extractors.hasNext()) {
- ExtractorFactory<?> factory = extractors.next().getExtractorFactory();
+ ExtractorFactory<?> factory = extractors.next();
report.append("\n - found plugin: ").append(factory.getExtractorName()).append("\n");
@@ -329,9 +323,9 @@ public class Any23PluginManager {
report.append("\n=== No plugins have been found.===\n");
}
- for (ExtractorFactory<?> extractorFactory : initialExtractorGroup) {
- newFactoryList.add(extractorFactory);
- }
+ //for (ExtractorFactory<?> extractorFactory : initialExtractorGroup) {
+ // newFactoryList.add(extractorFactory);
+ //}
return new ExtractorGroup(newFactoryList);
} finally {
@@ -353,7 +347,7 @@ public class Any23PluginManager {
throws IOException, InstantiationException, IllegalAccessException {
final String pluginDirs = DefaultConfiguration.singleton().getPropertyOrFail(PLUGIN_DIRS_PROPERTY);
final File[] pluginLocations = getPluginLocations(pluginDirs);
- return configureExtractors(initialExtractorGroup, pluginLocations);
+ return configureExtractors(pluginLocations);
}
/**
@@ -369,8 +363,7 @@ public class Any23PluginManager {
*/
public synchronized ExtractorGroup getApplicableExtractors(ExtractorRegistry registry, File... pluginLocations)
throws IOException, IllegalAccessException, InstantiationException {
- final ExtractorGroup defaultExtractors = registry.getExtractorGroup();
- return configureExtractors(defaultExtractors, pluginLocations);
+ return configureExtractors(pluginLocations);
}
/**
Modified: any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java
URL: http://svn.apache.org/viewvc/any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java (original)
+++ any23/trunk/api/src/main/java/org/apache/any23/plugin/ExtractorPlugin.java Sat Jan 19 21:47:22 2013
@@ -25,7 +25,9 @@ import org.apache.any23.extractor.Extrac
* extractor plugin that can be detected and registered from the library classpath.
*
* @author Michele Mostarda (mostarda@fbk.eu)
+ * @deprecated ExtractorFactory now supports META-INF/services discovery, deprecating this class.
*/
+@Deprecated
public interface ExtractorPlugin<T extends Extractor<?>> {
/**
Modified: any23/trunk/core/src/main/assembly/bin.xml
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/assembly/bin.xml?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/assembly/bin.xml (original)
+++ any23/trunk/core/src/main/assembly/bin.xml Sat Jan 19 21:47:22 2013
@@ -25,7 +25,11 @@
</formats>
<includeBaseDirectory>true</includeBaseDirectory>
<baseDirectory>${project.build.finalName}</baseDirectory>
-
+ <containerDescriptorHandlers>
+ <containerDescriptorHandler>
+ <handlerName>metaInf-services</handlerName>
+ </containerDescriptorHandler>
+ </containerDescriptorHandlers>
<files>
<file>
<source>${basedir}/src/main/assembly/README.txt</source>
Modified: any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/cli/ExtractorDocumentation.java Sat Jan 19 21:47:22 2013
@@ -91,7 +91,7 @@ public class ExtractorDocumentation impl
*/
public void printExtractorList(ExtractorRegistry registry) {
for (ExtractorFactory factory : registry.getExtractorGroup()) {
- System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorType()));
+ System.out.println( String.format("%25s [%15s]", factory.getExtractorName(), factory.getExtractorLabel()));
}
}
Modified: any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/cli/PluginVerifier.java Sat Jan 19 21:47:22 2013
@@ -64,7 +64,7 @@ public class PluginVerifier implements T
pluginManager.loadJARDir(pluginsDir);
- final Iterator<ExtractorPlugin> plugins = pluginManager.getExtractors();
+ final Iterator<ExtractorFactory> plugins = pluginManager.getExtractors();
while (plugins.hasNext()) {
printPluginData(plugins.next(), System.out);
@@ -80,10 +80,8 @@ public class PluginVerifier implements T
return sb.toString();
}
- private void printPluginData(ExtractorPlugin instance, PrintStream ps) {
- final Author authorAnnotation = instance.getClass().getAnnotation(Author.class);
- final ExtractorFactory<?> extractorFactory = instance.getExtractorFactory();
- ps.printf("Plugin class : %s\n", instance.getClass());
+ private void printPluginData(ExtractorFactory extractorFactory, PrintStream ps) {
+ final Author authorAnnotation = extractorFactory.getClass().getAnnotation(Author.class);
ps.printf("Plugin author : %s\n", authorAnnotation == null ? "<unknown>" : authorAnnotation.name());
ps.printf("Plugin factory : %s\n", extractorFactory.getClass());
ps.printf("Plugin mime-types: %s\n", getMimeTypesStr( extractorFactory.getSupportedMIMETypes() ));
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/ExtractorRegistryImpl.java Sat Jan 19 21:47:22 2013
@@ -18,54 +18,31 @@
package org.apache.any23.extractor;
import org.apache.any23.configuration.DefaultConfiguration;
-import org.apache.any23.extractor.csv.CSVExtractor;
-import org.apache.any23.extractor.html.AdrExtractor;
-import org.apache.any23.extractor.html.GeoExtractor;
-import org.apache.any23.extractor.html.HCalendarExtractor;
-import org.apache.any23.extractor.html.HCardExtractor;
-import org.apache.any23.extractor.html.HListingExtractor;
-import org.apache.any23.extractor.html.HRecipeExtractor;
-import org.apache.any23.extractor.html.HResumeExtractor;
-import org.apache.any23.extractor.html.HReviewExtractor;
-import org.apache.any23.extractor.html.HTMLMetaExtractor;
-import org.apache.any23.extractor.html.HeadLinkExtractor;
-import org.apache.any23.extractor.html.ICBMExtractor;
-import org.apache.any23.extractor.html.LicenseExtractor;
-import org.apache.any23.extractor.html.SpeciesExtractor;
-import org.apache.any23.extractor.html.TitleExtractor;
-import org.apache.any23.extractor.html.TurtleHTMLExtractor;
-import org.apache.any23.extractor.html.XFNExtractor;
-import org.apache.any23.extractor.microdata.MicrodataExtractor;
-import org.apache.any23.extractor.rdf.NQuadsExtractor;
-import org.apache.any23.extractor.rdf.NTriplesExtractor;
-import org.apache.any23.extractor.rdf.RDFXMLExtractor;
-import org.apache.any23.extractor.rdf.TriXExtractor;
-import org.apache.any23.extractor.rdf.TurtleExtractor;
-import org.apache.any23.extractor.rdfa.RDFa11Extractor;
-import org.apache.any23.extractor.rdfa.RDFaExtractor;
+import org.apache.any23.extractor.html.HTMLMetaExtractorFactory;
+import org.apache.any23.extractor.rdfa.RDFa11ExtractorFactory;
+import org.apache.any23.extractor.rdfa.RDFaExtractorFactory;
import java.util.ArrayList;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
/**
* Singleton class acting as a register for all the various
* {@link Extractor}.
*/
-public class ExtractorRegistryImpl implements ExtractorRegistry {
+public class ExtractorRegistryImpl extends info.aduna.lang.service.ServiceRegistry<String, ExtractorFactory> implements ExtractorRegistry {
/**
- * The instance.
+ * Public constructor for ExtractorRegistryImpl. Callers should normally obtain the shared registry via getInstance instead of calling this directly.
*/
- private static ExtractorRegistry instance = null;
+ public ExtractorRegistryImpl() {
+ super(ExtractorFactory.class);
+ }
/**
- * maps containing the related {@link ExtractorFactory} for each
- * registered {@link Extractor}.
+ * The instance.
*/
- private Map<String, ExtractorFactory<?>> factories = new HashMap<String, ExtractorFactory<?>>();
+ private static ExtractorRegistry instance = null;
/**
* @return returns the {@link ExtractorRegistry} instance.
@@ -77,35 +54,42 @@ public class ExtractorRegistryImpl imple
if (instance == null) {
instance = new ExtractorRegistryImpl();
// FIXME: Remove these hardcoded links to the extractor factories by turning them into SPI interfaces
- instance.register(RDFXMLExtractor.factory);
- instance.register(TurtleExtractor.factory);
- instance.register(NTriplesExtractor.factory);
- instance.register(NQuadsExtractor.factory);
- instance.register(TriXExtractor.factory);
+ //instance.register(RDFXMLExtractor.factory);
+ //instance.register(TurtleExtractor.factory);
+ //instance.register(NTriplesExtractor.factory);
+ //instance.register(NQuadsExtractor.factory);
+ //instance.register(TriXExtractor.factory);
+ //instance.register(HeadLinkExtractor.factory);
+ //instance.register(LicenseExtractor.factory);
+ //instance.register(TitleExtractor.factory);
+ //instance.register(XFNExtractor.factory);
+ //instance.register(ICBMExtractor.factory);
+ //instance.register(AdrExtractor.factory);
+ //instance.register(GeoExtractor.factory);
+ //instance.register(HCalendarExtractor.factory);
+ //instance.register(HCardExtractor.factory);
+ //instance.register(HListingExtractor.factory);
+ //instance.register(HResumeExtractor.factory);
+ //instance.register(HReviewExtractor.factory);
+ //instance.register(HRecipeExtractor.factory);
+ //instance.register(SpeciesExtractor.factory);
+ //instance.register(TurtleHTMLExtractor.factory);
+ //instance.register(MicrodataExtractor.factory);
+ //instance.register(CSVExtractor.factory);
+
if(conf.getFlagProperty("any23.extraction.rdfa.programmatic")) {
- instance.register(RDFa11Extractor.factory);
+ instance.unregister(RDFaExtractorFactory.NAME);
+ // FIXME: Unregister RDFaExtractor if flag is set
+ //instance.register(RDFa11Extractor.factory);
} else {
- instance.register(RDFaExtractor.factory);
+ instance.unregister(RDFa11ExtractorFactory.NAME);
+ // FIXME: Unregister RDFa11Extractor if flag is not set
+ //instance.register(RDFaExtractor.factory);
}
- instance.register(HeadLinkExtractor.factory);
- instance.register(LicenseExtractor.factory);
- instance.register(TitleExtractor.factory);
- instance.register(XFNExtractor.factory);
- instance.register(ICBMExtractor.factory);
- instance.register(AdrExtractor.factory);
- instance.register(GeoExtractor.factory);
- instance.register(HCalendarExtractor.factory);
- instance.register(HCardExtractor.factory);
- instance.register(HListingExtractor.factory);
- instance.register(HResumeExtractor.factory);
- instance.register(HReviewExtractor.factory);
- instance.register(HRecipeExtractor.factory);
- instance.register(SpeciesExtractor.factory);
- instance.register(TurtleHTMLExtractor.factory);
- instance.register(MicrodataExtractor.factory);
- instance.register(CSVExtractor.factory);
- if(conf.getFlagProperty("any23.extraction.head.meta")) {
- instance.register(HTMLMetaExtractor.factory);
+ if(!conf.getFlagProperty("any23.extraction.head.meta")) {
+ instance.unregister(HTMLMetaExtractorFactory.NAME);
+ // FIXME: Unregister HTMLMetaExtractor if this flag is not set
+ //instance.register(HTMLMetaExtractor.factory);
}
}
}
@@ -119,14 +103,23 @@ public class ExtractorRegistryImpl imple
* @throws IllegalArgumentException if trying to register a {@link ExtractorFactory}
* with a that already exists in the registry.
*/
+ @Override
public void register(ExtractorFactory<?> factory) {
- if (factories.containsKey(factory.getExtractorName())) {
- throw new IllegalArgumentException(String.format("Extractor name clash: %s",
- factory.getExtractorName()));
+ this.add(factory);
+ }
+
+ /**
+ * Unregisters the {@link ExtractorFactory} with the given name.
+ *
+ * @param name The name of the ExtractorFactory to unregister.
+ */
+ @Override
+ public void unregister(String name) {
+ if(this.has(name)) {
+ this.remove(this.get(name));
}
- factories.put(factory.getExtractorName(), factory);
}
-
+
/**
*
* Retrieves a {@link ExtractorFactory} given its name
@@ -136,17 +129,20 @@ public class ExtractorRegistryImpl imple
* @throws IllegalArgumentException if there is not a
* {@link ExtractorFactory} associated to the provided name.
*/
+ @Override
public ExtractorFactory<?> getFactory(String name) {
- if (!factories.containsKey(name)) {
+ ExtractorFactory<?> result = this.get(name);
+ if (result == null) {
throw new IllegalArgumentException("Unregistered extractor name: " + name);
}
- return factories.get(name);
+ return result;
}
/**
* @return an {@link ExtractorGroup} with all the registered
* {@link Extractor}.
*/
+ @Override
public ExtractorGroup getExtractorGroup() {
return getExtractorGroup(getAllNames());
}
@@ -157,6 +153,7 @@ public class ExtractorRegistryImpl imple
* @param names a {@link java.util.List} containing the names of the desired {@link ExtractorFactory}.
* @return the extraction group.
*/
+ @Override
public ExtractorGroup getExtractorGroup(List<String> names) {
List<ExtractorFactory<?>> members = new ArrayList<ExtractorFactory<?>>(names.size());
for (String name : names) {
@@ -171,17 +168,24 @@ public class ExtractorRegistryImpl imple
* @return <code>true</code> if is there a {@link ExtractorFactory}
* associated to the provided name.
*/
+ @Override
public boolean isRegisteredName(String name) {
- return factories.containsKey(name);
+ return this.has(name);
}
/**
* Returns the names of all registered extractors, sorted alphabetically.
*/
+ @Override
public List<String> getAllNames() {
- List<String> result = new ArrayList<String>(factories.keySet());
+ List<String> result = new ArrayList<String>(this.getKeys());
Collections.sort(result);
return result;
}
+ @Override
+ protected String getKey(ExtractorFactory service) {
+ return service.getExtractorName();
+ }
+
}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/SimpleExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -28,45 +28,28 @@ import java.util.Collection;
*
* @param <T> the type of the {@link Extractor} served by this factory.
*/
-public class SimpleExtractorFactory<T extends Extractor<?>> implements ExtractorFactory<T> {
+public abstract class SimpleExtractorFactory<T extends Extractor<?>> implements ExtractorFactory<T> {
private final String name;
private final Prefixes prefixes;
- private final Collection<MIMEType> supportedMIMETypes = new ArrayList<MIMEType>();
+ private Collection<MIMEType> supportedMIMETypes = new ArrayList<MIMEType>();
- private final String exampleInput;
+ private String exampleInput;
- private final Class<T> extractorClass;
-
/**
- * Creates an instance of a {@link ExtractorFactory} serving concrete implementation
- * instances of {@link Extractor}.
- *
- * @param name of the {@link Extractor}.
- * @param prefixes handled {@link org.apache.any23.rdf.Prefixes}.
- * @param supportedMIMETypes collection of supported MIME Types.
- * @param exampleInput a string acting as a input example.
- * @param extractorClass concrete implementation class of the {@link Extractor}.
- * @param <S> the concrete type of the {@link Extractor}.
- * @return an {@link ExtractorFactory}.
+ * @return the name of the {@link Extractor}
*/
- public static <S extends Extractor<?>> ExtractorFactory<S> create(
- String name,
- Prefixes prefixes,
- Collection<String> supportedMIMETypes,
- String exampleInput,
- Class<S> extractorClass
- ) {
- return new SimpleExtractorFactory<S>(name, prefixes, supportedMIMETypes, exampleInput, extractorClass);
+ public String getExtractorName() {
+ return name;
}
/**
- * @return the name of the {@link Extractor}
+ * @return the label of the {@link Extractor}; this implementation returns the factory's fully qualified class name
*/
- public String getExtractorName() {
- return name;
+ public String getExtractorLabel() {
+ return this.getClass().getName();
}
/**
@@ -83,25 +66,6 @@ public class SimpleExtractorFactory<T ex
return supportedMIMETypes;
}
- @Override
- public Class<T> getExtractorType() {
- return extractorClass;
- }
-
- /**
- * @return an instance of type T concrete implementation of {@link Extractor}
- */
- @Override
- public T createExtractor() {
- try {
- return extractorClass.newInstance();
- } catch (IllegalAccessException ex) {
- throw new RuntimeException("Zero-argument constructor not public?", ex);
- } catch (InstantiationException ex) {
- throw new RuntimeException("Non-instantiable type?", ex);
- }
- }
-
/**
* @return an input example
*/
@@ -110,12 +74,18 @@ public class SimpleExtractorFactory<T ex
return exampleInput;
}
- private SimpleExtractorFactory(
+ protected SimpleExtractorFactory( // two-argument form: supported MIME types stay empty and exampleInput stays null
+ String name,
+ Prefixes prefixes) {
+ this.name = name;
+ this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes; // same null guard as the four-argument constructor
+ }
+
+ protected SimpleExtractorFactory(
String name,
Prefixes prefixes,
Collection<String> supportedMIMETypes,
- String exampleInput,
- Class<T> extractorClass
+ String exampleInput
) {
this.name = name;
this.prefixes = (prefixes == null) ? Prefixes.EMPTY : prefixes;
@@ -123,7 +93,6 @@ public class SimpleExtractorFactory<T ex
this.supportedMIMETypes.add(MIMEType.parse(type));
}
this.exampleInput = exampleInput;
- this.extractorClass = extractorClass;
}
}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractor.java Sat Jan 19 21:47:22 2013
@@ -59,17 +59,6 @@ public class CSVExtractor implements Ext
private CSV csv = CSV.getInstance();
- public final static ExtractorFactory<CSVExtractor> factory =
- SimpleExtractorFactory.create(
- "csv",
- null,
- Arrays.asList(
- "text/csv;q=0.1"
- ),
- "example-csv.csv",
- CSVExtractor.class
- );
-
/**
* {@inheritDoc}
*/
@@ -300,7 +289,8 @@ public class CSVExtractor implements Ext
/**
* {@inheritDoc}
*/
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return CSVExtractorFactory.getDescriptionInstance();
}
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/csv/CSVExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,46 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.csv;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link CSVExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class CSVExtractorFactory extends SimpleExtractorFactory<CSVExtractor> implements
+ ExtractorFactory<CSVExtractor> {
+
+ public static final String NAME = "csv"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = null; // no prefixes; the four-argument superclass constructor stores Prefixes.EMPTY when given null
+
+ private static final ExtractorDescription descriptionInstance = new CSVExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public CSVExtractorFactory() {
+ super(
+ CSVExtractorFactory.NAME,
+ CSVExtractorFactory.PREFIXES,
+ Arrays.asList(
+ "text/csv;q=0.1"
+ ), // supported MIME types, each parsed by the superclass constructor
+ "example-csv.csv"); // example input
+ }
+
+ @Override
+ public CSVExtractor createExtractor() {
+ return new CSVExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractor.java Sat Jan 19 21:47:22 2013
@@ -19,17 +19,12 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.VCARD;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
-
/**
* Extractor for the <a href="http://microformats.org/wiki/adr">adr</a>
* microformat.
@@ -89,16 +84,9 @@ public class AdrExtractor extends Entity
return true;
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return AdrExtractorFactory.getDescriptionInstance();
}
- public final static ExtractorFactory<AdrExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-adr",
- PopularPrefixes.createSubset("rdf", "vcard"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-adr.html",
- AdrExtractor.class
- );
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/AdrExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link AdrExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class AdrExtractorFactory extends SimpleExtractorFactory<AdrExtractor> implements
+ ExtractorFactory<AdrExtractor> {
+
+ public static final String NAME = "html-mf-adr"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new AdrExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public AdrExtractorFactory() {
+ super(
+ AdrExtractorFactory.NAME,
+ AdrExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-adr.html"); // example input
+ }
+
+ @Override
+ public AdrExtractor createExtractor() {
+ return new AdrExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractor.java Sat Jan 19 21:47:22 2013
@@ -19,18 +19,12 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.VCARD;
import org.openrdf.model.BNode;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
-
-
/**
* Extractor for the <a href="http://microformats.org/wiki/geo">Geo</a>
* microformat.
@@ -41,17 +35,9 @@ public class GeoExtractor extends Entity
private static final VCARD vVCARD = VCARD.getInstance();
- public static final ExtractorFactory<GeoExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-geo",
- PopularPrefixes.createSubset("rdf", "vcard"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-geo.html",
- GeoExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return GeoExtractorFactory.getDescriptionInstance();
}
protected String getBaseClassName() {
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/GeoExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link GeoExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class GeoExtractorFactory extends SimpleExtractorFactory<GeoExtractor> implements
+ ExtractorFactory<GeoExtractor> {
+
+ public static final String NAME = "html-mf-geo"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new GeoExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public GeoExtractorFactory() {
+ super(
+ GeoExtractorFactory.NAME,
+ GeoExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-geo.html"); // example input
+ }
+
+ @Override
+ public GeoExtractor createExtractor() {
+ return new GeoExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractor.java Sat Jan 19 21:47:22 2013
@@ -19,10 +19,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.ICAL;
import org.openrdf.model.BNode;
@@ -33,7 +30,6 @@ import org.w3c.dom.Node;
import javax.xml.datatype.DatatypeConfigurationException;
import java.text.ParseException;
-import java.util.Arrays;
import java.util.List;
import static org.apache.any23.extractor.html.HTMLDocument.TextField;
@@ -49,14 +45,6 @@ public class HCalendarExtractor extends
private static final ICAL vICAL = ICAL.getInstance();
- public final static ExtractorFactory<HCalendarExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hcalendar",
- PopularPrefixes.createSubset("rdf", "ical"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hcalendar.html",
- HCalendarExtractor.class);
-
private static final String[] Components = {"Vevent", "Vtodo", "Vjournal", "Vfreebusy"};
private static final String DATE_FORMAT = "yyyyMMdd'T'HHmm'Z'";
@@ -75,8 +63,9 @@ public class HCalendarExtractor extends
"dtend",
};
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HCalendarExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCalendarExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link HCalendarExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HCalendarExtractorFactory extends SimpleExtractorFactory<HCalendarExtractor> implements
+ ExtractorFactory<HCalendarExtractor> {
+
+ public static final String NAME = "html-mf-hcalendar"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "ical"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new HCalendarExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public HCalendarExtractorFactory() {
+ super(
+ HCalendarExtractorFactory.NAME,
+ HCalendarExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-hcalendar.html"); // example input
+ }
+
+ @Override
+ public HCalendarExtractor createExtractor() {
+ return new HCalendarExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractor.java Sat Jan 19 21:47:22 2013
@@ -21,13 +21,10 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
import org.apache.any23.extractor.IssueReport;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
import org.apache.any23.extractor.html.annotations.Includes;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.VCARD;
import org.apache.commons.lang.StringUtils;
-import org.apache.any23.extractor.ExtractorFactory;
import org.openrdf.model.BNode;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
@@ -36,7 +33,6 @@ import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import java.util.ArrayList;
-import java.util.Arrays;
import java.util.Collection;
import java.util.List;
@@ -58,17 +54,9 @@ public class HCardExtractor extends Enti
private HTMLDocument fragment;
- public final static ExtractorFactory<HCardExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hcard",
- PopularPrefixes.createSubset("rdf", "vcard"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hcard.html",
- HCardExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HCardExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HCardExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link HCardExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HCardExtractorFactory extends SimpleExtractorFactory<HCardExtractor> implements
+ ExtractorFactory<HCardExtractor> {
+
+ public static final String NAME = "html-mf-hcard"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new HCardExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public HCardExtractorFactory() {
+ super(
+ HCardExtractorFactory.NAME,
+ HCardExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-hcard.html"); // example input
+ }
+
+ @Override
+ public HCardExtractor createExtractor() {
+ return new HCardExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,10 +20,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.FOAF;
import org.apache.any23.vocab.HLISTING;
import org.openrdf.model.BNode;
@@ -77,17 +74,9 @@ public class HListingExtractor extends E
private HTMLDocument fragment;
- public final static ExtractorFactory<HListingExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hlisting",
- PopularPrefixes.createSubset("rdf", "hlisting"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hlisting.html",
- HListingExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HListingExtractorFactory.getDescriptionInstance();
}
protected String getBaseClassName() {
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HListingExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link HListingExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HListingExtractorFactory extends SimpleExtractorFactory<HListingExtractor> implements
+ ExtractorFactory<HListingExtractor> {
+
+ public static final String NAME = "html-mf-hlisting"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hlisting"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new HListingExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public HListingExtractorFactory() {
+ super(
+ HListingExtractorFactory.NAME,
+ HListingExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-hlisting.html"); // example input
+ }
+
+ @Override
+ public HListingExtractor createExtractor() {
+ return new HListingExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,17 +20,12 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.HRECIPE;
import org.openrdf.model.BNode;
import org.openrdf.model.URI;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
-
/**
* Extractor for the <a href="http://microformats.org/wiki/hrecipe">hRecipe</a>
* microformat.
@@ -41,19 +36,9 @@ public class HRecipeExtractor extends En
private static final HRECIPE vHRECIPE = HRECIPE.getInstance();
- public final static ExtractorFactory<HRecipeExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hrecipe",
- PopularPrefixes.createSubset("rdf", "hrecipe"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hrecipe.html",
- HRecipeExtractor.class
- );
-
-
@Override
public ExtractorDescription getDescription() {
- return factory;
+ return HRecipeExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HRecipeExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link HRecipeExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HRecipeExtractorFactory extends SimpleExtractorFactory<HRecipeExtractor> implements
+ ExtractorFactory<HRecipeExtractor> {
+
+ public static final String NAME = "html-mf-hrecipe"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "hrecipe"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new HRecipeExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public HRecipeExtractorFactory() {
+ super(
+ HRecipeExtractorFactory.NAME,
+ HRecipeExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-hrecipe.html"); // example input
+ }
+
+ @Override
+ public HRecipeExtractor createExtractor() {
+ return new HRecipeExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractor.java Sat Jan 19 21:47:22 2013
@@ -19,10 +19,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.DOAC;
import org.apache.any23.vocab.FOAF;
import org.openrdf.model.BNode;
@@ -30,7 +27,6 @@ import org.openrdf.model.Resource;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
import java.util.List;
/**
@@ -44,19 +40,12 @@ public class HResumeExtractor extends En
private static final FOAF vFOAF = FOAF.getInstance();
private static final DOAC vDOAC = DOAC.getInstance();
- public final static ExtractorFactory<HResumeExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hresume",
- PopularPrefixes.createSubset("rdf", "doac", "foaf"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hresume.html",
- HResumeExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HResumeExtractorFactory.getDescriptionInstance();
}
+ @Override
public String getBaseClassName() {
return "hresume";
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HResumeExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Creates {@link HResumeExtractor} instances and doubles as their {@link ExtractorDescription}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HResumeExtractorFactory extends SimpleExtractorFactory<HResumeExtractor> implements
+ ExtractorFactory<HResumeExtractor> {
+
+ public static final String NAME = "html-mf-hresume"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "doac", "foaf"); // prefixes handed to the superclass constructor
+
+ private static final ExtractorDescription descriptionInstance = new HResumeExtractorFactory(); // shared instance behind getDescriptionInstance()
+
+ public HResumeExtractorFactory() {
+ super(
+ HResumeExtractorFactory.NAME,
+ HResumeExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"), // supported MIME types, each parsed by the superclass constructor
+ "example-mf-hresume.html"); // example input
+ }
+
+ @Override
+ public HResumeExtractor createExtractor() {
+ return new HResumeExtractor(); // a new extractor on every call
+ }
+
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance; // same object on every call
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractor.java Sat Jan 19 21:47:22 2013
@@ -20,10 +20,7 @@ package org.apache.any23.extractor.html;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.extractor.TagSoupExtractionResult;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.DCTERMS;
import org.apache.any23.vocab.REVIEW;
import org.apache.any23.vocab.VCARD;
@@ -32,7 +29,6 @@ import org.openrdf.model.Resource;
import org.openrdf.model.vocabulary.RDF;
import org.w3c.dom.Node;
-import java.util.Arrays;
import java.util.List;
import static org.apache.any23.extractor.html.HTMLDocument.TextField;
@@ -49,19 +45,12 @@ public class HReviewExtractor extends En
private static final VCARD vVCARD = VCARD.getInstance();
private static final DCTERMS vDCTERMS = DCTERMS.getInstance();
- public final static ExtractorFactory<HReviewExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-hreview",
- PopularPrefixes.createSubset("rdf", "vcard", "rev"),
- Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
- "example-mf-hreview.html",
- HReviewExtractor.class
- );
-
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HReviewExtractorFactory.getDescriptionInstance();
}
+ @Override
protected String getBaseClassName() {
return "hreview";
}
@@ -71,6 +60,7 @@ public class HReviewExtractor extends En
// Empty.
}
+ @Override
protected boolean extractEntity(Node node, ExtractionResult out) throws ExtractionException {
BNode rev = getBlankNodeFor(node);
out.writeTriple(rev, RDF.TYPE, vREVIEW.Review);
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HReviewExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and {@link ExtractorDescription} for {@link HReviewExtractor}; {@code @MetaInfServices} publishes it as an {@link ExtractorFactory} service provider.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HReviewExtractorFactory extends SimpleExtractorFactory<HReviewExtractor> implements
+ ExtractorFactory<HReviewExtractor> {
+ /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+ public static final String NAME = "html-mf-hreview";
+ /** Subset of the popular prefixes ("rdf", "vcard", "rev") handed to the superclass constructor. */
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("rdf", "vcard", "rev");
+ /** Single shared factory instance, exposed through {@link #getDescriptionInstance()}. */
+ private static final ExtractorDescription descriptionInstance = new HReviewExtractorFactory();
+ /** Passes {@link #NAME}, {@link #PREFIXES}, two MIME-type strings (with q values) and an example file name to the superclass. */
+ public HReviewExtractorFactory() {
+ super(
+ HReviewExtractorFactory.NAME,
+ HReviewExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.1", "application/xhtml+xml;q=0.1"),
+ "example-mf-hreview.html");
+ }
+ /** Returns a newly constructed {@link HReviewExtractor} on every call. */
+ @Override
+ public HReviewExtractor createExtractor() {
+ return new HReviewExtractor();
+ }
+ /** Returns the shared description instance; {@code HReviewExtractor.getDescription()} delegates to this method. */
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractor.java Sat Jan 19 21:47:22 2013
@@ -23,9 +23,6 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.Extractor;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.rdf.RDFUtils;
import org.apache.any23.vocab.SINDICE;
import org.openrdf.model.URI;
@@ -36,7 +33,6 @@ import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import java.io.IOException;
-import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@@ -51,8 +47,6 @@ import java.util.Set;
*/
public class HTMLMetaExtractor implements Extractor.TagSoupDOMExtractor {
- public static final String NAME = "html-head-meta";
-
private static final SINDICE vSINDICE = SINDICE.getInstance();
private URI profile;
@@ -61,18 +55,10 @@ public class HTMLMetaExtractor implement
private String documentLang;
- public final static ExtractorFactory<HTMLMetaExtractor> factory =
- SimpleExtractorFactory.create(
- NAME,
- PopularPrefixes.createSubset("sindice"),
- Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
- "example-meta.html",
- HTMLMetaExtractor.class
- );
-
/**
* {@inheritDoc}
*/
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -173,8 +159,9 @@ public class HTMLMetaExtractor implement
return null;
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HTMLMetaExtractorFactory.getDescriptionInstance();
}
private class Meta {
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HTMLMetaExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and {@link ExtractorDescription} for {@link HTMLMetaExtractor}; {@code @MetaInfServices} publishes it as an {@link ExtractorFactory} service provider.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HTMLMetaExtractorFactory extends SimpleExtractorFactory<HTMLMetaExtractor> implements
+ ExtractorFactory<HTMLMetaExtractor> {
+ /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+ public static final String NAME = "html-head-meta";
+ /** Subset of the popular prefixes ("sindice") handed to the superclass constructor. */
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("sindice");
+ /** Single shared factory instance, exposed through {@link #getDescriptionInstance()}. */
+ private static final ExtractorDescription descriptionInstance = new HTMLMetaExtractorFactory();
+ /** Passes {@link #NAME}, {@link #PREFIXES}, two MIME-type strings (with q values) and an example file name to the superclass. */
+ public HTMLMetaExtractorFactory() {
+ super(
+ HTMLMetaExtractorFactory.NAME,
+ HTMLMetaExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+ "example-meta.html");
+ }
+ /** Returns a newly constructed {@link HTMLMetaExtractor} on every call. */
+ @Override
+ public HTMLMetaExtractor createExtractor() {
+ return new HTMLMetaExtractor();
+ }
+ /** Returns the shared description instance; {@code HTMLMetaExtractor.getDescription()} delegates to this method. */
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,9 +22,6 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.XHTML;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.openrdf.model.URI;
@@ -34,7 +31,6 @@ import org.w3c.dom.Document;
import org.w3c.dom.Node;
import java.io.IOException;
-import java.util.Arrays;
import java.util.List;
/**
@@ -75,7 +71,7 @@ public class HeadLinkExtractor implement
if (title != null && !"".equals(title)) {
out.writeTriple(
href,
- factory.getPrefixes().expand("dcterms:title"),
+ getDescription().getPrefixes().expand("dcterms:title"),
vf.createLiteral(title)
);
}
@@ -83,22 +79,16 @@ public class HeadLinkExtractor implement
if (type != null && !"".equals(type)) {
out.writeTriple(
href,
- factory.getPrefixes().expand("dcterms:format"),
+ getDescription().getPrefixes().expand("dcterms:format"),
vf.createLiteral(type)
);
}
}
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return HeadLinkExtractorFactory.getDescriptionInstance();
}
- public final static ExtractorFactory<HeadLinkExtractor> factory =
- SimpleExtractorFactory.create(
- "html-head-links",
- PopularPrefixes.createSubset("xhtml", "dcterms"),
- Arrays.asList("text/html;q=0.05", "application/xhtml+xml;q=0.05"),
- "example-head-link.html",
- HeadLinkExtractor.class);
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/HeadLinkExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and {@link ExtractorDescription} for {@link HeadLinkExtractor}; {@code @MetaInfServices} publishes it as an {@link ExtractorFactory} service provider.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HeadLinkExtractorFactory extends SimpleExtractorFactory<HeadLinkExtractor> implements
+ ExtractorFactory<HeadLinkExtractor> {
+ /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+ public static final String NAME = "html-head-links";
+ /** Subset of the popular prefixes ("xhtml", "dcterms") handed to the superclass constructor. */
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("xhtml", "dcterms");
+ /** Single shared factory instance, exposed through {@link #getDescriptionInstance()}. */
+ private static final ExtractorDescription descriptionInstance = new HeadLinkExtractorFactory();
+ /** Passes {@link #NAME}, {@link #PREFIXES}, two MIME-type strings (with q values) and an example file name to the superclass. */
+ public HeadLinkExtractorFactory() {
+ super(
+ HeadLinkExtractorFactory.NAME,
+ HeadLinkExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.05", "application/xhtml+xml;q=0.05"),
+ "example-head-link.html");
+ }
+ /** Returns a newly constructed {@link HeadLinkExtractor} on every call. */
+ @Override
+ public HeadLinkExtractor createExtractor() {
+ return new HeadLinkExtractor();
+ }
+ /** Returns the shared description instance; {@code HeadLinkExtractor.getDescription()} delegates to this method. */
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,10 +22,7 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
-import org.apache.any23.extractor.SimpleExtractorFactory;
import org.apache.any23.rdf.Any23ValueFactoryWrapper;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.openrdf.model.BNode;
import org.openrdf.model.URI;
@@ -34,7 +31,6 @@ import org.openrdf.model.impl.ValueFacto
import org.w3c.dom.Document;
import java.io.IOException;
-import java.util.Arrays;
/**
* Extractor for "ICBM coordinates" provided as META headers in the head
@@ -45,15 +41,7 @@ import java.util.Arrays;
*/
public class ICBMExtractor implements TagSoupDOMExtractor {
- public final static ExtractorFactory<ICBMExtractor> factory =
- SimpleExtractorFactory.create(
- "html-head-icbm",
- PopularPrefixes.createSubset("geo", "rdf"),
- Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
- "example-icbm.html",
- ICBMExtractor.class
- );
-
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -83,11 +71,12 @@ public class ICBMExtractor implements Ta
}
private URI expand(String curie) {
- return factory.getPrefixes().expand(curie);
+ return getDescription().getPrefixes().expand(curie);
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return ICBMExtractorFactory.getDescriptionInstance();
}
}
\ No newline at end of file
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/ICBMExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and {@link ExtractorDescription} for {@link ICBMExtractor}; {@code @MetaInfServices} publishes it as an {@link ExtractorFactory} service provider.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class ICBMExtractorFactory extends SimpleExtractorFactory<ICBMExtractor> implements
+ ExtractorFactory<ICBMExtractor> {
+ /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+ public static final String NAME = "html-head-icbm";
+ /** Subset of the popular prefixes ("geo", "rdf") handed to the superclass constructor. */
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("geo", "rdf");
+ /** Single shared factory instance, exposed through {@link #getDescriptionInstance()}. */
+ private static final ExtractorDescription descriptionInstance = new ICBMExtractorFactory();
+ /** Passes {@link #NAME}, {@link #PREFIXES}, two MIME-type strings (with q values) and an example file name to the superclass. */
+ public ICBMExtractorFactory() {
+ super(
+ ICBMExtractorFactory.NAME,
+ ICBMExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
+ "example-icbm.html");
+ }
+ /** Returns a newly constructed {@link ICBMExtractor} on every call. */
+ @Override
+ public ICBMExtractor createExtractor() {
+ return new ICBMExtractor();
+ }
+ /** Returns the shared description instance; {@code ICBMExtractor.getDescription()} delegates to this method. */
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java (original)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractor.java Sat Jan 19 21:47:22 2013
@@ -22,10 +22,7 @@ import org.apache.any23.extractor.Extrac
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractionResult;
import org.apache.any23.extractor.ExtractorDescription;
-import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.IssueReport;
-import org.apache.any23.extractor.SimpleExtractorFactory;
-import org.apache.any23.rdf.PopularPrefixes;
import org.apache.any23.vocab.XHTML;
import org.apache.any23.extractor.Extractor.TagSoupDOMExtractor;
import org.openrdf.model.URI;
@@ -33,7 +30,6 @@ import org.w3c.dom.Document;
import org.w3c.dom.Node;
import java.io.IOException;
-import java.util.Arrays;
/**
* Extractor for the <a href="http://microformats.org/wiki/rel-license">rel-license</a>
@@ -47,15 +43,7 @@ public class LicenseExtractor implements
private static final XHTML vXHTML = XHTML.getInstance();
- public final static ExtractorFactory<LicenseExtractor> factory =
- SimpleExtractorFactory.create(
- "html-mf-license",
- PopularPrefixes.createSubset("xhtml"),
- Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
- "example-mf-license.html",
- LicenseExtractor.class
- );
-
+ @Override
public void run(
ExtractionParameters extractionParameters,
ExtractionContext extractionContext,
@@ -81,8 +69,9 @@ public class LicenseExtractor implements
}
}
+ @Override
public ExtractorDescription getDescription() {
- return factory;
+ return LicenseExtractorFactory.getDescriptionInstance();
}
}
Added: any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java (added)
+++ any23/trunk/core/src/main/java/org/apache/any23/extractor/html/LicenseExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,45 @@
+/**
+ *
+ */
+package org.apache.any23.extractor.html;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.PopularPrefixes;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and {@link ExtractorDescription} for {@link LicenseExtractor}; {@code @MetaInfServices} publishes it as an {@link ExtractorFactory} service provider.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class LicenseExtractorFactory extends SimpleExtractorFactory<LicenseExtractor> implements
+ ExtractorFactory<LicenseExtractor> {
+ /** Extractor name handed to the {@link SimpleExtractorFactory} constructor. */
+ public static final String NAME = "html-mf-license";
+ /** Subset of the popular prefixes ("xhtml") handed to the superclass constructor. */
+ public static final Prefixes PREFIXES = PopularPrefixes.createSubset("xhtml");
+ /** Single shared factory instance, exposed through {@link #getDescriptionInstance()}. */
+ private static final ExtractorDescription descriptionInstance = new LicenseExtractorFactory();
+ /** Passes {@link #NAME}, {@link #PREFIXES}, two MIME-type strings (with q values) and an example file name to the superclass. */
+ public LicenseExtractorFactory() {
+ super(
+ LicenseExtractorFactory.NAME,
+ LicenseExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.01", "application/xhtml+xml;q=0.01"),
+ "example-mf-license.html");
+ }
+ /** Returns a newly constructed {@link LicenseExtractor} on every call. */
+ @Override
+ public LicenseExtractor createExtractor() {
+ return new LicenseExtractor();
+ }
+ /** Returns the shared description instance; {@code LicenseExtractor.getDescription()} delegates to this method. */
+ public static ExtractorDescription getDescriptionInstance() {
+ return descriptionInstance;
+ }
+}