You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2013/01/19 22:47:24 UTC
svn commit: r1435720 [3/3] - in /any23/trunk:
api/src/main/java/org/apache/any23/extractor/
api/src/main/java/org/apache/any23/plugin/ core/src/main/assembly/
core/src/main/java/org/apache/any23/cli/
core/src/main/java/org/apache/any23/extractor/ core/...
Added: any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java (added)
+++ any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,44 @@
+/**
+ *
+ */
+package org.apache.any23.plugin.htmlscraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and description for {@link HTMLScraperExtractor} (name "html-scraper"), annotated with {@code @MetaInfServices} for the ExtractorFactory service type.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HTMLScraperExtractorFactory extends SimpleExtractorFactory<HTMLScraperExtractor> implements
+ ExtractorFactory<HTMLScraperExtractor> {
+
+ public static final String NAME = "html-scraper"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = null; // no Prefixes object is supplied for this extractor
+
+ private static final ExtractorDescription descriptionInstance = new HTMLScraperExtractorFactory(); // shared instance returned by getDescriptionInstance()
+
+ public HTMLScraperExtractorFactory() { // forwards the fixed name, prefixes and MIME type strings to SimpleExtractorFactory
+ super(
+ HTMLScraperExtractorFactory.NAME,
+ HTMLScraperExtractorFactory.PREFIXES,
+ Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"), // MIME type strings, each carrying a q value
+ null); // NOTE(review): fourth argument is left null; presumably an optional example input - confirm against SimpleExtractorFactory
+ }
+
+ @Override
+ public HTMLScraperExtractor createExtractor() { // builds a fresh extractor on every call
+ return new HTMLScraperExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() { // static accessor for the shared description instance
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java (original)
+++ any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -50,7 +50,7 @@ public class HTMLScraperExtractorTest {
@Before
public void setUp() {
- extractor = (HTMLScraperExtractor) new HTMLScraperPlugin().getExtractorFactory().createExtractor();
+ extractor = new HTMLScraperExtractorFactory().createExtractor();
}
@After
Modified: any23/trunk/plugins/integration-test/pom.xml
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/pom.xml?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/integration-test/pom.xml (original)
+++ any23/trunk/plugins/integration-test/pom.xml Sat Jan 19 21:47:22 2013
@@ -59,6 +59,11 @@
<artifactId>junit</artifactId>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>org.slf4j</groupId>
+ <artifactId>slf4j-log4j12</artifactId>
+ <scope>test</scope>
+ </dependency>
<!-- END: Test Dependencies -->
</dependencies>
Modified: any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java (original)
+++ any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java Sat Jan 19 21:47:22 2013
@@ -41,7 +41,7 @@ import static org.junit.Assert.assertTru
*/
public class PluginIT {
- private static final int NUM_OF_EXTRACTORS = 23;
+ private static final int NUM_OF_EXTRACTORS = 28;
private static final String PLUGIN_DIR = "target/plugins-build/";
@@ -76,13 +76,13 @@ public class PluginIT {
@Test
public void testDetectExtractorPlugins() throws IOException, InstantiationException, IllegalAccessException {
final ExtractorGroup extractorGroup = manager.getApplicableExtractors(
- ExtractorRegistryImpl.getInstance(),
+ new ExtractorRegistryImpl(),
HTML_SCRAPER_TARGET_DIR, // Required to satisfy class dependencies.
HTML_SCRAPER_DEPENDENCY_DIR,
OFFICE_SCRAPER_TARGET_DIR
, OFFICE_SCRAPER_DEPENDENCY_DIR // Required to satisfy class dependencies.
);
- assertEquals(NUM_OF_EXTRACTORS + 2, // HTMLScraper Plugin, OfficeScraper Plugin.
+ assertEquals(NUM_OF_EXTRACTORS , // HTMLScraper Plugin, OfficeScraper Plugin.
extractorGroup.getNumOfExtractors()
);
}
Added: any23/trunk/plugins/integration-test/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/src/test/resources/log4j.properties?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/integration-test/src/test/resources/log4j.properties (added)
+++ any23/trunk/plugins/integration-test/src/test/resources/log4j.properties Sat Jan 19 21:47:22 2013
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=INFO, stdout
+
+log4j.appender.stdout.Threshold=INFO
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] %m%n
Modified: any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java (original)
+++ any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java Sat Jan 19 21:47:22 2013
@@ -57,20 +57,6 @@ public class ExcelExtractor implements E
private boolean stopAtFirstError = false;
- public final static ExtractorFactory<ExcelExtractor> factory =
- SimpleExtractorFactory.create(
- "excel",
- null,
- Arrays.asList(
- "application/vnd.ms-excel;q=0.1",
- "application/msexcel;q=0.1",
- "application/x-msexcel;q=0.1",
- "application/x-ms-excel;q=0.1"
- ),
- null,
- ExcelExtractor.class
- );
-
public ExcelExtractor() {}
public boolean isStopAtFirstError() {
@@ -84,7 +70,7 @@ public class ExcelExtractor implements E
@Override
public ExtractorDescription getDescription() {
- return factory;
+ return ExcelExtractorFactory.getDescriptionInstance();
}
@Override
Added: any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java (added)
+++ any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,49 @@
+/**
+ *
+ */
+package org.apache.any23.plugin.officescraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * Factory and description for {@link ExcelExtractor} (name "excel"), annotated with {@code @MetaInfServices} for the ExtractorFactory service type.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class ExcelExtractorFactory extends SimpleExtractorFactory<ExcelExtractor> implements
+ ExtractorFactory<ExcelExtractor> {
+
+ public static final String NAME = "excel"; // extractor name handed to the superclass constructor
+
+ public static final Prefixes PREFIXES = null; // no Prefixes object is supplied for this extractor
+
+ private static final ExtractorDescription descriptionInstance = new ExcelExtractorFactory(); // shared instance returned by getDescriptionInstance()
+
+ public ExcelExtractorFactory() { // forwards the fixed name, prefixes and MIME type strings to SimpleExtractorFactory
+ super(
+ ExcelExtractorFactory.NAME,
+ ExcelExtractorFactory.PREFIXES,
+ Arrays.asList( // Excel MIME type strings, each carrying q=0.1
+ "application/vnd.ms-excel;q=0.1",
+ "application/msexcel;q=0.1",
+ "application/x-msexcel;q=0.1",
+ "application/x-ms-excel;q=0.1"
+ ),
+ null); // NOTE(review): fourth argument is left null; presumably an optional example input - confirm against SimpleExtractorFactory
+ }
+
+ @Override
+ public ExcelExtractor createExtractor() { // builds a fresh extractor on every call
+ return new ExcelExtractor();
+ }
+
+ public static ExtractorDescription getDescriptionInstance() { // static accessor for the shared description instance
+ return descriptionInstance;
+ }
+}
Modified: any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java (original)
+++ any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -57,7 +57,7 @@ public class ExcelExtractorTest {
@Before
public void setUp() {
- extractor = new ExcelExtractor();
+ extractor = new ExcelExtractorFactory().createExtractor();
}
@Test