You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@any23.apache.org by le...@apache.org on 2013/01/19 22:47:24 UTC

svn commit: r1435720 [3/3] - in /any23/trunk: api/src/main/java/org/apache/any23/extractor/ api/src/main/java/org/apache/any23/plugin/ core/src/main/assembly/ core/src/main/java/org/apache/any23/cli/ core/src/main/java/org/apache/any23/extractor/ core/...

Added: any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java (added)
+++ any23/trunk/plugins/html-scraper/src/main/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,44 @@
+/**
+ * 
+ */
+package org.apache.any23.plugin.htmlscraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link HTMLScraperExtractor}, annotated with {@code @MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class HTMLScraperExtractorFactory extends SimpleExtractorFactory<HTMLScraperExtractor> implements
+        ExtractorFactory<HTMLScraperExtractor> {
+    /** Extractor name passed to the {@link SimpleExtractorFactory} constructor. */
+    public static final String NAME = "html-scraper";
+    /** Prefixes passed to the superclass constructor; this factory supplies {@code null}. */
+    public static final Prefixes PREFIXES = null;
+    /** Shared factory instance handed out by {@link #getDescriptionInstance()}. */
+    private static final ExtractorDescription descriptionInstance = new HTMLScraperExtractorFactory();
+    /** Passes the name, the prefixes and the HTML/XHTML MIME types to the superclass constructor. */
+    public HTMLScraperExtractorFactory() {
+        super(
+                HTMLScraperExtractorFactory.NAME, 
+                HTMLScraperExtractorFactory.PREFIXES,
+                Arrays.asList("text/html;q=0.02", "application/xhtml+xml;q=0.02"),
+                null); // NOTE(review): meaning of this null argument is not visible here - confirm against SimpleExtractorFactory
+    }
+    /** @return a newly constructed {@link HTMLScraperExtractor} on every call */
+    @Override
+    public HTMLScraperExtractor createExtractor() {
+        return new HTMLScraperExtractor();
+    }
+    /** @return the shared instance held in the private static {@code descriptionInstance} field */
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

Modified: any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java (original)
+++ any23/trunk/plugins/html-scraper/src/test/java/org/apache/any23/plugin/htmlscraper/HTMLScraperExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -50,7 +50,7 @@ public class HTMLScraperExtractorTest {
 
     @Before
     public void setUp() {
-        extractor = (HTMLScraperExtractor) new HTMLScraperPlugin().getExtractorFactory().createExtractor();
+        extractor = new HTMLScraperExtractorFactory().createExtractor();
     }
 
     @After

Modified: any23/trunk/plugins/integration-test/pom.xml
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/pom.xml?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/integration-test/pom.xml (original)
+++ any23/trunk/plugins/integration-test/pom.xml Sat Jan 19 21:47:22 2013
@@ -59,6 +59,11 @@
       <artifactId>junit</artifactId>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-log4j12</artifactId>
+      <scope>test</scope>
+    </dependency>
     <!-- END: Test Dependencies -->
 
   </dependencies>

Modified: any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java (original)
+++ any23/trunk/plugins/integration-test/src/test/java/org/apache/any23/plugin/PluginIT.java Sat Jan 19 21:47:22 2013
@@ -41,7 +41,7 @@ import static org.junit.Assert.assertTru
  */
 public class PluginIT {
 
-    private static final int NUM_OF_EXTRACTORS = 23;
+    private static final int NUM_OF_EXTRACTORS = 28;
 
     private static final String PLUGIN_DIR = "target/plugins-build/";
 
@@ -76,13 +76,13 @@ public class PluginIT {
     @Test
     public void testDetectExtractorPlugins() throws IOException, InstantiationException, IllegalAccessException {
         final ExtractorGroup extractorGroup = manager.getApplicableExtractors(
-                ExtractorRegistryImpl.getInstance(),
+                new ExtractorRegistryImpl(),
                 HTML_SCRAPER_TARGET_DIR,  // Required to satisfy class dependencies.
                 HTML_SCRAPER_DEPENDENCY_DIR,
                 OFFICE_SCRAPER_TARGET_DIR
 , OFFICE_SCRAPER_DEPENDENCY_DIR // Required to satisfy class dependencies.
         );
-        assertEquals(NUM_OF_EXTRACTORS + 2,        // HTMLScraper Plugin, OfficeScraper Plugin.
+        assertEquals(NUM_OF_EXTRACTORS ,        // HTMLScraper Plugin, OfficeScraper Plugin.
                 extractorGroup.getNumOfExtractors()
         );
     }

Added: any23/trunk/plugins/integration-test/src/test/resources/log4j.properties
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/integration-test/src/test/resources/log4j.properties?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/integration-test/src/test/resources/log4j.properties (added)
+++ any23/trunk/plugins/integration-test/src/test/resources/log4j.properties Sat Jan 19 21:47:22 2013
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+log4j.rootCategory=INFO, stdout
+
+log4j.appender.stdout.Threshold=INFO
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+
+log4j.appender.stdout.layout.ConversionPattern=%5p [%t] %m%n

Modified: any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java (original)
+++ any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractor.java Sat Jan 19 21:47:22 2013
@@ -57,20 +57,6 @@ public class ExcelExtractor implements E
 
     private boolean stopAtFirstError = false;
 
-    public final static ExtractorFactory<ExcelExtractor> factory =
-            SimpleExtractorFactory.create(
-                    "excel",
-                    null,
-                    Arrays.asList(
-                            "application/vnd.ms-excel;q=0.1",
-                            "application/msexcel;q=0.1",
-                            "application/x-msexcel;q=0.1",
-                            "application/x-ms-excel;q=0.1"
-                    ),
-                    null,
-                    ExcelExtractor.class
-            );
-
     public ExcelExtractor() {}
 
     public boolean isStopAtFirstError() {
@@ -84,7 +70,7 @@ public class ExcelExtractor implements E
 
     @Override
     public ExtractorDescription getDescription() {
-        return factory;
+        return ExcelExtractorFactory.getDescriptionInstance();
     }
 
     @Override

Added: any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java?rev=1435720&view=auto
==============================================================================
--- any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java (added)
+++ any23/trunk/plugins/office-scraper/src/main/java/org/apache/any23/plugin/officescraper/ExcelExtractorFactory.java Sat Jan 19 21:47:22 2013
@@ -0,0 +1,49 @@
+/**
+ * 
+ */
+package org.apache.any23.plugin.officescraper;
+
+import java.util.Arrays;
+
+import org.apache.any23.extractor.ExtractorDescription;
+import org.apache.any23.extractor.ExtractorFactory;
+import org.apache.any23.extractor.SimpleExtractorFactory;
+import org.apache.any23.rdf.Prefixes;
+import org.kohsuke.MetaInfServices;
+
+/**
+ * {@link ExtractorFactory} for {@link ExcelExtractor}, annotated with {@code @MetaInfServices}.
+ * @author Peter Ansell p_ansell@yahoo.com
+ */
+@MetaInfServices(ExtractorFactory.class)
+public class ExcelExtractorFactory extends SimpleExtractorFactory<ExcelExtractor> implements
+        ExtractorFactory<ExcelExtractor> {
+    /** Extractor name passed to the {@link SimpleExtractorFactory} constructor. */
+    public static final String NAME = "excel";
+    /** Prefixes passed to the superclass constructor; this factory supplies {@code null}. */
+    public static final Prefixes PREFIXES = null;
+    /** Shared factory instance handed out by {@link #getDescriptionInstance()}. */
+    private static final ExtractorDescription descriptionInstance = new ExcelExtractorFactory();
+    /** Passes the name, the prefixes and the four Excel MIME types to the superclass constructor. */
+    public ExcelExtractorFactory() {
+        super(
+                ExcelExtractorFactory.NAME, 
+                ExcelExtractorFactory.PREFIXES,
+                Arrays.asList(
+                        "application/vnd.ms-excel;q=0.1",
+                        "application/msexcel;q=0.1",
+                        "application/x-msexcel;q=0.1",
+                        "application/x-ms-excel;q=0.1"
+                ),
+                null); // NOTE(review): meaning of this null argument is not visible here - confirm against SimpleExtractorFactory
+    }
+    /** @return a newly constructed {@link ExcelExtractor} on every call */
+    @Override
+    public ExcelExtractor createExtractor() {
+        return new ExcelExtractor();
+    }
+    /** @return the shared instance held in the private static {@code descriptionInstance} field */
+    public static ExtractorDescription getDescriptionInstance() {
+        return descriptionInstance;
+    }
+}

Modified: any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java
URL: http://svn.apache.org/viewvc/any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java?rev=1435720&r1=1435719&r2=1435720&view=diff
==============================================================================
--- any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java (original)
+++ any23/trunk/plugins/office-scraper/src/test/java/org/apache/any23/plugin/officescraper/ExcelExtractorTest.java Sat Jan 19 21:47:22 2013
@@ -57,7 +57,7 @@ public class ExcelExtractorTest {
 
     @Before
     public void setUp() {
-        extractor = new ExcelExtractor();
+        extractor = new ExcelExtractorFactory().createExtractor();
     }
 
     @Test