You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/04/28 15:56:29 UTC

svn commit: r769404 - in /lucene/tika/trunk: tika-core/ tika-core/src/main/java/org/apache/tika/parser/asm/ tika-core/src/main/java/org/apache/tika/parser/audio/ tika-core/src/main/java/org/apache/tika/parser/html/ tika-core/src/main/java/org/apache/ti...

Author: jukka
Date: Tue Apr 28 13:56:27 2009
New Revision: 769404

URL: http://svn.apache.org/viewvc?rev=769404&view=rev
Log:
TIKA-219: Split Tika to separate modules

Step 3: Move all parser classes and related tests to the new tika-parsers component.

PS. Needed to remove the getUTF8Reader() method from the o.a.t.utils.Utils class.

Added:
    lucene/tika/trunk/tika-parsers/   (with props)
    lucene/tika/trunk/tika-parsers/pom.xml   (with props)
    lucene/tika/trunk/tika-parsers/src/
    lucene/tika/trunk/tika-parsers/src/main/
    lucene/tika/trunk/tika-parsers/src/main/java/
    lucene/tika/trunk/tika-parsers/src/main/java/org/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java   (props changed)
      - copied unchanged from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TestParsers.java
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/mime/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/asm/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/audio/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/html/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/image/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/microsoft/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/mp3/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/opendocument/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pdf/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pkg/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/rtf/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/txt/
    lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/xml/
    lucene/tika/trunk/tika-parsers/src/test/
    lucene/tika/trunk/tika-parsers/src/test/java/
    lucene/tika/trunk/tika-parsers/src/test/java/org/
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/
    lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
    lucene/tika/trunk/tika-parsers/src/test/resources/
    lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/   (props changed)
      - copied from r769383, lucene/tika/trunk/tika-core/src/test/resources/test-documents/
Removed:
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/asm/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/audio/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/html/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/image/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/microsoft/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/mp3/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/opendocument/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pdf/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pkg/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/rtf/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/txt/
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/xml/
    lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TestParsers.java
    lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/
    lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
    lucene/tika/trunk/tika-core/src/test/resources/test-documents/
Modified:
    lucene/tika/trunk/tika-core/pom.xml
    lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java

Modified: lucene/tika/trunk/tika-core/pom.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/pom.xml?rev=769404&r1=769403&r2=769404&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/pom.xml (original)
+++ lucene/tika/trunk/tika-core/pom.xml Tue Apr 28 13:56:27 2009
@@ -62,52 +62,6 @@
       <version>1.4</version>
     </dependency>
     <dependency>
-      <groupId>pdfbox</groupId>
-      <artifactId>pdfbox</artifactId>
-      <version>0.7.3</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi</artifactId>
-      <version>3.5-beta5</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi-scratchpad</artifactId>
-      <version>3.5-beta5</version>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.poi</groupId>
-      <artifactId>poi-ooxml</artifactId>
-      <version>3.5-beta5</version>
-      <exclusions>
-        <exclusion>
-          <groupId>stax</groupId>
-          <artifactId>stax-api</artifactId>
-        </exclusion>
-      </exclusions>
-    </dependency>
-    <dependency>
-      <groupId>org.apache.geronimo.specs</groupId>
-      <artifactId>geronimo-stax-api_1.0_spec</artifactId>
-      <version>1.0</version>
-    </dependency>
-    <dependency>
-      <groupId>net.sourceforge.nekohtml</groupId>
-      <artifactId>nekohtml</artifactId>
-      <version>1.9.9</version>
-    </dependency>
-    <dependency>
-      <groupId>com.ibm.icu</groupId>
-      <artifactId>icu4j</artifactId>
-      <version>3.8</version>
-    </dependency>
-    <dependency>
-      <groupId>asm</groupId>
-      <artifactId>asm</artifactId>
-      <version>3.1</version>
-    </dependency>
-    <dependency>
       <groupId>log4j</groupId>
       <artifactId>log4j</artifactId>
       <version>1.2.14</version>

Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java?rev=769404&r1=769403&r2=769404&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java Tue Apr 28 13:56:27 2009
@@ -32,14 +32,6 @@
 import java.util.zip.ZipInputStream;
 
 import org.apache.log4j.Logger;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.HttpHeaders;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParsingReader;
-import org.apache.tika.parser.txt.TXTParser;
-
-import com.ibm.icu.text.CharsetDetector;
-import com.ibm.icu.text.CharsetMatch;
 
 /**
  * Class util
@@ -94,19 +86,4 @@
         out.close();
     }
 
-    /**
-     * Try to detect encoding from inputstream and return a UTF-8
-     * Reader. A metadata hint can be submitted as part of {@link Metadata}
-     * under key {@link HttpHeaders#CONTENT_ENCODING}.
-     * 
-     * After succesfull detection, fills Metadata with detected content encoding
-     * and content language ({@link HttpHeaders#CONTENT_LANGUAGE}).
-     * 
-     * @return Reader to utf8 encoded reader.
-     * @deprecated use {@link TXTParser} instead
-     */
-    public static Reader getUTF8Reader(InputStream stream, Metadata metadata) throws TikaException, IOException{
-        return new ParsingReader(new TXTParser(), stream, metadata);
-    }
-
 }

Propchange: lucene/tika/trunk/tika-parsers/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Apr 28 13:56:27 2009
@@ -0,0 +1,2 @@
+target
+.*

Added: lucene/tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=769404&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (added)
+++ lucene/tika/trunk/tika-parsers/pom.xml Tue Apr 28 13:56:27 2009
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+ 
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+                             http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+
+  <parent>
+    <groupId>org.apache</groupId>
+    <artifactId>apache</artifactId>
+    <version>4</version>
+  </parent>
+
+  <groupId>org.apache.tika</groupId>
+  <artifactId>tika-parsers</artifactId>
+  <version>0.4-SNAPSHOT</version>
+
+  <name>Apache Tika parsers</name>
+
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.tika</groupId>
+      <artifactId>tika-core</artifactId>
+      <version>0.4-SNAPSHOT</version>
+    </dependency>
+    <dependency>
+      <groupId>pdfbox</groupId>
+      <artifactId>pdfbox</artifactId>
+      <version>0.7.3</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi</artifactId>
+      <version>3.5-beta5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-scratchpad</artifactId>
+      <version>3.5-beta5</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.poi</groupId>
+      <artifactId>poi-ooxml</artifactId>
+      <version>3.5-beta5</version>
+      <exclusions>
+        <exclusion>
+          <groupId>stax</groupId>
+          <artifactId>stax-api</artifactId>
+        </exclusion>
+      </exclusions>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.geronimo.specs</groupId>
+      <artifactId>geronimo-stax-api_1.0_spec</artifactId>
+      <version>1.0</version>
+    </dependency>
+    <dependency>
+      <groupId>net.sourceforge.nekohtml</groupId>
+      <artifactId>nekohtml</artifactId>
+      <version>1.9.9</version>
+    </dependency>
+    <dependency>
+      <groupId>com.ibm.icu</groupId>
+      <artifactId>icu4j</artifactId>
+      <version>3.8</version>
+    </dependency>
+    <dependency>
+      <groupId>asm</groupId>
+      <artifactId>asm</artifactId>
+      <version>3.1</version>
+    </dependency>
+    <dependency>
+      <groupId>log4j</groupId>
+      <artifactId>log4j</artifactId>
+      <version>1.2.14</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <type>jar</type>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+
+  <build>
+    <plugins>
+      <plugin>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.0.2</version>
+        <configuration>
+          <source>1.5</source>
+          <target>1.5</target>
+        </configuration>
+      </plugin>
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>2.3</version>
+        <configuration>
+          <encoding>UTF-8</encoding>
+        </configuration>
+        <executions>
+          <execution>
+            <id>copy-resources</id>
+            <phase>process-resources</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>
+                ${project.build.directory}/classes/META-INF
+              </outputDirectory>
+              <resources>
+                <resource>
+                  <directory>${basedir}</directory>
+                  <includes>
+                    <include>README.txt</include>
+                    <include>NOTICE.txt</include>
+                    <include>LICENSE.txt</include>
+                  </includes>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <artifactId>maven-surefire-plugin</artifactId>
+        <version>2.4.3</version>
+      </plugin>
+      <plugin>
+        <artifactId>maven-jar-plugin</artifactId>
+        <version>2.2</version>
+        <configuration>
+          <archive>
+            <manifestEntries>
+              <Specification-Title>${project.name}</Specification-Title>
+              <Specification-Version>${project.version}</Specification-Version>
+              <Specification-Vendor>${project.organization.name}</Specification-Vendor>
+              <Implementation-Title>${project.name}</Implementation-Title>
+              <Implementation-Version>${project.version}</Implementation-Version>
+              <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+              <Implementation-Vendor-Id>org.apache</Implementation-Vendor-Id>
+            </manifestEntries>
+          </archive>
+        </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>retrotranslator-maven-plugin</artifactId>
+        <version>1.0-alpha-4</version>
+        <executions>
+          <execution>
+            <goals>
+              <goal>translate-project</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+
+  <reporting>
+    <plugins>
+
+      <!-- Produce JavaDoc -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-javadoc-plugin</artifactId>
+        <version>2.2</version>
+        <configuration> 
+          <aggregate>true</aggregate>
+          <source>1.5</source>
+        </configuration> 
+      </plugin>
+
+      <!-- Produce Source cross references -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-jxr-plugin</artifactId>
+        <version>2.1</version>
+        <configuration> 
+          <aggregate>true</aggregate>
+        </configuration> 
+      </plugin>
+
+      <!-- Unit tests report -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-surefire-report-plugin</artifactId>
+        <version>2.3</version>
+      </plugin>
+
+      <!-- "Release Audit" report (checks license headers etc.) -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>rat-maven-plugin</artifactId>
+        <version>1.0-alpha-3</version>
+      </plugin>
+
+      <!-- FindBugs Report -->
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>findbugs-maven-plugin</artifactId>
+        <version>1.0.0</version>
+        <configuration>
+          <threshold>Normal</threshold>
+          <effort>Default</effort>
+       </configuration>
+      </plugin>
+
+      <!-- Checkstyle report -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-checkstyle-plugin</artifactId>
+        <version>2.1</version>
+        <configuration>
+          <!--configLocation>checkstyle.xml</configLocation-->
+          <enableRulesSummary>false</enableRulesSummary>
+        </configuration>
+      </plugin>
+
+      <!-- PMD report -->
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-pmd-plugin</artifactId>
+        <configuration>
+          <linkXref>true</linkXref>
+          <sourceEncoding>ASCII</sourceEncoding>
+          <targetJdk>1.5</targetJdk>
+        </configuration>
+      </plugin>
+
+    </plugins>
+  </reporting>
+
+</project>
+

Propchange: lucene/tika/trunk/tika-parsers/pom.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/mime/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/
------------------------------------------------------------------------------
    svn:mergeinfo = 

Propchange: lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/
------------------------------------------------------------------------------
    svn:mergeinfo =