You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2009/04/28 15:56:29 UTC
svn commit: r769404 - in /lucene/tika/trunk: tika-core/
tika-core/src/main/java/org/apache/tika/parser/asm/
tika-core/src/main/java/org/apache/tika/parser/audio/
tika-core/src/main/java/org/apache/tika/parser/html/
tika-core/src/main/java/org/apache/ti...
Author: jukka
Date: Tue Apr 28 13:56:27 2009
New Revision: 769404
URL: http://svn.apache.org/viewvc?rev=769404&view=rev
Log:
TIKA-219: Split Tika to separate modules
Step 3: Move all parser classes and related tests to the new tika-parsers component.
PS. Needed to remove the getUTF8Reader() method from the o.a.t.utils.Utils class.
Added:
lucene/tika/trunk/tika-parsers/ (with props)
lucene/tika/trunk/tika-parsers/pom.xml (with props)
lucene/tika/trunk/tika-parsers/src/
lucene/tika/trunk/tika-parsers/src/main/
lucene/tika/trunk/tika-parsers/src/main/java/
lucene/tika/trunk/tika-parsers/src/main/java/org/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java (props changed)
- copied unchanged from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TestParsers.java
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/mime/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/asm/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/audio/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/html/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/image/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/microsoft/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/mp3/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/opendocument/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pdf/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pkg/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/rtf/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/txt/
lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/xml/
lucene/tika/trunk/tika-parsers/src/test/
lucene/tika/trunk/tika-parsers/src/test/java/
lucene/tika/trunk/tika-parsers/src/test/java/org/
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/
lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
lucene/tika/trunk/tika-parsers/src/test/resources/
lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/ (props changed)
- copied from r769383, lucene/tika/trunk/tika-core/src/test/resources/test-documents/
Removed:
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/asm/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/audio/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/html/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/image/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/microsoft/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/mp3/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/opendocument/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pdf/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/pkg/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/rtf/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/txt/
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/parser/xml/
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/TestParsers.java
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/mime/
lucene/tika/trunk/tika-core/src/test/java/org/apache/tika/parser/
lucene/tika/trunk/tika-core/src/test/resources/test-documents/
Modified:
lucene/tika/trunk/tika-core/pom.xml
lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java
Modified: lucene/tika/trunk/tika-core/pom.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/pom.xml?rev=769404&r1=769403&r2=769404&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/pom.xml (original)
+++ lucene/tika/trunk/tika-core/pom.xml Tue Apr 28 13:56:27 2009
@@ -62,52 +62,6 @@
<version>1.4</version>
</dependency>
<dependency>
- <groupId>pdfbox</groupId>
- <artifactId>pdfbox</artifactId>
- <version>0.7.3</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi</artifactId>
- <version>3.5-beta5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-scratchpad</artifactId>
- <version>3.5-beta5</version>
- </dependency>
- <dependency>
- <groupId>org.apache.poi</groupId>
- <artifactId>poi-ooxml</artifactId>
- <version>3.5-beta5</version>
- <exclusions>
- <exclusion>
- <groupId>stax</groupId>
- <artifactId>stax-api</artifactId>
- </exclusion>
- </exclusions>
- </dependency>
- <dependency>
- <groupId>org.apache.geronimo.specs</groupId>
- <artifactId>geronimo-stax-api_1.0_spec</artifactId>
- <version>1.0</version>
- </dependency>
- <dependency>
- <groupId>net.sourceforge.nekohtml</groupId>
- <artifactId>nekohtml</artifactId>
- <version>1.9.9</version>
- </dependency>
- <dependency>
- <groupId>com.ibm.icu</groupId>
- <artifactId>icu4j</artifactId>
- <version>3.8</version>
- </dependency>
- <dependency>
- <groupId>asm</groupId>
- <artifactId>asm</artifactId>
- <version>3.1</version>
- </dependency>
- <dependency>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
<version>1.2.14</version>
Modified: lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java?rev=769404&r1=769403&r2=769404&view=diff
==============================================================================
--- lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java (original)
+++ lucene/tika/trunk/tika-core/src/main/java/org/apache/tika/utils/Utils.java Tue Apr 28 13:56:27 2009
@@ -32,14 +32,6 @@
import java.util.zip.ZipInputStream;
import org.apache.log4j.Logger;
-import org.apache.tika.exception.TikaException;
-import org.apache.tika.metadata.HttpHeaders;
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.parser.ParsingReader;
-import org.apache.tika.parser.txt.TXTParser;
-
-import com.ibm.icu.text.CharsetDetector;
-import com.ibm.icu.text.CharsetMatch;
/**
* Class util
@@ -94,19 +86,4 @@
out.close();
}
- /**
- * Try to detect encoding from inputstream and return a UTF-8
- * Reader. A metadata hint can be submitted as part of {@link Metadata}
- * under key {@link HttpHeaders#CONTENT_ENCODING}.
- *
- * After succesfull detection, fills Metadata with detected content encoding
- * and content language ({@link HttpHeaders#CONTENT_LANGUAGE}).
- *
- * @return Reader to utf8 encoded reader.
- * @deprecated use {@link TXTParser} instead
- */
- public static Reader getUTF8Reader(InputStream stream, Metadata metadata) throws TikaException, IOException{
- return new ParsingReader(new TXTParser(), stream, metadata);
- }
-
}
Propchange: lucene/tika/trunk/tika-parsers/
------------------------------------------------------------------------------
--- svn:ignore (added)
+++ svn:ignore Tue Apr 28 13:56:27 2009
@@ -0,0 +1,2 @@
+target
+.*
Added: lucene/tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/lucene/tika/trunk/tika-parsers/pom.xml?rev=769404&view=auto
==============================================================================
--- lucene/tika/trunk/tika-parsers/pom.xml (added)
+++ lucene/tika/trunk/tika-parsers/pom.xml Tue Apr 28 13:56:27 2009
@@ -0,0 +1,258 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
+ http://maven.apache.org/maven-v4_0_0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>4</version>
+ </parent>
+
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-parsers</artifactId>
+ <version>0.4-SNAPSHOT</version>
+
+ <name>Apache Tika parsers</name>
+
+ <dependencies>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-core</artifactId>
+ <version>0.4-SNAPSHOT</version>
+ </dependency>
+ <dependency>
+ <groupId>pdfbox</groupId>
+ <artifactId>pdfbox</artifactId>
+ <version>0.7.3</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi</artifactId>
+ <version>3.5-beta5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-scratchpad</artifactId>
+ <version>3.5-beta5</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.poi</groupId>
+ <artifactId>poi-ooxml</artifactId>
+ <version>3.5-beta5</version>
+ <exclusions>
+ <exclusion>
+ <groupId>stax</groupId>
+ <artifactId>stax-api</artifactId>
+ </exclusion>
+ </exclusions>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.geronimo.specs</groupId>
+ <artifactId>geronimo-stax-api_1.0_spec</artifactId>
+ <version>1.0</version>
+ </dependency>
+ <dependency>
+ <groupId>net.sourceforge.nekohtml</groupId>
+ <artifactId>nekohtml</artifactId>
+ <version>1.9.9</version>
+ </dependency>
+ <dependency>
+ <groupId>com.ibm.icu</groupId>
+ <artifactId>icu4j</artifactId>
+ <version>3.8</version>
+ </dependency>
+ <dependency>
+ <groupId>asm</groupId>
+ <artifactId>asm</artifactId>
+ <version>3.1</version>
+ </dependency>
+ <dependency>
+ <groupId>log4j</groupId>
+ <artifactId>log4j</artifactId>
+ <version>1.2.14</version>
+ </dependency>
+ <dependency>
+ <groupId>junit</groupId>
+ <artifactId>junit</artifactId>
+ <version>3.8.1</version>
+ <type>jar</type>
+ <scope>test</scope>
+ </dependency>
+ </dependencies>
+
+ <build>
+ <plugins>
+ <plugin>
+ <artifactId>maven-compiler-plugin</artifactId>
+ <version>2.0.2</version>
+ <configuration>
+ <source>1.5</source>
+ <target>1.5</target>
+ </configuration>
+ </plugin>
+ <plugin>
+ <artifactId>maven-resources-plugin</artifactId>
+ <version>2.3</version>
+ <configuration>
+ <encoding>UTF-8</encoding>
+ </configuration>
+ <executions>
+ <execution>
+ <id>copy-resources</id>
+ <phase>process-resources</phase>
+ <goals>
+ <goal>copy-resources</goal>
+ </goals>
+ <configuration>
+ <outputDirectory>
+ ${project.build.directory}/classes/META-INF
+ </outputDirectory>
+ <resources>
+ <resource>
+ <directory>${basedir}</directory>
+ <includes>
+ <include>README.txt</include>
+ <include>NOTICE.txt</include>
+ <include>LICENSE.txt</include>
+ </includes>
+ </resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+ <artifactId>maven-surefire-plugin</artifactId>
+ <version>2.4.3</version>
+ </plugin>
+ <plugin>
+ <artifactId>maven-jar-plugin</artifactId>
+ <version>2.2</version>
+ <configuration>
+ <archive>
+ <manifestEntries>
+ <Specification-Title>${project.name}</Specification-Title>
+ <Specification-Version>${project.version}</Specification-Version>
+ <Specification-Vendor>${project.organization.name}</Specification-Vendor>
+ <Implementation-Title>${project.name}</Implementation-Title>
+ <Implementation-Version>${project.version}</Implementation-Version>
+ <Implementation-Vendor>${project.organization.name}</Implementation-Vendor>
+ <Implementation-Vendor-Id>org.apache</Implementation-Vendor-Id>
+ </manifestEntries>
+ </archive>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>retrotranslator-maven-plugin</artifactId>
+ <version>1.0-alpha-4</version>
+ <executions>
+ <execution>
+ <goals>
+ <goal>translate-project</goal>
+ </goals>
+ </execution>
+ </executions>
+ </plugin>
+ </plugins>
+ </build>
+
+ <reporting>
+ <plugins>
+
+ <!-- Produce JavaDoc -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-javadoc-plugin</artifactId>
+ <version>2.2</version>
+ <configuration>
+ <aggregate>true</aggregate>
+ <source>1.5</source>
+ </configuration>
+ </plugin>
+
+ <!-- Produce Source cross references -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-jxr-plugin</artifactId>
+ <version>2.1</version>
+ <configuration>
+ <aggregate>true</aggregate>
+ </configuration>
+ </plugin>
+
+ <!-- Unit tests report -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-surefire-report-plugin</artifactId>
+ <version>2.3</version>
+ </plugin>
+
+ <!-- "Release Audit" report (checks license headers etc.) -->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>rat-maven-plugin</artifactId>
+ <version>1.0-alpha-3</version>
+ </plugin>
+
+ <!-- FindBugs Report -->
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>findbugs-maven-plugin</artifactId>
+ <version>1.0.0</version>
+ <configuration>
+ <threshold>Normal</threshold>
+ <effort>Default</effort>
+ </configuration>
+ </plugin>
+
+ <!-- Checkstyle report -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-checkstyle-plugin</artifactId>
+ <version>2.1</version>
+ <configuration>
+ <!--configLocation>checkstyle.xml</configLocation-->
+ <enableRulesSummary>false</enableRulesSummary>
+ </configuration>
+ </plugin>
+
+ <!-- PMD report -->
+ <plugin>
+ <groupId>org.apache.maven.plugins</groupId>
+ <artifactId>maven-pmd-plugin</artifactId>
+ <configuration>
+ <linkXref>true</linkXref>
+ <sourceEncoding>ASCII</sourceEncoding>
+ <targetJdk>1.5</targetJdk>
+ </configuration>
+ </plugin>
+
+ </plugins>
+ </reporting>
+
+</project>
+
Propchange: lucene/tika/trunk/tika-parsers/pom.xml
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/TestParsers.java
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/mime/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/opendocument/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/
------------------------------------------------------------------------------
svn:mergeinfo =
Propchange: lucene/tika/trunk/tika-parsers/src/test/resources/test-documents/
------------------------------------------------------------------------------
svn:mergeinfo =