You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ju...@apache.org on 2011/10/31 19:22:08 UTC
svn commit: r1195596 - in /tika/trunk: tika-core/
tika-core/src/main/java/org/apache/tika/config/ tika-parsers/
tika-parsers/src/main/java/org/apache/tika/parser/asm/
tika-parsers/src/main/java/org/apache/tika/parser/audio/
tika-parsers/src/main/java/o...
Author: jukka
Date: Mon Oct 31 18:22:06 2011
New Revision: 1195596
URL: http://svn.apache.org/viewvc?rev=1195596&view=rev
Log:
TIKA-565: Improved OSGi bundling
Use central OSGiParser and OSGiDetector classes in tika-parsers to better handle issues with missing dependencies and to avoid having to maintain the list of parser implementations in two separate places (OSGi annotations and the services files).
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiDetector.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiParser.java
Modified:
tika/trunk/tika-core/pom.xml
tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
tika/trunk/tika-parsers/pom.xml
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
Modified: tika/trunk/tika-core/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/pom.xml?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-core/pom.xml (original)
+++ tika/trunk/tika-core/pom.xml Mon Oct 31 18:22:06 2011
@@ -35,7 +35,7 @@
<url>http://tika.apache.org/</url>
<dependencies>
- <!-- Optional OSGi dependency, used only when running within OSGi -->
+ <!-- Optional OSGi dependencies, used only when running within OSGi -->
<dependency>
<groupId>org.osgi</groupId>
<artifactId>org.osgi.core</artifactId>
Modified: tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java (original)
+++ tika/trunk/tika-core/src/main/java/org/apache/tika/config/ServiceLoader.java Mon Oct 31 18:22:06 2011
@@ -101,9 +101,17 @@ public class ServiceLoader {
private final LoadErrorHandler handler;
- public ServiceLoader(ClassLoader loader, LoadErrorHandler handler) {
+ private final boolean dynamic;
+
+ public ServiceLoader(
+ ClassLoader loader, LoadErrorHandler handler, boolean dynamic) {
this.loader = loader;
this.handler = handler;
+ this.dynamic = dynamic;
+ }
+
+ public ServiceLoader(ClassLoader loader, LoadErrorHandler handler) {
+ this(loader, handler, false);
}
public ServiceLoader(ClassLoader loader) {
@@ -111,7 +119,7 @@ public class ServiceLoader {
}
public ServiceLoader() {
- this(getContextClassLoader());
+ this(getContextClassLoader(), LoadErrorHandler.IGNORE, true);
}
/**
@@ -141,10 +149,12 @@ public class ServiceLoader {
public <T> List<T> loadServiceProviders(Class<T> iface) {
List<T> providers = new ArrayList<T>();
- synchronized (services) {
- for (Object service : services.values()) {
- if (iface.isAssignableFrom(service.getClass())) {
- providers.add((T) service);
+ if (dynamic) {
+ synchronized (services) {
+ for (Object service : services.values()) {
+ if (iface.isAssignableFrom(service.getClass())) {
+ providers.add((T) service);
+ }
}
}
}
Modified: tika/trunk/tika-parsers/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/pom.xml?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/pom.xml (original)
+++ tika/trunk/tika-parsers/pom.xml Mon Oct 31 18:22:06 2011
@@ -47,12 +47,21 @@
<version>${project.version}</version>
</dependency>
+ <!-- Optional OSGi dependencies, used only when running within OSGi -->
+ <dependency>
+ <groupId>org.osgi</groupId>
+ <artifactId>org.osgi.compendium</artifactId>
+ <version>4.2.0</version>
+ <scope>provided</scope>
+ <optional>true</optional>
+ </dependency>
<dependency>
<groupId>org.apache.felix</groupId>
<artifactId>org.apache.felix.scr.annotations</artifactId>
<scope>provided</scope>
</dependency>
+ <!-- Upstream parser libraries -->
<dependency>
<groupId>edu.ucar</groupId>
<artifactId>netcdf</artifactId>
@@ -151,6 +160,8 @@
<artifactId>rome</artifactId>
<version>0.9</version>
</dependency>
+
+ <!-- Test dependencies -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/asm/ClassParser.java Mon Oct 31 18:22:06 2011
@@ -21,23 +21,22 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
* Parser for Java .class files.
*/
-@Component @Service(Parser.class)
public class ClassParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -3531388963354454357L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("java-vm"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/AudioParser.java Mon Oct 31 18:22:06 2011
@@ -23,31 +23,30 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
import javax.sound.sampled.AudioFileFormat;
+import javax.sound.sampled.AudioFileFormat.Type;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
-import javax.sound.sampled.AudioFileFormat.Type;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class AudioParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -6015684081240882695L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.audio("basic"),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/audio/MidiParser.java Mon Oct 31 18:22:06 2011
@@ -32,21 +32,20 @@ import javax.sound.midi.Patch;
import javax.sound.midi.Sequence;
import javax.sound.midi.Track;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class MidiParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 6343278584336189432L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("x-midi"),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/chm/ChmParser.java Mon Oct 31 18:22:06 2011
@@ -21,23 +21,21 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class ChmParser extends AbstractParser {
+ /** Serial version UID */
private static final long serialVersionUID = 5938777307516469802L;
- private static final Set<MediaType> SUPPORTED_TYPES = Collections
- .singleton(MediaType.application("chm"));
+
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("chm"));
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java Mon Oct 31 18:22:06 2011
@@ -21,8 +21,6 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.util.IOUtils;
import org.apache.poi.util.StringUtil;
import org.apache.tika.exception.TikaException;
@@ -31,7 +29,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -42,9 +39,11 @@ import org.xml.sax.SAXException;
* Note that we use Apache POI for various parts of the processing, as
* lots of the low level string/int/short concepts are the same.
*/
-@Component @Service(Parser.class)
public class DWGParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -7744232583079169119L;
+
private static MediaType TYPE = MediaType.image("vnd.dwg");
public Set<MediaType> getSupportedTypes(ParseContext context) {
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/epub/EpubParser.java Mon Oct 31 18:22:06 2011
@@ -23,8 +23,6 @@ import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
@@ -40,9 +38,11 @@ import org.xml.sax.helpers.DefaultHandle
/**
* Epub parser
*/
-@Component @Service(Parser.class)
public class EpubParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 215176772484050550L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("epub+zip"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/feed/FeedParser.java Mon Oct 31 18:22:06 2011
@@ -21,19 +21,14 @@ import java.io.InputStream;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
@@ -51,11 +46,13 @@ import com.sun.syndication.io.SyndFeedIn
* Uses Rome for parsing the feeds. A feed description is put in a paragraph
* with its link and title in an anchor.
*/
-@Component @Service(Parser.class)
public class FeedParser extends AbstractParser {
- private static final Set<MediaType> SUPPORTED_TYPES = Collections
- .unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+ /** Serial version UID */
+ private static final long serialVersionUID = -3785361933034525186L;
+
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("rss+xml"),
MediaType.application("atom+xml"))));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/font/TrueTypeParser.java Mon Oct 31 18:22:06 2011
@@ -21,8 +21,6 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.fontbox.ttf.TTFParser;
import org.apache.fontbox.ttf.TrueTypeFont;
import org.apache.tika.exception.TikaException;
@@ -33,7 +31,6 @@ import org.apache.tika.metadata.Property
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -41,9 +38,11 @@ import org.xml.sax.SAXException;
/**
* Parser for TrueType font files (TTF).
*/
-@Component @Service(Parser.class)
public class TrueTypeParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 44788554612243032L;
+
private static final MediaType TYPE =
MediaType.application("x-font-ttf");
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/hdf/HDFParser.java Mon Oct 31 18:22:06 2011
@@ -24,22 +24,17 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-//TIKA imports
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.netcdf.NetCDFParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-//NetCDF imports
import ucar.nc2.Attribute;
import ucar.nc2.Group;
import ucar.nc2.NetcdfFile;
@@ -51,11 +46,12 @@ import ucar.nc2.NetcdfFile;
* we are able to use it to parse HDF files as well. See <a href=
* "http://www.unidata.ucar.edu/software/netcdf-java/formats/FileTypes.html"
* >this link</a> for more information.
- *
*/
-@Component @Service(Parser.class)
public class HDFParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 1091208208003437549L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("x-hdf"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/html/HtmlParser.java Mon Oct 31 18:22:06 2011
@@ -16,7 +16,10 @@
*/
package org.apache.tika.parser.html;
-import java.io.*;
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collections;
@@ -25,15 +28,12 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.txt.CharsetDetector;
import org.apache.tika.parser.txt.CharsetMatch;
import org.apache.tika.utils.CharsetUtils;
@@ -48,9 +48,11 @@ import org.xml.sax.SAXException;
* and post-processes the events to produce XHTML and metadata expected by
* Tika clients.
*/
-@Component @Service(Parser.class)
public class HtmlParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 7895315240498733128L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.text("html"),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/ImageParser.java Mon Oct 31 18:22:06 2011
@@ -29,8 +29,6 @@ import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.metadata.IIOMetadata;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
@@ -38,16 +36,17 @@ import org.apache.tika.metadata.Property
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class ImageParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 7852529269245520335L;
+
private static final MediaType CANONICAL_BMP_TYPE = MediaType.image("x-ms-bmp");
private static final MediaType JAVA_BMP_TYPE = MediaType.image("bmp");
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/PSDParser.java Mon Oct 31 18:22:06 2011
@@ -24,8 +24,6 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.EndianUtils;
@@ -34,7 +32,6 @@ import org.apache.tika.metadata.TIFF;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -45,8 +42,11 @@ import org.xml.sax.SAXException;
* Documentation on the file format is available from
* http://www.adobe.com/devnet-apps/photoshop/fileformatashtml/PhotoshopFileFormats.htm
*/
-@Component @Service(Parser.class)
public class PSDParser extends AbstractParser {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = 883387734607994914L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.image("vnd.adobe.photoshop"))));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/image/TiffParser.java Mon Oct 31 18:22:06 2011
@@ -22,22 +22,21 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.image.xmp.JempboxExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class TiffParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -3941143576535464926L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.image("tiff"));
Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiDetector.java?rev=1195596&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiDetector.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiDetector.java Mon Oct 31 18:22:06 2011
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.internal;
+
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+
+@Component @Service(Detector.class)
+public class OSGiDetector extends DefaultDetector {
+
+ /** Serial version UID */
+ private static final long serialVersionUID = -4397900223116731483L;
+
+ public OSGiDetector() {
+ super(OSGiDetector.class.getClassLoader());
+ }
+
+}
Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiParser.java?rev=1195596&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiParser.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/internal/OSGiParser.java Mon Oct 31 18:22:06 2011
@@ -0,0 +1,34 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.internal;
+
+import org.apache.felix.scr.annotations.Component;
+import org.apache.felix.scr.annotations.Service;
+import org.apache.tika.parser.DefaultParser;
+import org.apache.tika.parser.Parser;
+
+@Component @Service(Parser.class) // Felix SCR annotations: publish this component under the Parser service interface
+public class OSGiParser extends DefaultParser { // central Parser for the tika-parsers bundle (TIKA-565); adds no behavior of its own beyond the constructor
+
+ /** Serial version UID */
+ private static final long serialVersionUID = -2496251420681985759L;
+
+ public OSGiParser() { // no-arg constructor; NOTE(review): presumably what the SCR runtime uses to instantiate the component - confirm
+ super(OSGiParser.class.getClassLoader()); // pass the loader that loaded this class; NOTE(review): presumably so DefaultParser looks up parser implementations through this bundle's class loader - confirm
+ }
+
+}
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/iwork/IWorkPackageParser.java Mon Oct 31 18:22:06 2011
@@ -29,8 +29,6 @@ import javax.xml.namespace.QName;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.detect.XmlRootExtractor;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
@@ -38,7 +36,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.OfflineContentHandler;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
@@ -55,11 +52,9 @@ import org.xml.sax.SAXException;
* <li>Numbers format version 1.x. Currently only tested with Numbers version 2.0.x
* </ol>
*/
-@Component @Service(Parser.class)
public class IWorkPackageParser extends AbstractParser {
- /**
- * Serial version UID
- */
+
+ /** Serial version UID */
private static final long serialVersionUID = -2160322853809682372L;
/**
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/jpeg/JpegParser.java Mon Oct 31 18:22:06 2011
@@ -21,8 +21,6 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
@@ -30,16 +28,17 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.image.ImageMetadataExtractor;
import org.apache.tika.parser.image.xmp.JempboxExtractor;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-@Component @Service(Parser.class)
public class JpegParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -1355028253756234603L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.image("jpeg"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mail/RFC822Parser.java Mon Oct 31 18:22:06 2011
@@ -21,8 +21,6 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.parser.MimeStreamParser;
import org.apache.james.mime4j.stream.MimeConfig;
@@ -32,7 +30,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -46,9 +43,11 @@ import org.xml.sax.SAXException;
*
* @author jnioche@digitalpebble.com
*/
-@Component @Service(Parser.class)
public class RFC822Parser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -5504243905998074168L;
+
private static final Set<MediaType> SUPPORTED_TYPES = Collections
.singleton(MediaType.parse("message/rfc822"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mbox/MboxParser.java Mon Oct 31 18:22:06 2011
@@ -30,14 +30,11 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -46,11 +43,9 @@ import org.xml.sax.SAXException;
* Mbox (mailbox) parser. This version returns the headers for the first email
* via metadata, which means headers from subsequent emails will be lost.
*/
-@Component @Service(Parser.class)
public class MboxParser extends AbstractParser {
- /**
- * Serial version UID
- */
+
+ /** Serial version UID */
private static final long serialVersionUID = -1762689436731160661L;
private static final Set<MediaType> SUPPORTED_TYPES =
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/OfficeParser.java Mon Oct 31 18:22:06 2011
@@ -27,8 +27,6 @@ import java.util.Locale;
import java.util.Map;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.poifs.crypt.Decryptor;
@@ -46,7 +44,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.parser.microsoft.ooxml.OOXMLParser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.EmbeddedContentHandler;
@@ -57,25 +54,25 @@ import org.xml.sax.SAXException;
/**
* Defines a Microsoft document content extractor.
*/
-@Component @Service(Parser.class)
public class OfficeParser extends AbstractParser {
+ /** Serial version UID */
private static final long serialVersionUID = 7393462244028653479L;
-
- private static final Set<MediaType> SUPPORTED_TYPES =
- Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
- POIFSDocumentType.WORKBOOK.type,
- POIFSDocumentType.OLE10_NATIVE.type,
- POIFSDocumentType.WORDDOCUMENT.type,
- POIFSDocumentType.UNKNOWN.type,
- POIFSDocumentType.ENCRYPTED.type,
- POIFSDocumentType.POWERPOINT.type,
- POIFSDocumentType.PUBLISHER.type,
- POIFSDocumentType.VISIO.type,
- POIFSDocumentType.OUTLOOK.type,
- MediaType.application("vnd.ms-excel.sheet.binary.macroenabled.12")
- )));
-
+
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
+ POIFSDocumentType.WORKBOOK.type,
+ POIFSDocumentType.OLE10_NATIVE.type,
+ POIFSDocumentType.WORDDOCUMENT.type,
+ POIFSDocumentType.UNKNOWN.type,
+ POIFSDocumentType.ENCRYPTED.type,
+ POIFSDocumentType.POWERPOINT.type,
+ POIFSDocumentType.PUBLISHER.type,
+ POIFSDocumentType.VISIO.type,
+ POIFSDocumentType.OUTLOOK.type,
+ MediaType.application("vnd.ms-excel.sheet.binary.macroenabled.12")
+ )));
+
public enum POIFSDocumentType {
WORKBOOK("xls", MediaType.application("vnd.ms-excel")),
OLE10_NATIVE("ole", MediaType.application("x-tika-msoffice")),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/POIFSContainerDetector.java Mon Oct 31 18:22:06 2011
@@ -16,6 +16,8 @@
*/
package org.apache.tika.parser.microsoft;
+import static org.apache.tika.mime.MediaType.application;
+
import java.io.IOException;
import java.io.InputStream;
import java.nio.channels.FileChannel;
@@ -23,8 +25,6 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
@@ -33,20 +33,15 @@ import org.apache.tika.io.TikaInputStrea
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
-import static org.apache.tika.mime.MediaType.application;
-
/**
* A detector that works on a POIFS OLE2 document
* to figure out exactly what the file is.
* This should work for all OLE2 documents, whether
* they are ones supported by POI or not.
*/
-@Component @Service(Detector.class)
public class POIFSContainerDetector implements Detector {
- /**
- * Serial version UID.
- */
+ /** Serial version UID */
private static final long serialVersionUID = -3028021741663605293L;
/** The OLE base file format */
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParser.java Mon Oct 31 18:22:06 2011
@@ -23,25 +23,20 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
* Office Open XML (OOXML) parser.
*/
-@Component @Service(Parser.class)
public class OOXMLParser extends AbstractParser {
- /**
- * Serial version UID
- */
+
+ /** Serial version UID */
private static final long serialVersionUID = 6535995710857776481L;
private static final Set<MediaType> SUPPORTED_TYPES =
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/mp3/Mp3Parser.java Mon Oct 31 18:22:06 2011
@@ -23,15 +23,12 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPDM;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -44,9 +41,11 @@ import org.xml.sax.SAXException;
* @see <a href="http://www.id3.org/id3v2.4.0-structure">MP3 ID3 Version 2.4 Structure Specification</a>
* @see <a href="http://www.id3.org/id3v2.4.0-frames">MP3 ID3 Version 2.4 Frames Specification</a>
*/
-@Component @Service(Parser.class)
public class Mp3Parser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 8537074922934844370L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.audio("mpeg"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/netcdf/NetCDFParser.java Mon Oct 31 18:22:06 2011
@@ -23,9 +23,6 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-//TIKA imports
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
@@ -37,7 +34,6 @@ import org.apache.tika.sax.XHTMLContentH
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-//NETCDF imports
import ucar.nc2.Attribute;
import ucar.nc2.NetcdfFile;
@@ -48,9 +44,11 @@ import ucar.nc2.NetcdfFile;
* href="http://www.unidata.ucar.edu/software/netcdf-java/">NetCDF for Java</a>
* API.
*/
-@Component @Service(Parser.class)
public class NetCDFParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -5940938274907708665L;
+
private final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.application("x-netcdf"));
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/odf/OpenDocumentParser.java Mon Oct 31 18:22:06 2011
@@ -27,8 +27,6 @@ import java.util.zip.ZipInputStream;
//import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
//import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.apache.tika.metadata.Metadata;
@@ -45,9 +43,11 @@ import org.xml.sax.helpers.DefaultHandle
/**
* OpenOffice parser
*/
-@Component @Service(Parser.class)
public class OpenDocumentParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -6410276875438618287L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("vnd.sun.xml.writer"),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pdf/PDFParser.java Mon Oct 31 18:22:06 2011
@@ -24,8 +24,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSName;
@@ -40,7 +38,6 @@ import org.apache.tika.metadata.Property
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -52,7 +49,6 @@ import org.xml.sax.SAXException;
* document. If no password is given, then this parser will try decrypting
* the document using the empty password that's often used with PDFs.
*/
-@Component @Service(Parser.class)
public class PDFParser extends AbstractParser {
/** Serial version UID */
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/PackageParser.java Mon Oct 31 18:22:06 2011
@@ -23,14 +23,11 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -40,9 +37,11 @@ import org.xml.sax.SAXException;
* elements that contain the (optional) entry name as a <h1> element
* and the full structured body content of the parsed entry.
*/
-@Component @Service(Parser.class)
public class PackageParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -5331043266963888708L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("x-archive"),
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/pkg/ZipContainerDetector.java Mon Oct 31 18:22:06 2011
@@ -21,8 +21,6 @@ import java.io.InputStream;
import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
import org.apache.commons.compress.archivers.zip.ZipFile;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.extractor.ExtractorFactory;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.opc.OPCPackage;
@@ -41,11 +39,9 @@ import org.apache.tika.parser.iwork.IWor
* A detector that works on a Zip document
* to figure out exactly what the file is
*/
-@Component @Service(Detector.class)
public class ZipContainerDetector implements Detector {
- /**
- * Serial version UID
- */
+
+ /** Serial version UID */
private static final long serialVersionUID = 2891763938430295453L;
public MediaType detect(InputStream input, Metadata metadata)
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/prt/PRTParser.java Mon Oct 31 18:22:06 2011
@@ -22,8 +22,6 @@ import java.io.UnsupportedEncodingExcept
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.poi.util.IOUtils;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.EndianUtils;
@@ -31,7 +29,6 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -40,17 +37,19 @@ import org.xml.sax.SAXException;
* A basic text extracting parser for the CADKey PRT (CAD Drawing)
* format. It outputs text from note entries.
*/
-@Component @Service(Parser.class)
+
public class PRTParser extends AbstractParser {
+
+ /** Serial version UID */
private static final long serialVersionUID = 4659638314375035178L;
-
+
private static final Set<MediaType> SUPPORTED_TYPES = Collections.singleton(MediaType.application("x-prt"));
public static final String PRT_MIME_TYPE = "application/x-prt";
-
+
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
-
+
/**
* How long do we allow a text run to claim to be, before we
* decide we're confused and it's not really text after all?
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/rtf/RTFParser.java Mon Oct 31 18:22:06 2011
@@ -21,15 +21,12 @@ import java.io.InputStream;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TaggedInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -37,11 +34,13 @@ import org.xml.sax.SAXException;
/**
* RTF parser
*/
-@Component @Service(Parser.class)
public class RTFParser extends AbstractParser {
- private static final Set<MediaType> SUPPORTED_TYPES = Collections
- .singleton(MediaType.application("rtf"));
+ /** Serial version UID */
+ private static final long serialVersionUID = -4165069489372320313L;
+
+ private static final Set<MediaType> SUPPORTED_TYPES =
+ Collections.singleton(MediaType.application("rtf"));
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/TXTParser.java Mon Oct 31 18:22:06 2011
@@ -27,14 +27,11 @@ import java.nio.charset.Charset;
import java.util.Collections;
import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -58,10 +55,11 @@ import org.xml.sax.SAXException;
* </dt>
* </dl>
*/
-@Component @Service(Parser.class)
-@SuppressWarnings("serial")
public class TXTParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -6656102320836888910L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.singleton(MediaType.TEXT_PLAIN);
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/video/FLVParser.java Mon Oct 31 18:22:06 2011
@@ -25,17 +25,14 @@ import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
-import java.util.Set;
import java.util.Map.Entry;
+import java.util.Set;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -63,9 +60,11 @@ import org.xml.sax.SAXException;
* hasCuePoints width, cuePoints, lasttimestamp, canSeekToEnd, datasize,
* duration, videosize, filesize, audiodatarate, hasAudio, stereo audiodelay
*/
-@Component @Service(Parser.class)
public class FLVParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -8718013155719197679L;
+
private static int TYPE_METADATA = 0x12;
private static byte MASK_AUDIO = 1;
private static byte MASK_VIDEO = 4;
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/DcXMLParser.java Mon Oct 31 18:22:06 2011
@@ -16,21 +16,20 @@
*/
package org.apache.tika.parser.xml;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
import org.apache.tika.metadata.DublinCore;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.ContentHandler;
/**
* Dublin Core metadata parser
*/
-@Component @Service(Parser.class)
public class DcXMLParser extends XMLParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = 4905318835463880819L;
+
private static ContentHandler getDublinCoreHandler(
Metadata metadata, String name, String element) {
return new ElementMetadataHandler(
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java?rev=1195596&r1=1195595&r2=1195596&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/xml/XMLParser.java Mon Oct 31 18:22:06 2011
@@ -16,30 +16,35 @@
*/
package org.apache.tika.parser.xml;
-import org.apache.felix.scr.annotations.Component;
-import org.apache.felix.scr.annotations.Service;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CloseShieldInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.sax.*;
+import org.apache.tika.sax.EmbeddedContentHandler;
+import org.apache.tika.sax.OfflineContentHandler;
+import org.apache.tika.sax.TaggedContentHandler;
+import org.apache.tika.sax.TextContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-import java.util.Set;
-
/**
* XML parser.
*/
public class XMLParser extends AbstractParser {
+ /** Serial version UID */
+ private static final long serialVersionUID = -6028836725280212837L;
+
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.application("xml"),