You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2015/09/21 19:19:42 UTC

svn commit: r1704368 [2/2] - in /tika/trunk/tika-example/src: main/java/org/apache/tika/example/ test/java/org/apache/tika/example/

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -34,54 +37,52 @@ import org.apache.tika.metadata.Property
  */
 @SuppressWarnings("deprecation")
 public class MetadataAwareLuceneIndexer {
+    private Tika tika;
 
-	private Tika tika;
-
-	private IndexWriter writer;
-
-	public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) {
-		this.writer = writer;
-		this.tika = tika;
-	}
-
-	public void indexContentSpecificMet(File file) throws Exception {
-		Metadata met = new Metadata();
-		try (InputStream is = new FileInputStream(file)) {
-			tika.parse(is, met);
-			Document document = new Document();
-			for (String key : met.names()) {
-				String[] values = met.getValues(key);
-				for (String val : values) {
-					document.add(new Field(key, val, Store.YES, Index.ANALYZED));
-				}
-				writer.addDocument(document);
-			}
-		}
-	}
-
-	public void indexWithDublinCore(File file) throws Exception {
-		Metadata met = new Metadata();
-		met.add(Metadata.CREATOR, "Manning");
-		met.add(Metadata.CREATOR, "Tika in Action");
-		met.set(Metadata.DATE, new Date());
-		met.set(Metadata.FORMAT, tika.detect(file));
-		met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
-		met.add(Metadata.SUBJECT, "File");
-		met.add(Metadata.SUBJECT, "Indexing");
-		met.add(Metadata.SUBJECT, "Metadata");
-		met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public",
-				"private"), "public");
-		try (InputStream is = new FileInputStream(file)) {
-			tika.parse(is, met);
-			Document document = new Document();
-			for (String key : met.names()) {
-				String[] values = met.getValues(key);
-				for (String val : values) {
-					document.add(new Field(key, val, Store.YES, Index.ANALYZED));
-				}
-				writer.addDocument(document);
-			}
-		}
-	}
+    private IndexWriter writer;
 
+    public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) {
+        this.tika = tika;     // facade the index methods call detect()/parse() on
+        this.writer = writer; // Lucene writer that receives each built Document
+    }
+
+    /** Parses {@code file} with Tika and indexes its metadata as the fields of ONE document. */
+    public void indexContentSpecificMet(File file) throws Exception {
+        Metadata met = new Metadata();
+        try (InputStream is = new FileInputStream(file)) {
+            tika.parse(is, met);
+            Document document = new Document();
+            for (String key : met.names()) {
+                for (String val : met.getValues(key)) {
+                    document.add(new Field(key, val, Store.YES, Index.ANALYZED));
+                }
+            }
+            writer.addDocument(document); // once per file, after every field has been added
+        }
+    }
+
+    /** Seeds creator/date/format/source/subject/rights, parses {@code file}, indexes ONE document. */
+    public void indexWithDublinCore(File file) throws Exception {
+        Metadata met = new Metadata();
+        met.add(Metadata.CREATOR, "Manning");
+        met.add(Metadata.CREATOR, "Tika in Action");
+        met.set(Metadata.DATE, new Date());
+        met.set(Metadata.FORMAT, tika.detect(file));
+        met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
+        met.add(Metadata.SUBJECT, "File");
+        met.add(Metadata.SUBJECT, "Indexing");
+        met.add(Metadata.SUBJECT, "Metadata");
+        met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public", "private"), "public");
+        try (InputStream is = new FileInputStream(file)) {
+            tika.parse(is, met);
+            Document document = new Document();
+            for (String key : met.names()) {
+                for (String val : met.getValues(key)) {
+                    document.add(new Field(key, val, Store.YES, Index.ANALYZED));
+                }
+            }
+            // Add once, after the loop: inside it the same growing document was indexed per key.
+            writer.addDocument(document);
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -38,23 +41,23 @@ import static java.nio.charset.StandardC
  * Demonstrates how to call the different components within Tika: its
  * {@link Detector} framework (aka MIME identification and repository), its
  * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
- * 
+ * <p>
  * It also shows the "easy way" via {@link AutoDetectParser}
  */
 public class MyFirstTika {
     public static void main(String[] args) throws Exception {
         String filename = args[0];
         TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
-        
+
         Metadata metadata = new Metadata();
         String text = parseUsingComponents(filename, tikaConfig, metadata);
         System.out.println("Parsed Metadata: ");
         System.out.println(metadata);
         System.out.println("Parsed Text: ");
         System.out.println(text);
-        
+
         System.out.println("-------------------------");
-        
+
         metadata = new Metadata();
         text = parseUsingAutoDetect(filename, tikaConfig, metadata);
         System.out.println("Parsed Metadata: ");
@@ -62,19 +65,20 @@ public class MyFirstTika {
         System.out.println("Parsed Text: ");
         System.out.println(text);
     }
-    
-    public static String parseUsingAutoDetect(String filename, TikaConfig tikaConfig, 
-            Metadata metadata) throws Exception {
+
+    public static String parseUsingAutoDetect(String filename, TikaConfig tikaConfig,
+                                              Metadata metadata) throws Exception {
         System.out.println("Handling using AutoDetectParser: [" + filename + "]");
-        
+
         AutoDetectParser parser = new AutoDetectParser(tikaConfig);
         ContentHandler handler = new BodyContentHandler();
         TikaInputStream stream = TikaInputStream.get(new File(filename), metadata);
         parser.parse(stream, handler, metadata, new ParseContext());
         return handler.toString();
     }
-    public static String parseUsingComponents(String filename, TikaConfig tikaConfig, 
-            Metadata metadata) throws Exception {    
+
+    public static String parseUsingComponents(String filename, TikaConfig tikaConfig,
+                                              Metadata metadata) throws Exception {
         MimeTypes mimeRegistry = tikaConfig.getMimeRepository();
 
         System.out.println("Examining: [" + filename + "]");
@@ -106,7 +110,7 @@ public class MyFirstTika {
         // Have the file parsed to get the content and metadata
         ContentHandler handler = new BodyContentHandler();
         parser.parse(stream, handler, metadata, new ParseContext());
-        
+
         return handler.toString();
     }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java Mon Sep 21 17:19:26 2015
@@ -14,6 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.tika.example;
 
 import java.io.IOException;
@@ -44,7 +45,7 @@ public class ParsingExample {
     /**
      * Example of how to use Tika's parseToString method to parse the content of a file,
      * and return any text found.
-     *
+     * <p>
      * Note: Tika.parseToString() will extract content from the outer container
      * document and any embedded/attached documents.
      *
@@ -60,20 +61,20 @@ public class ParsingExample {
     /**
      * Example of how to use Tika to parse a file when you do not know its file type
      * ahead of time.
-     *
+     * <p>
      * AutoDetectParser attempts to discover the file's type automatically, then call
      * the exact Parser built for that file type.
-     *
+     * <p>
      * The stream to be parsed by the Parser. In this case, we get a file from the
      * resources folder of this project.
-     *
+     * <p>
      * Handlers are used to get the exact information you want out of the host of
      * information gathered by Parsers. The body content handler, intuitively, extracts
      * everything that would go between HTML body tags.
-     *
+     * <p>
      * The Metadata object will be filled by the Parser with Metadata discovered about
      * the file being parsed.
-     *
+     * <p>
      * Note: This example will extract content from the outer document and all
      * embedded documents.  However, if you choose to use a {@link ParseContext},
      * make sure to set a {@link Parser} or else embedded content will not be
@@ -154,7 +155,6 @@ public class ParsingExample {
      */
     public List<Metadata> recursiveParserWrapperExample() throws IOException,
             SAXException, TikaException {
-
         Parser p = new AutoDetectParser();
         ContentHandlerFactory factory = new BasicContentHandlerFactory(
                 BasicContentHandlerFactory.HANDLER_TYPE.HTML, -1);
@@ -187,7 +187,7 @@ public class ParsingExample {
      */
     public String serializedRecursiveParserWrapperExample() throws IOException,
             SAXException, TikaException {
-        List metadataList = recursiveParserWrapperExample();
+        List<Metadata> metadataList = recursiveParserWrapperExample();
         StringWriter writer = new StringWriter();
         JsonMetadataList.toJson(metadataList, writer);
         return writer.toString();
@@ -195,7 +195,6 @@ public class ParsingExample {
 
 
     /**
-     *
      * @param outputPath -- output directory to place files
      * @return list of files created
      * @throws IOException
@@ -207,9 +206,9 @@ public class ParsingExample {
         InputStream stream = ParsingExample.class.getResourceAsStream("test_recursive_embedded.docx");
         ExtractEmbeddedFiles ex = new ExtractEmbeddedFiles();
         ex.extract(stream, outputPath);
-        List<Path> ret = new ArrayList<Path>();
+        List<Path> ret = new ArrayList<>();
         try (DirectoryStream<Path> dirStream = Files.newDirectoryStream(outputPath)) {
-            for (Path entry: dirStream) {
+            for (Path entry : dirStream) {
                 ret.add(entry);
             }
         }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,15 +20,13 @@ package org.apache.tika.example;
 import java.security.Key;
 
 public class Pharmacy {
+    private static Key key = null;
 
-	private static Key key = null;
-
-	public static Key getKey() {
-		return key;
-	}
-
-	public static void setKey(Key key) {
-		Pharmacy.key = key;
-	}
-
+    public static Key getKey() { // null until setKey(...) has stored a key
+        return Pharmacy.key;
+    }
+
+    public static void setKey(Key key) { // replaces the single class-wide key
+        Pharmacy.key = key;
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -26,27 +29,24 @@ import org.apache.tika.sax.TeeContentHan
 import org.xml.sax.ContentHandler;
 
 public class PrescriptionParser extends XMLParser {
+    private static final long serialVersionUID = 7690682277511967388L;
 
-	private static final long serialVersionUID = 7690682277511967388L;
-
-	@Override
-	protected ContentHandler getContentHandler(ContentHandler handler,
-			Metadata metadata, ParseContext context) {
-		String xpd = "http://example.com/2011/xpd";
-
-		ContentHandler doctor = new ElementMetadataHandler(xpd, "doctor",
-				metadata, "xpd:doctor");
-		ContentHandler patient = new ElementMetadataHandler(xpd, "patient",
-				metadata, "xpd:patient");
-
-		return new TeeContentHandler(super.getContentHandler(handler, metadata,
-				context), doctor, patient);
-	}
-
-	@Override
-	public Set<MediaType> getSupportedTypes(ParseContext context) {
-		return Collections.singleton(MediaType
-				.application("x-prescription+xml"));
-	}
-
+    @Override
+    protected ContentHandler getContentHandler(ContentHandler handler,
+                                               Metadata metadata, ParseContext context) {
+        final String namespace = "http://example.com/2011/xpd";
+        // NOTE(review): presumably each handler copies its element's text into metadata - confirm.
+        ContentHandler doctorHandler =
+                new ElementMetadataHandler(namespace, "doctor", metadata, "xpd:doctor");
+        ContentHandler patientHandler =
+                new ElementMetadataHandler(namespace, "patient", metadata, "xpd:patient");
+        // Obtained after the two handlers, preserving the original evaluation order.
+        ContentHandler parentHandler = super.getContentHandler(handler, metadata, context);
+        return new TeeContentHandler(parentHandler, doctorHandler, patientHandler);
+    }
+
+    @Override // one fixed media type is advertised; the context argument is not consulted
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.singleton(MediaType.application("x-prescription+xml"));
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -35,114 +38,108 @@ import org.apache.tika.metadata.DublinCo
 import org.apache.tika.metadata.Metadata;
 
 /**
- *
  * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6
  * to output an RSS (or RDF) feed of files crawled by the LuceneIndexer within
  * the last N minutes.
  */
 @SuppressWarnings("deprecation")
 public class RecentFiles {
+    private IndexReader reader;
 
-	private IndexReader reader;
-
-	private SimpleDateFormat rssDateFormat = new SimpleDateFormat(
-			"E, dd MMM yyyy HH:mm:ss z", Locale.getDefault());
-
-	public String generateRSS(File indexFile) throws CorruptIndexException,
-			IOException {
-		StringBuffer output = new StringBuffer();
-		output.append(getRSSHeaders());
-		IndexSearcher searcher = null;
-		try {
-			reader = IndexReader.open(new SimpleFSDirectory(indexFile));
-			searcher = new IndexSearcher(reader);
-			GregorianCalendar gc = new java.util.GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
-			gc.setTime(new Date());
-			String nowDateTime = ISO8601.format(gc);
-			gc.add(java.util.GregorianCalendar.MINUTE, -5);
-			String fiveMinsAgo = ISO8601.format(gc);
-			TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(),
-					fiveMinsAgo, nowDateTime, true, true);
-			TopScoreDocCollector collector = TopScoreDocCollector.create(20,
-					true);
-			searcher.search(query, collector);
-			ScoreDoc[] hits = collector.topDocs().scoreDocs;
-			for (int i = 0; i < hits.length; i++) {
-				Document doc = searcher.doc(hits[i].doc);
-				output.append(getRSSItem(doc));
-			}
-
-		} finally {
-			if (reader != null) reader.close();
-			if (searcher != null) searcher.close();
-		}
-
-		output.append(getRSSFooters());
-		return output.toString();
-	}
-
-	public String getRSSItem(Document doc) {
-		StringBuffer output = new StringBuffer();
-		output.append("<item>");
-		output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()),
-				"isPermalink", "true"));
-		output.append(emitTag("title", doc.get(Metadata.TITLE), null, null));
-		output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()),
-				null, null));
-		output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
-		for (String topic : doc.getValues(Metadata.SUBJECT)) {
-			output.append(emitTag("category", topic, null, null));
-		}
-		output.append(emitTag("pubDate", rssDateFormat.format(ISO8601.parse(doc
-				.get(Metadata.DATE.toString()))), null, null));
-		output.append(emitTag("description", doc.get(Metadata.TITLE), null,
-				null));
-		output.append("</item>");
-		return output.toString();
-	}
-
-	public String getRSSHeaders() {
-		StringBuffer output = new StringBuffer();
-		output.append("<?xml version=\"1.0\" encoding=\"utf-8\">");
-		output.append("<rss version=\"2.0\">");
-		output.append("  <channel>");
-		output.append("     <title>Tika in Action: Recent Files Feed."
-				+ "</title>");
-		output.append("     <description>Chapter 6 Examples demonstrating "
-				+ "use of Tika Metadata for RSS.</description>");
-		output.append("     <link>tikainaction.rss</link>");
-		output.append("     <lastBuildDate>" + rssDateFormat.format(new Date())
-				+ "</lastBuildDate>");
-		output.append("     <generator>Manning Publications: Tika in Action"
-				+ "</generator>");
-		output.append("     <copyright>All Rights Reserved</copyright>");
-		return output.toString();
-	}
-
-	public String getRSSFooters() {
-		StringBuffer output = new StringBuffer();
-		output.append("   </channel>");
-		return output.toString();
-	}
-
-	private String emitTag(String tagName, String value, String attributeName,
-			String attributeValue) {
-		StringBuffer output = new StringBuffer();
-		output.append("<");
-		output.append(tagName);
-		if (attributeName != null) {
-			output.append(" ");
-			output.append(attributeName);
-			output.append("=\"");
-			output.append(attributeValue);
-			output.append("\"");
-		}
-		output.append(">");
-		output.append(value);
-		output.append("</");
-		output.append(tagName);
-		output.append(">");
-		return output.toString();
-	}
+    private SimpleDateFormat rssDateFormat = new SimpleDateFormat( // RFC 822 needs English names
+            "E, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH);
 
+    /** Renders an RSS feed from index documents dated between five minutes ago and now. */
+    public String generateRSS(File indexFile) throws CorruptIndexException,
+            IOException {
+        StringBuilder output = new StringBuilder();
+        output.append(getRSSHeaders());
+        IndexSearcher searcher = null;
+        try {
+            reader = IndexReader.open(new SimpleFSDirectory(indexFile));
+            searcher = new IndexSearcher(reader);
+            GregorianCalendar gc = new GregorianCalendar(TimeZone.getDefault(), Locale.getDefault());
+            gc.setTime(new Date());
+            String nowDateTime = ISO8601.format(gc);
+            gc.add(GregorianCalendar.MINUTE, -5);
+            String fiveMinsAgo = ISO8601.format(gc);
+            TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(),
+                    fiveMinsAgo, nowDateTime, true, true);
+            TopScoreDocCollector collector = TopScoreDocCollector.create(20, true);
+            searcher.search(query, collector);
+            for (ScoreDoc hit : collector.topDocs().scoreDocs) {
+                output.append(getRSSItem(searcher.doc(hit.doc)));
+            }
+        } finally {
+            try { // searcher first; the nested finally still closes the reader if this throws
+                if (searcher != null) { searcher.close(); }
+            } finally {
+                if (reader != null) { reader.close(); }
+            }
+        }
+
+        output.append(getRSSFooters());
+        return output.toString();
+    }
+
+    /** Renders one Lucene document as an RSS {@code <item>} element. */
+    public String getRSSItem(Document doc) {
+        String source = doc.get(DublinCore.SOURCE.getName());
+        String title = doc.get(Metadata.TITLE);
+        StringBuilder output = new StringBuilder("<item>");
+        // RSS 2.0 spells the guid attribute "isPermaLink"; XML names are case-sensitive.
+        output.append(emitTag("guid", source, "isPermaLink", "true"));
+        output.append(emitTag("title", title, null, null));
+        output.append(emitTag("link", source, null, null));
+        output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
+        for (String topic : doc.getValues(Metadata.SUBJECT)) {
+            output.append(emitTag("category", topic, null, null));
+        }
+        output.append(emitTag("pubDate", rssDateFormat.format(
+                ISO8601.parse(doc.get(Metadata.DATE.toString()))), null, null));
+        output.append(emitTag("description", title, null, null)); // title doubles as description
+        output.append("</item>");
+        return output.toString();
+    }
+
+    /** Returns the XML declaration (terminated by {@code ?>}) and the opening rss/channel header. */
+    public String getRSSHeaders() {
+        StringBuilder output = new StringBuilder();
+        output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
+        output.append("<rss version=\"2.0\">");
+        output.append("  <channel>");
+        output.append("     <title>Tika in Action: Recent Files Feed.</title>");
+        output.append("     <description>Chapter 6 Examples demonstrating "
+                + "use of Tika Metadata for RSS.</description>");
+        output.append("     <link>tikainaction.rss</link>");
+        output.append("     <lastBuildDate>").append(rssDateFormat.format(new Date()))
+                .append("</lastBuildDate>");
+        output.append("     <generator>Manning Publications: Tika in Action</generator>");
+        output.append("     <copyright>All Rights Reserved</copyright>");
+        return output.toString();
+    }
+
+    public String getRSSFooters() { // closes both elements opened by getRSSHeaders()
+        return "   </channel></rss>";
+    }
+
+    /** Emits {@code <tag attr="v">value</tag>}; a null attributeName omits the attribute.
+     *  Both value and attributeValue are entity-encoded before being written. */
+    private String emitTag(String tagName, String value, String attributeName,
+                           String attributeValue) {
+        StringBuilder output = new StringBuilder("<").append(tagName);
+        // attributeName == null means "no attribute"; attributeValue is then ignored.
+        if (attributeName != null) {
+            output.append(" ").append(attributeName).append("=\"")
+                    .append(escapeXml(attributeValue)).append("\"");
+        }
+        output.append(">").append(escapeXml(value));
+        return output.append("</").append(tagName).append(">").toString();
+    }
+
+    /** Entity-encodes &amp;, &lt;, &gt; and the double quote; null becomes the text "null". */
+    private static String escapeXml(String text) {
+        return String.valueOf(text).replace("&", "&amp;").replace("<", "&lt;")
+                .replace(">", "&gt;").replace("\"", "&quot;");
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -43,100 +46,92 @@ import static java.nio.charset.StandardC
  * Demonstrates Tika and its ability to sense symlinks.
  */
 public class RollbackSoftware {
-
-	public static void main(String[] args) throws Exception {
-		RollbackSoftware r = new RollbackSoftware();
-		r.rollback(new File(args[0]));
-	}
-
-	public void rollback(File deployArea) throws IOException, SAXException,
-			TikaException {
-		LinkContentHandler handler = new LinkContentHandler();
-		Metadata met = new Metadata();
-		DeploymentAreaParser parser = new DeploymentAreaParser();
-		parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), UTF_8),
-				handler, met);
-		List<Link> links = handler.getLinks();
-		if (links.size() < 2)
-			throw new IOException("Must have installed at least 2 versions!");
-		Collections.sort(links, new Comparator<Link>() {
-			public int compare(Link o1, Link o2) {
-				return o1.getText().compareTo(o2.getText());
-			}
-		});
-
-		this.updateVersion(links.get(links.size() - 2).getText());
-
-	}
-
-	private void updateVersion(String version) {
-		System.out.println("Rolling back to version: [" + version + "]");
-	}
-
-	class DeploymentAreaParser implements Parser {
-
-		private static final long serialVersionUID = -2356647405087933468L;
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see org.apache.tika.parser.Parser#getSupportedTypes(
-		 * org.apache.tika.parser.ParseContext)
-		 */
-		public Set<MediaType> getSupportedTypes(ParseContext context) {
-			return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays
-					.asList(MediaType.TEXT_PLAIN)));
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-		 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
-		 */
-		public void parse(InputStream is, ContentHandler handler,
-				Metadata metadata) throws IOException, SAXException,
-				TikaException {
-			parse(is, handler, metadata, new ParseContext());
-		}
-
-		/*
-		 * (non-Javadoc)
-		 * 
-		 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-		 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
-		 * org.apache.tika.parser.ParseContext)
-		 */
-
-		public void parse(InputStream is, ContentHandler handler,
-				Metadata metadata, ParseContext context) throws IOException,
-				SAXException, TikaException {
-
-			File deployArea = new File(IOUtils.toString(is, UTF_8));
-			File[] versions = deployArea.listFiles(new FileFilter() {
-
-				public boolean accept(File pathname) {
-					return !pathname.getName().startsWith("current");
-				}
-			});
-
-			XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
-					metadata);
-			xhtml.startDocument();
-			for (File v : versions) {
-				if (isSymlink(v))
-					continue;
-				xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm());
-				xhtml.characters(v.getName());
-				xhtml.endElement("a");
-			}
-
-		}
-
-	}
-
-	private boolean isSymlink(File f) throws IOException {
-		return !f.getAbsolutePath().equals(f.getCanonicalPath());
-	}
-
+    public static void main(String[] args) throws Exception {
+        RollbackSoftware r = new RollbackSoftware();
+        r.rollback(new File(args[0]));
+    }
+
+    public void rollback(File deployArea) throws IOException, SAXException,
+            TikaException {
+        LinkContentHandler handler = new LinkContentHandler();
+        Metadata met = new Metadata();
+        DeploymentAreaParser parser = new DeploymentAreaParser();
+        parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(), UTF_8),
+                handler, met);
+        List<Link> links = handler.getLinks();
+        if (links.size() < 2)
+            throw new IOException("Must have installed at least 2 versions!");
+        Collections.sort(links, new Comparator<Link>() {
+            public int compare(Link o1, Link o2) {
+                return o1.getText().compareTo(o2.getText());
+            }
+        });
+
+        this.updateVersion(links.get(links.size() - 2).getText());
+    }
+
+    private void updateVersion(String version) {
+        System.out.println("Rolling back to version: [" + version + "]");
+    }
+
+    class DeploymentAreaParser implements Parser {
+        private static final long serialVersionUID = -2356647405087933468L;
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see org.apache.tika.parser.Parser#getSupportedTypes(
+         * org.apache.tika.parser.ParseContext)
+         */
+        public Set<MediaType> getSupportedTypes(ParseContext context) {
+            return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays
+                    .asList(MediaType.TEXT_PLAIN)));
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+         * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+         */
+        public void parse(InputStream is, ContentHandler handler,
+                          Metadata metadata) throws IOException, SAXException,
+                TikaException {
+            parse(is, handler, metadata, new ParseContext());
+        }
+
+        /*
+         * (non-Javadoc)
+         *
+         * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+         * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+         * org.apache.tika.parser.ParseContext)
+         */
+        public void parse(InputStream is, ContentHandler handler,
+                          Metadata metadata, ParseContext context) throws IOException,
+                SAXException, TikaException {
+
+            File deployArea = new File(IOUtils.toString(is, UTF_8));
+            File[] versions = deployArea.listFiles(new FileFilter() {
+                public boolean accept(File pathname) {
+                    return !pathname.getName().startsWith("current");
+                }
+            });
+
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+                    metadata);
+            xhtml.startDocument();
+            for (File v : versions) {
+                if (isSymlink(v))
+                    continue;
+                xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm());
+                xhtml.characters(v.getName());
+                xhtml.endElement("a");
+            }
+        }
+    }
+
+    private boolean isSymlink(File f) throws IOException {
+        return !f.getAbsolutePath().equals(f.getCanonicalPath());
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -15,20 +18,19 @@
 package org.apache.tika.example;
 
 import java.io.File;
+
 import org.apache.tika.Tika;
 
 public class SimpleTextExtractor {
-
-	public static void main(String[] args) throws Exception {
-		// Create a Tika instance with the default configuration
-		Tika tika = new Tika();
-
-		// Parse all given files and print out the extracted
-		// text content
-		for (String file : args) {
-			String text = tika.parseToString(new File(file));
-			System.out.print(text);
-		}
-	}
-
+    public static void main(String[] args) throws Exception {
+        // Create a Tika instance with the default configuration
+        Tika tika = new Tika();
+
+        // Parse all given files and print out the extracted
+        // text content
+        for (String file : args) {
+            String text = tika.parseToString(new File(file));
+            System.out.print(text);
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -19,14 +22,12 @@ import java.io.File;
 import org.apache.tika.Tika;
 
 public class SimpleTypeDetector {
+    public static void main(String[] args) throws Exception {
+        Tika tika = new Tika();
 
-	public static void main(String[] args) throws Exception {
-		Tika tika = new Tika();
-
-		for (String file : args) {
-			String type = tika.detect(new File(file));
-			System.out.println(file + ": " + type);
-		}
-	}
-
+        for (String file : args) {
+            String type = tika.detect(new File(file));
+            System.out.println(file + ": " + type);
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -26,14 +29,12 @@ import org.springframework.context.suppo
 import static java.nio.charset.StandardCharsets.UTF_8;
 
 public class SpringExample {
-
-	public static void main(String[] args) throws Exception {
-		ApplicationContext context = new ClassPathXmlApplicationContext(
-				new String[] { "org/apache/tika/example/spring.xml" });
-		Parser parser = context.getBean("tika", Parser.class);
-		parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(UTF_8)),
-				new WriteOutContentHandler(System.out), new Metadata(),
-				new ParseContext());
-	}
-
+    public static void main(String[] args) throws Exception {
+        ApplicationContext context = new ClassPathXmlApplicationContext(
+                new String[]{"org/apache/tika/example/spring.xml"});
+        Parser parser = context.getBean("tika", Parser.class);
+        parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(UTF_8)),
+                new WriteOutContentHandler(System.out), new Metadata(),
+                new ParseContext());
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -52,149 +55,147 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 public class TIAParsingExample {
-
-	public static String parseToStringExample() throws Exception {
-		File document = new File("example.doc");
-		String content = new Tika().parseToString(document);
-		System.out.print(content);
-		return content;
-	}
-
-	public static void parseToReaderExample() throws Exception {
-		File document = new File("example.doc");
-		try (Reader reader = new Tika().parse(document)) {
-			char[] buffer = new char[1000];
-			int n = reader.read(buffer);
-			while (n != -1) {
-				System.out.append(CharBuffer.wrap(buffer, 0, n));
-				n = reader.read(buffer);
-			}
-		}
-	}
-
-	public static void parseFileInputStream(String filename) throws Exception {
-		Parser parser = new AutoDetectParser();
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		try (InputStream stream = new FileInputStream(new File(filename))) {
-			parser.parse(stream, handler, metadata, context);
-		}
-	}
-
-	public static void parseURLStream(String address) throws Exception {
-		Parser parser = new AutoDetectParser();
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		try (InputStream stream = new GZIPInputStream(new URL(address).openStream())) {
-			parser.parse(stream, handler, metadata, context);
-		}
-	}
-
-	public static void parseTikaInputStream(String filename) throws Exception {
-		Parser parser = new AutoDetectParser();
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		try (InputStream stream = TikaInputStream.get(new File(filename))) {
-			parser.parse(stream, handler, metadata, context);
-		}
-	}
-
-	public static File tikaInputStreamGetFile(String filename) throws Exception {
-		try (InputStream stream = TikaInputStream.get(new File(filename))) {
-			TikaInputStream tikaInputStream = TikaInputStream.get(stream);
-			File file = tikaInputStream.getFile();
-			return file;
-		}
-	}
-
-	public static void useHtmlParser() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		Parser parser = new HtmlParser();
-		parser.parse(stream, handler, metadata, context);
-	}
-
-	public static void useCompositeParser() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		ParseContext context = new ParseContext();
-		Map<MediaType, Parser> parsersByType = new HashMap<MediaType, Parser>();
-		parsersByType.put(MediaType.parse("text/html"), new HtmlParser());
-		parsersByType.put(MediaType.parse("application/xml"), new XMLParser());
-
-		CompositeParser parser = new CompositeParser();
-		parser.setParsers(parsersByType);
-		parser.setFallback(new TXTParser());
-
-		Metadata metadata = new Metadata();
-		metadata.set(Metadata.CONTENT_TYPE, "text/html");
-		parser.parse(stream, handler, metadata, context);
-	}
-
-	public static void useAutoDetectParser() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		Parser parser = new AutoDetectParser();
-		parser.parse(stream, handler, metadata, context);
-	}
-
-	public static void testTeeContentHandler(String filename) throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		Metadata metadata = new Metadata();
-		ParseContext context = new ParseContext();
-		Parser parser = new AutoDetectParser();
-		LinkContentHandler linkCollector = new LinkContentHandler();
-		try (OutputStream output = new FileOutputStream(new File(filename))) {
-			ContentHandler handler = new TeeContentHandler(
-					new BodyContentHandler(output), linkCollector);
-			parser.parse(stream, handler, metadata, context);
-		}
-	}
-
-	public static void testLocale() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		Parser parser = new AutoDetectParser();
-		ParseContext context = new ParseContext();
-		context.set(Locale.class, Locale.ENGLISH);
-		parser.parse(stream, handler, metadata, context);
-	}
-
-	public static void testHtmlMapper() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		Parser parser = new AutoDetectParser();
-		ParseContext context = new ParseContext();
-		context.set(HtmlMapper.class, new IdentityHtmlMapper());
-		parser.parse(stream, handler, metadata, context);
-	}
-
-	public static void testCompositeDocument() throws Exception {
-		InputStream stream = new ByteArrayInputStream(new byte[0]);
-		ContentHandler handler = new DefaultHandler();
-		Metadata metadata = new Metadata();
-		Parser parser = new AutoDetectParser();
-		ParseContext context = new ParseContext();
-		context.set(Parser.class, new ParserDecorator(parser) {
-			private static final long serialVersionUID = 4424210691523343833L;
-
-			@Override
-			public void parse(InputStream stream, ContentHandler handler,
-					Metadata metadata, ParseContext context)
-					throws IOException, SAXException, TikaException {
-				// custom processing of the component document
-			}
-		});
-		parser.parse(stream, handler, metadata, context);
-	}
-
+    public static String parseToStringExample() throws Exception {
+        File document = new File("example.doc");
+        String content = new Tika().parseToString(document);
+        System.out.print(content);
+        return content;
+    }
+
+    public static void parseToReaderExample() throws Exception {
+        File document = new File("example.doc");
+        try (Reader reader = new Tika().parse(document)) {
+            char[] buffer = new char[1000];
+            int n = reader.read(buffer);
+            while (n != -1) {
+                System.out.append(CharBuffer.wrap(buffer, 0, n));
+                n = reader.read(buffer);
+            }
+        }
+    }
+
+    public static void parseFileInputStream(String filename) throws Exception {
+        Parser parser = new AutoDetectParser();
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        try (InputStream stream = new FileInputStream(new File(filename))) {
+            parser.parse(stream, handler, metadata, context);
+        }
+    }
+
+    public static void parseURLStream(String address) throws Exception {
+        Parser parser = new AutoDetectParser();
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        try (InputStream stream = new GZIPInputStream(new URL(address).openStream())) {
+            parser.parse(stream, handler, metadata, context);
+        }
+    }
+
+    public static void parseTikaInputStream(String filename) throws Exception {
+        Parser parser = new AutoDetectParser();
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        try (InputStream stream = TikaInputStream.get(new File(filename))) {
+            parser.parse(stream, handler, metadata, context);
+        }
+    }
+
+    public static File tikaInputStreamGetFile(String filename) throws Exception {
+        try (InputStream stream = TikaInputStream.get(new File(filename))) {
+            TikaInputStream tikaInputStream = TikaInputStream.get(stream);
+            File file = tikaInputStream.getFile();
+            return file;
+        }
+    }
+
+    public static void useHtmlParser() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        Parser parser = new HtmlParser();
+        parser.parse(stream, handler, metadata, context);
+    }
+
+    public static void useCompositeParser() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        ParseContext context = new ParseContext();
+        Map<MediaType, Parser> parsersByType = new HashMap<MediaType, Parser>();
+        parsersByType.put(MediaType.parse("text/html"), new HtmlParser());
+        parsersByType.put(MediaType.parse("application/xml"), new XMLParser());
+
+        CompositeParser parser = new CompositeParser();
+        parser.setParsers(parsersByType);
+        parser.setFallback(new TXTParser());
+
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "text/html");
+        parser.parse(stream, handler, metadata, context);
+    }
+
+    public static void useAutoDetectParser() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        Parser parser = new AutoDetectParser();
+        parser.parse(stream, handler, metadata, context);
+    }
+
+    public static void testTeeContentHandler(String filename) throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        Metadata metadata = new Metadata();
+        ParseContext context = new ParseContext();
+        Parser parser = new AutoDetectParser();
+        LinkContentHandler linkCollector = new LinkContentHandler();
+        try (OutputStream output = new FileOutputStream(new File(filename))) {
+            ContentHandler handler = new TeeContentHandler(
+                    new BodyContentHandler(output), linkCollector);
+            parser.parse(stream, handler, metadata, context);
+        }
+    }
+
+    public static void testLocale() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        Parser parser = new AutoDetectParser();
+        ParseContext context = new ParseContext();
+        context.set(Locale.class, Locale.ENGLISH);
+        parser.parse(stream, handler, metadata, context);
+    }
+
+    public static void testHtmlMapper() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        Parser parser = new AutoDetectParser();
+        ParseContext context = new ParseContext();
+        context.set(HtmlMapper.class, new IdentityHtmlMapper());
+        parser.parse(stream, handler, metadata, context);
+    }
+
+    public static void testCompositeDocument() throws Exception {
+        InputStream stream = new ByteArrayInputStream(new byte[0]);
+        ContentHandler handler = new DefaultHandler();
+        Metadata metadata = new Metadata();
+        Parser parser = new AutoDetectParser();
+        ParseContext context = new ParseContext();
+        context.set(Parser.class, new ParserDecorator(parser) {
+            private static final long serialVersionUID = 4424210691523343833L;
+
+            @Override
+            public void parse(InputStream stream, ContentHandler handler,
+                              Metadata metadata, ParseContext context)
+                    throws IOException, SAXException, TikaException {
+                // custom processing of the component document
+            }
+        });
+        parser.parse(stream, handler, metadata, context);
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,87 +28,80 @@ import org.apache.tika.exception.TikaExc
 import org.apache.tika.metadata.Metadata;
 
 /**
- * 
  * Generates document summaries for corpus analysis in the Open Relevance
  * project.
- * 
  */
 @SuppressWarnings("deprecation")
 public class TrecDocumentGenerator {
-
-	public TrecDocument summarize(File file) throws FileNotFoundException,
-			IOException, TikaException {
-		Tika tika = new Tika(); 
-		Metadata met = new Metadata();
-
-		String contents = tika.parseToString(new FileInputStream(file), met);
-		return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents,
-				met.getDate(Metadata.DATE)); 
-
-	}
-
-	// copied from
-	// http://svn.apache.org/repos/asf/lucene/openrelevance/trunk/src/java/org/
-	// apache/orp/util/TrecDocument.java
-	// since the ORP jars aren't published anywhere
-	class TrecDocument {
-		private CharSequence docname;
-		private CharSequence body;
-		private Date date;
-
-		public TrecDocument(CharSequence docname, CharSequence body, Date date) {
-			this.docname = docname;
-			this.body = body;
-			this.date = date;
-		}
-
-		public TrecDocument() {
-		}
-
-		/**
-		 * @return the docname
-		 */
-		public CharSequence getDocname() {
-			return docname;
-		}
-
-		/**
-		 * @param docname
-		 *            the docname to set
-		 */
-		public void setDocname(CharSequence docname) {
-			this.docname = docname;
-		}
-
-		/**
-		 * @return the body
-		 */
-		public CharSequence getBody() {
-			return body;
-		}
-
-		/**
-		 * @param body
-		 *            the body to set
-		 */
-		public void setBody(CharSequence body) {
-			this.body = body;
-		}
-
-		/**
-		 * @return the date
-		 */
-		public Date getDate() {
-			return date;
-		}
-
-		/**
-		 * @param date
-		 *            the date to set
-		 */
-		public void setDate(Date date) {
-			this.date = date;
-		}
-	}
-
+    public TrecDocument summarize(File file) throws FileNotFoundException,
+            IOException, TikaException {
+        Tika tika = new Tika();
+        Metadata met = new Metadata();
+
+        String contents = tika.parseToString(new FileInputStream(file), met);
+        return new TrecDocument(met.get(Metadata.RESOURCE_NAME_KEY), contents,
+                met.getDate(Metadata.DATE));
+
+    }
+
+    // copied from
+    // http://svn.apache.org/repos/asf/lucene/openrelevance/trunk/src/java/org/
+    // apache/orp/util/TrecDocument.java
+    // since the ORP jars aren't published anywhere
+    class TrecDocument {
+        private CharSequence docname;
+        private CharSequence body;
+        private Date date;
+
+        public TrecDocument(CharSequence docname, CharSequence body, Date date) {
+            this.docname = docname;
+            this.body = body;
+            this.date = date;
+        }
+
+        public TrecDocument() {
+        }
+
+        /**
+         * @return the docname
+         */
+        public CharSequence getDocname() {
+            return docname;
+        }
+
+        /**
+         * @param docname the docname to set
+         */
+        public void setDocname(CharSequence docname) {
+            this.docname = docname;
+        }
+
+        /**
+         * @return the body
+         */
+        public CharSequence getBody() {
+            return body;
+        }
+
+        /**
+         * @param body the body to set
+         */
+        public void setBody(CharSequence body) {
+            this.body = body;
+        }
+
+        /**
+         * @return the date
+         */
+        public Date getDate() {
+            return date;
+        }
+
+        /**
+         * @param date the date to set
+         */
+        public void setDate(Date date) {
+            this.date = date;
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,34 +17,29 @@
 
 package org.apache.tika.example;
 
-//JDK imports
 import java.io.IOException;
 import java.util.Collections;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
 
 /**
- * 
- *
  * Example code listing from Chapter 1. Lists a zip file's entries using JDK's
  * standard APIs.
- *
  */
 public class ZipListFiles {
-	public static void main(String[] args) throws Exception {
-		if (args.length > 0) {
-			for (String file : args) {
-				System.out.println("Files in " + file + " file:");
-				listZipEntries(file);
-			}
-		}
-	}
-
-	public static void listZipEntries(String path) throws IOException {
-		ZipFile zip = new ZipFile(path);
-		for (ZipEntry entry : Collections.list(zip.entries())) {
-			System.out.println(entry.getName());
-		}
-	}
+    public static void main(String[] args) throws Exception {
+        if (args.length > 0) {
+            for (String file : args) {
+                System.out.println("Files in " + file + " file:");
+                listZipEntries(file);
+            }
+        }
+    }
 
-}
\ No newline at end of file
+    public static void listZipEntries(String path) throws IOException {
+        ZipFile zip = new ZipFile(path);
+        for (ZipEntry entry : Collections.list(zip.entries())) {
+            System.out.println(entry.getName());
+        }
+    }
+}

Modified: tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java (original)
+++ tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,18 +17,14 @@
 
 package org.apache.tika.example;
 
+import org.junit.Test;
 
 import static org.junit.Assert.assertEquals;
 
-import org.junit.Test;
-
 @SuppressWarnings("deprecation")
 public class AdvancedTypeDetectorTest {
-
-	@Test
-	public void testDetectWithCustomConfig() throws Exception {
-		assertEquals("application/xml",
-				AdvancedTypeDetector.detectWithCustomConfig("pom.xml"));
-	}
-
+    @Test
+    public void testDetectWithCustomConfig() throws Exception {
+        assertEquals("application/xml", AdvancedTypeDetector.detectWithCustomConfig("pom.xml"));
+    }
 }