You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2015/09/21 19:19:42 UTC

svn commit: r1704368 [1/2] - in /tika/trunk/tika-example/src: main/java/org/apache/tika/example/ test/java/org/apache/tika/example/

Author: grossws
Date: Mon Sep 21 17:19:26 2015
New Revision: 1704368

URL: http://svn.apache.org/viewvc?rev=1704368&view=rev
Log:
Fix license headers and reformat in tika-example

Apache License v2.0 headers are changed to the recommended form[1] (as used in o.a.tika.Tika).
Reformatted from tabs to 4 spaces.
Added paragraphs in Javadoc.
Trivial migrations to Java 7 (such as the diamond operator).
Removed some extra lines to have consistent formatting in examples.

[1]: http://www.apache.org/legal/src-headers.html#headers

Modified:
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -24,32 +27,30 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MimeTypesFactory;
 
 public class AdvancedTypeDetector {
-
-	public static String detectWithCustomConfig(String name) throws Exception {
-		String config = "/org/apache/tika/mime/tika-mimetypes.xml";
-		Tika tika = new Tika(MimeTypesFactory.create(config));
-		return tika.detect(name);
-	}
-
-	public static String detectWithCustomDetector(String name) throws Exception {
-		String config = "/org/apache/tika/mime/tika-mimetypes.xml";
-		Detector detector = MimeTypesFactory.create(config);
-
-		Detector custom = new Detector() {
-			private static final long serialVersionUID = -5420638839201540749L;
-
-			public MediaType detect(InputStream input, Metadata metadata) {
-				String type = metadata.get("my-custom-type-override");
-				if (type != null) {
-					return MediaType.parse(type);
-				} else {
-					return MediaType.OCTET_STREAM;
-				}
-			}
-		};
-
-		Tika tika = new Tika(new CompositeDetector(custom, detector));
-		return tika.detect(name);
-	}
-
+    public static String detectWithCustomConfig(String name) throws Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Tika tika = new Tika(MimeTypesFactory.create(config));
+        return tika.detect(name);
+    }
+
+    public static String detectWithCustomDetector(String name) throws Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Detector detector = MimeTypesFactory.create(config);
+
+        Detector custom = new Detector() {
+            private static final long serialVersionUID = -5420638839201540749L;
+
+            public MediaType detect(InputStream input, Metadata metadata) {
+                String type = metadata.get("my-custom-type-override");
+                if (type != null) {
+                    return MediaType.parse(type);
+                } else {
+                    return MediaType.OCTET_STREAM;
+                }
+            }
+        };
+
+        Tika tika = new Tika(new CompositeDetector(custom, detector));
+        return tika.detect(name);
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java Mon Sep 21 17:19:26 2015
@@ -14,6 +14,7 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
 package org.apache.tika.example;
 
 import java.io.IOException;
@@ -36,7 +37,7 @@ import org.xml.sax.SAXException;
 
 /**
  * Examples of using different Content Handlers to
- *  get different parts of the file's contents 
+ * get different parts of the file's contents
  */
 public class ContentHandlerExample {
     /**
@@ -67,10 +68,10 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     /**
      * Example of extracting just the body as HTML, without the
-     *  head part, as a string
+     * head part, as a string
      */
     public String parseBodyToHTML() throws IOException, SAXException, TikaException {
         ContentHandler handler = new BodyContentHandler(
@@ -83,16 +84,15 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     /**
      * Example of extracting just one part of the document's body,
-     *  as HTML as a string, excluding the rest
+     * as HTML as a string, excluding the rest
      */
     public String parseOnePartToHTML() throws IOException, SAXException, TikaException {
         // Only get things under html -> body -> div (class=header)
         XPathParser xhtmlParser = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
-        Matcher divContentMatcher = xhtmlParser.parse(
-                "/xhtml:html/xhtml:body/xhtml:div/descendant::node()");        
+        Matcher divContentMatcher = xhtmlParser.parse("/xhtml:html/xhtml:body/xhtml:div/descendant::node()");
         ContentHandler handler = new MatchingContentHandler(
                 new ToXMLContentHandler(), divContentMatcher);
 
@@ -103,25 +103,26 @@ public class ContentHandlerExample {
             return handler.toString();
         }
     }
-    
+
     protected final int MAXIMUM_TEXT_CHUNK_SIZE = 40;
+
     /**
      * Example of extracting the plain text in chunks, with each chunk
-     *  of no more than a certain maximum size
+     * of no more than a certain maximum size
      */
     public List<String> parseToPlainTextChunks() throws IOException, SAXException, TikaException {
-        final List<String> chunks = new ArrayList<String>();
+        final List<String> chunks = new ArrayList<>();
         chunks.add("");
         ContentHandlerDecorator handler = new ContentHandlerDecorator() {
             @Override
             public void characters(char[] ch, int start, int length) {
-                String lastChunk = chunks.get(chunks.size()-1);
+                String lastChunk = chunks.get(chunks.size() - 1);
                 String thisStr = new String(ch, start, length);
-                
-                if (lastChunk.length()+length > MAXIMUM_TEXT_CHUNK_SIZE) {
+
+                if (lastChunk.length() + length > MAXIMUM_TEXT_CHUNK_SIZE) {
                     chunks.add(thisStr);
                 } else {
-                    chunks.set(chunks.size()-1, lastChunk+thisStr);
+                    chunks.set(chunks.size() - 1, lastChunk + thisStr);
                 }
             }
         };

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -22,28 +25,25 @@ import org.apache.tika.mime.MimeTypes;
 import org.apache.tika.mime.MimeTypesFactory;
 
 public class CustomMimeInfo {
-
-	public static String customMimeInfo() throws Exception {
-		String path = "file:///path/to/prescription-type.xml";
-		MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
-		Tika tika = new Tika(typeDatabase);
-		String type = tika.detect("/path/to/prescription.xpd");
-		return type;
-	}
-
-	public static String customCompositeDetector() throws Exception {
-		String path = "file:///path/to/prescription-type.xml";
-		MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
-		Tika tika = new Tika(new CompositeDetector(typeDatabase,
-				new EncryptedPrescriptionDetector()));
-		String type = tika.detect("/path/to/tmp/prescription.xpd");
-		return type;
-	}
-
-	public static void main(String[] args) throws Exception {
-		System.out.println("customMimeInfo=" + customMimeInfo());
-		System.out.println("customCompositeDetector="
-				+ customCompositeDetector());
-	}
-
+    public static String customMimeInfo() throws Exception {
+        String path = "file:///path/to/prescription-type.xml";
+        MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+        Tika tika = new Tika(typeDatabase);
+        String type = tika.detect("/path/to/prescription.xpd");
+        return type;
+    }
+
+    public static String customCompositeDetector() throws Exception {
+        String path = "file:///path/to/prescription-type.xml";
+        MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+        Tika tika = new Tika(new CompositeDetector(typeDatabase,
+                new EncryptedPrescriptionDetector()));
+        String type = tika.detect("/path/to/tmp/prescription.xpd");
+        return type;
+    }
+
+    public static void main(String[] args) throws Exception {
+        System.out.println("customMimeInfo=" + customMimeInfo());
+        System.out.println("customCompositeDetector=" + customCompositeDetector());
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,14 +20,10 @@ package org.apache.tika.example;
 import org.apache.tika.cli.TikaCLI;
 
 /**
- *
  * Print the supported Tika Metadata models and their fields.
- *
  */
 public class DescribeMetadata {
-
-	public static void main(String[] args) throws Exception {
-		TikaCLI.main(new String[] { "--list-met-models" });
-	}
-
+    public static void main(String[] args) throws Exception {
+        TikaCLI.main(new String[]{"--list-met-models"});
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -16,7 +19,7 @@ package org.apache.tika.example;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -40,101 +43,101 @@ import static java.nio.charset.StandardC
  */
 public class DirListParser implements Parser {
 
-	private static final long serialVersionUID = 2717930544410610735L;
+    private static final long serialVersionUID = 2717930544410610735L;
 
-	private static Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
-			Arrays.asList(MediaType.TEXT_PLAIN));
+    private static Set<MediaType> SUPPORTED_TYPES = new HashSet<>(
+            Collections.singletonList(MediaType.TEXT_PLAIN));
 
-	/*
-	 * (non-Javadoc)
-	 * 
-	 * @see org.apache.tika.parser.Parser#getSupportedTypes(
-	 * org.apache.tika.parser.ParseContext)
-	 */
-	public Set<MediaType> getSupportedTypes(ParseContext context) {
-		return SUPPORTED_TYPES;
-	}
-
-	/*
-	 * (non-Javadoc)
-	 * 
-	 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-	 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
-	 */
-	public void parse(InputStream is, ContentHandler handler, Metadata metadata)
-			throws IOException, SAXException, TikaException {
-		this.parse(is, handler, metadata, new ParseContext());
-	}
-
-	/*
-	 * (non-Javadoc)
-	 * 
-	 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
-	 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
-	 * org.apache.tika.parser.ParseContext)
-	 */
-	public void parse(InputStream is, ContentHandler handler,
-			Metadata metadata, ParseContext context) throws IOException,
-			SAXException, TikaException {
-
-		List<String> lines = FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
-		for (String line : lines) {
-			String[] fileToks = line.split("\\s+");
-			if (fileToks.length < 8)
-				continue;
-			String filePermissions = fileToks[0];
-			String numHardLinks = fileToks[1];
-			String fileOwner = fileToks[2];
-			String fileOwnerGroup = fileToks[3];
-			String fileSize = fileToks[4];
-			StringBuffer lastModDate = new StringBuffer();
-			lastModDate.append(fileToks[5]);
-			lastModDate.append(" ");
-			lastModDate.append(fileToks[6]);
-			lastModDate.append(" ");
-			lastModDate.append(fileToks[7]);
-			StringBuffer fileName = new StringBuffer();
-			for (int i = 8; i < fileToks.length; i++) {
-				fileName.append(fileToks[i]);
-				fileName.append(" ");
-			}
-			fileName.deleteCharAt(fileName.length() - 1);
-			this.addMetadata(metadata, filePermissions, numHardLinks,
-					fileOwner, fileOwnerGroup, fileSize,
-					lastModDate.toString(), fileName.toString());
-		}
-	}
-
-	public static void main(String[] args) throws IOException, SAXException,
-			TikaException {
-		DirListParser parser = new DirListParser();
-		Metadata met = new Metadata();
-		parser.parse(System.in, new BodyContentHandler(), met);
-
-		System.out.println("Num files: " + met.getValues("Filename").length);
-		System.out.println("Num executables: " + met.get("NumExecutables"));
-	}
-
-	private void addMetadata(Metadata metadata, String filePerms,
-			String numHardLinks, String fileOwner, String fileOwnerGroup,
-			String fileSize, String lastModDate, String fileName) {
-		metadata.add("FilePermissions", filePerms);
-		metadata.add("NumHardLinks", numHardLinks);
-		metadata.add("FileOwner", fileOwner);
-		metadata.add("FileOwnerGroup", fileOwnerGroup);
-		metadata.add("FileSize", fileSize);
-		metadata.add("LastModifiedDate", lastModDate);
-		metadata.add("Filename", fileName);
-
-		if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
-			if (metadata.get("NumExecutables") != null) {
-				int numExecs = Integer.valueOf(metadata.get("NumExecutables"));
-				numExecs++;
-				metadata.set("NumExecutables", String.valueOf(numExecs));
-			} else {
-				metadata.set("NumExecutables", "1");
-			}
-		}
-	}
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#getSupportedTypes(
+     * org.apache.tika.parser.ParseContext)
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+     */
+    public void parse(InputStream is, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        this.parse(is, handler, metadata, new ParseContext());
+    }
+
+    /*
+     * (non-Javadoc)
+     *
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+     * org.apache.tika.parser.ParseContext)
+     */
+    public void parse(InputStream is, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+
+        List<String> lines = FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
+        for (String line : lines) {
+            String[] fileToks = line.split("\\s+");
+            if (fileToks.length < 8)
+                continue;
+            String filePermissions = fileToks[0];
+            String numHardLinks = fileToks[1];
+            String fileOwner = fileToks[2];
+            String fileOwnerGroup = fileToks[3];
+            String fileSize = fileToks[4];
+            StringBuilder lastModDate = new StringBuilder();
+            lastModDate.append(fileToks[5]);
+            lastModDate.append(" ");
+            lastModDate.append(fileToks[6]);
+            lastModDate.append(" ");
+            lastModDate.append(fileToks[7]);
+            StringBuilder fileName = new StringBuilder();
+            for (int i = 8; i < fileToks.length; i++) {
+                fileName.append(fileToks[i]);
+                fileName.append(" ");
+            }
+            fileName.deleteCharAt(fileName.length() - 1);
+            this.addMetadata(metadata, filePermissions, numHardLinks,
+                    fileOwner, fileOwnerGroup, fileSize,
+                    lastModDate.toString(), fileName.toString());
+        }
+    }
+
+    public static void main(String[] args) throws IOException, SAXException,
+            TikaException {
+        DirListParser parser = new DirListParser();
+        Metadata met = new Metadata();
+        parser.parse(System.in, new BodyContentHandler(), met);
+
+        System.out.println("Num files: " + met.getValues("Filename").length);
+        System.out.println("Num executables: " + met.get("NumExecutables"));
+    }
+
+    private void addMetadata(Metadata metadata, String filePerms,
+                             String numHardLinks, String fileOwner, String fileOwnerGroup,
+                             String fileSize, String lastModDate, String fileName) {
+        metadata.add("FilePermissions", filePerms);
+        metadata.add("NumHardLinks", numHardLinks);
+        metadata.add("FileOwner", fileOwner);
+        metadata.add("FileOwnerGroup", fileOwnerGroup);
+        metadata.add("FileSize", fileSize);
+        metadata.add("LastModifiedDate", lastModDate);
+        metadata.add("Filename", fileName);
+
+        if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
+            if (metadata.get("NumExecutables") != null) {
+                int numExecs = Integer.valueOf(metadata.get("NumExecutables"));
+                numExecs++;
+                metadata.set("NumExecutables", String.valueOf(numExecs));
+            } else {
+                metadata.set("NumExecutables", "1");
+            }
+        }
+    }
 
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -28,19 +31,16 @@ import org.xml.sax.SAXException;
  * Grabs a PDF file from a URL and prints its {@link Metadata}
  */
 public class DisplayMetInstance {
-
-	public static Metadata getMet(URL url) throws IOException, SAXException,
-			TikaException {
-		Metadata met = new Metadata();
-		PDFParser parser = new PDFParser();
-		parser.parse(url.openStream(), new BodyContentHandler(), met,
-				new ParseContext());
-		return met;
-	}
-
-	public static void main(String[] args) throws Exception {
-		Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
-		System.out.println(met);
-	}
-
+    public static Metadata getMet(URL url) throws IOException, SAXException,
+            TikaException {
+        Metadata met = new Metadata();
+        PDFParser parser = new PDFParser();
+        parser.parse(url.openStream(), new BodyContentHandler(), met, new ParseContext());
+        return met;
+    }
+
+    public static void main(String[] args) throws Exception {
+        Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
+        System.out.println(met);
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java Mon Sep 21 17:19:26 2015
@@ -1,4 +1,3 @@
-package org.apache.tika.example;
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,13 +15,7 @@ package org.apache.tika.example;
  * limitations under the License.
  */
 
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
+package org.apache.tika.example;
 
 import java.io.File;
 import java.io.FileOutputStream;
@@ -36,6 +29,13 @@ import java.util.Map;
 import java.util.Set;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
 
 import org.apache.tika.config.TikaConfig;
 import org.apache.tika.detect.DefaultDetector;
@@ -65,9 +65,7 @@ import static java.nio.charset.StandardC
  * for your custom mime types.
  */
 public class DumpTikaConfigExample {
-
     /**
-     *
      * @param config config file to dump
      * @param writer writer to which to write
      * @throws Exception
@@ -104,8 +102,7 @@ public class DumpTikaConfigExample {
         Translator translator = config.getTranslator();
         if (translator instanceof DefaultTranslator) {
             Node mimeComment = doc.createComment(
-                    "for example: "+
-                            "<translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
+                    "for example: <translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
             rootElement.appendChild(mimeComment);
         } else {
             Element translatorElement = doc.createElement("translator");
@@ -125,7 +122,7 @@ public class DumpTikaConfigExample {
         Element detectorsElement = doc.createElement("detectors");
 
         if (detector instanceof DefaultDetector) {
-            List<Detector> children = ((DefaultDetector)detector).getDetectors();
+            List<Detector> children = ((DefaultDetector) detector).getDetectors();
             for (Detector d : children) {
                 Element detectorElement = doc.createElement("detector");
                 detectorElement.setAttribute("class", d.getClass().getCanonicalName());
@@ -147,9 +144,9 @@ public class DumpTikaConfigExample {
             Parser child = e.getValue();
             String className = e.getKey();
             parserElement.setAttribute("class", className);
-            Set<MediaType> types = new TreeSet<MediaType>();
+            Set<MediaType> types = new TreeSet<>();
             types.addAll(child.getSupportedTypes(context));
-            for (MediaType type : types){
+            for (MediaType type : types) {
                 Element mimeElement = doc.createElement("mime");
                 mimeElement.appendChild(doc.createTextNode(type.toString()));
                 parserElement.appendChild(mimeElement);
@@ -160,10 +157,10 @@ public class DumpTikaConfigExample {
 
     }
 
-    private Map<String, Parser> getConcreteParsers(Parser parentParser)throws TikaException, IOException  {
-        Map<String, Parser> parsers = new TreeMap<String, Parser>();
+    private Map<String, Parser> getConcreteParsers(Parser parentParser) throws TikaException, IOException {
+        Map<String, Parser> parsers = new TreeMap<>();
         if (parentParser instanceof CompositeParser) {
-            addParsers((CompositeParser)parentParser, parsers);
+            addParsers((CompositeParser) parentParser, parsers);
         } else {
             addParser(parentParser, parsers);
         }
@@ -173,7 +170,7 @@ public class DumpTikaConfigExample {
     private void addParsers(CompositeParser p, Map<String, Parser> parsers) {
         for (Parser child : p.getParsers().values()) {
             if (child instanceof CompositeParser) {
-                addParsers((CompositeParser)child, parsers);
+                addParsers((CompositeParser) child, parsers);
             } else {
                 addParser(child, parsers);
             }
@@ -185,12 +182,10 @@ public class DumpTikaConfigExample {
     }
 
     /**
-     *
      * @param args outputFile, outputEncoding, if args is empty, this prints to console
      * @throws Exception
      */
     public static void main(String[] args) throws Exception {
-
         Charset encoding = UTF_8;
         Writer writer = null;
         if (args.length > 0) {

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -18,7 +21,6 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.security.GeneralSecurityException;
 import java.security.Key;
-
 import javax.crypto.Cipher;
 import javax.crypto.CipherInputStream;
 import javax.xml.namespace.QName;
@@ -30,30 +32,28 @@ import org.apache.tika.metadata.Metadata
 import org.apache.tika.mime.MediaType;
 
 public class EncryptedPrescriptionDetector implements Detector {
+    private static final long serialVersionUID = -1709652690773421147L;
 
-	private static final long serialVersionUID = -1709652690773421147L;
-
-	public MediaType detect(InputStream stream, Metadata metadata)
-			throws IOException {
-		Key key = Pharmacy.getKey();
-		MediaType type = MediaType.OCTET_STREAM;
-
-		try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
-			Cipher cipher = Cipher.getInstance("RSA");
-
-			cipher.init(Cipher.DECRYPT_MODE, key);
-			InputStream decrypted = new CipherInputStream(lookahead, cipher);
-
-			QName name = new XmlRootExtractor().extractRootElement(decrypted);
-			if (name != null
-					&& "http://example.com/xpd".equals(name.getNamespaceURI())
-					&& "prescription".equals(name.getLocalPart())) {
-				type = MediaType.application("x-prescription");
-			}
-		} catch (GeneralSecurityException e) {
-			// unable to decrypt, fall through
-		}
-		return type;
-	}
-
+    public MediaType detect(InputStream stream, Metadata metadata)
+            throws IOException {
+        Key key = Pharmacy.getKey();
+        MediaType type = MediaType.OCTET_STREAM;
+
+        try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
+            Cipher cipher = Cipher.getInstance("RSA");
+
+            cipher.init(Cipher.DECRYPT_MODE, key);
+            InputStream decrypted = new CipherInputStream(lookahead, cipher);
+
+            QName name = new XmlRootExtractor().extractRootElement(decrypted);
+            if (name != null
+                    && "http://example.com/xpd".equals(name.getNamespaceURI())
+                    && "prescription".equals(name.getLocalPart())) {
+                type = MediaType.application("x-prescription");
+            }
+        } catch (GeneralSecurityException e) {
+            // unable to decrypt, fall through
+        }
+        return type;
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.security.GeneralSecurityExce
 import java.security.Key;
 import java.util.Collections;
 import java.util.Set;
-
 import javax.crypto.Cipher;
 import javax.crypto.CipherInputStream;
 
@@ -33,28 +35,26 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 public class EncryptedPrescriptionParser extends AbstractParser {
+    private static final long serialVersionUID = -7816987249611278541L;
 
-	private static final long serialVersionUID = -7816987249611278541L;
-
-	public void parse(InputStream stream, ContentHandler handler,
-			Metadata metadata, ParseContext context) throws IOException,
-			SAXException, TikaException {
-		try {
-			Key key = Pharmacy.getKey();
-			Cipher cipher = Cipher.getInstance("RSA");
-			cipher.init(Cipher.DECRYPT_MODE, key);
-			InputStream decrypted = new CipherInputStream(stream, cipher);
-
-			new PrescriptionParser().parse(decrypted, handler, metadata,
-					context);
-		} catch (GeneralSecurityException e) {
-			throw new TikaException("Unable to decrypt a digital prescription",
-					e);
-		}
-	}
-
-	public Set<MediaType> getSupportedTypes(ParseContext context) {
-		return Collections.singleton(MediaType.application("x-prescription"));
-	}
-
+    public void parse(InputStream stream, ContentHandler handler,
+                      Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        try {
+            Key key = Pharmacy.getKey();
+            Cipher cipher = Cipher.getInstance("RSA");
+            cipher.init(Cipher.DECRYPT_MODE, key);
+            InputStream decrypted = new CipherInputStream(stream, cipher);
+
+            new PrescriptionParser().parse(decrypted, handler, metadata,
+                    context);
+        } catch (GeneralSecurityException e) {
+            throw new TikaException("Unable to decrypt a digital prescription",
+                    e);
+        }
+    }
+
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return Collections.singleton(MediaType.application("x-prescription"));
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +17,6 @@
 
 package org.apache.tika.example;
 
-
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.file.Files;
@@ -37,9 +39,8 @@ import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
 
 public class ExtractEmbeddedFiles {
-
     private Parser parser = new AutoDetectParser();
-    private Detector detector = ((AutoDetectParser)parser).getDetector();
+    private Detector detector = ((AutoDetectParser) parser).getDetector();
     private TikaConfig config = TikaConfig.getDefaultConfig();
 
     public void extract(InputStream is, Path outputDir) throws SAXException, TikaException, IOException {
@@ -87,7 +88,7 @@ public class ExtractEmbeddedFiles {
             //now try to figure out the right extension for the embedded file
             MediaType contentType = detector.detect(stream, metadata);
 
-            if (name.indexOf('.')==-1 && contentType!=null) {
+            if (name.indexOf('.') == -1 && contentType != null) {
                 try {
                     name += config.getMimeRepository().forName(
                             contentType.toString()).getExtension();

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java Mon Sep 21 17:19:26 2015
@@ -1,10 +1,12 @@
-package org.apache.tika.example;
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,6 +14,14 @@ package org.apache.tika.example;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.HashSet;
+
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.parser.AutoDetectParser;
 import org.apache.tika.parser.ParseContext;
@@ -19,15 +29,10 @@ import org.apache.tika.parser.Parser;
 import org.apache.tika.sax.BodyContentHandler;
 import org.apache.tika.sax.PhoneExtractingContentHandler;
 
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.util.HashSet;
-
 /**
  * Class to demonstrate how to use the {@link org.apache.tika.sax.PhoneExtractingContentHandler}
  * to get a list of all of the phone numbers from every file in a directory.
- *
+ * <p>
  * You can run this main method by running
  * <code>
  *     mvn exec:java -Dexec.mainClass="org.apache.tika.example.GrabPhoneNumbersExample" -Dexec.args="/path/to/directory"
@@ -38,7 +43,7 @@ public class GrabPhoneNumbersExample {
     private static HashSet<String> phoneNumbers = new HashSet<String>();
     private static int failedFiles, successfulFiles = 0;
 
-    public static void main(String[] args){
+    public static void main(String[] args) {
         if (args.length != 1) {
             System.err.println("Usage `java GrabPhoneNumbers [corpus]");
             return;

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.io.FileInputStream;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.Date;
-
 import javax.jcr.Item;
 
 import org.apache.jackrabbit.server.io.DefaultIOListener;
@@ -38,205 +40,196 @@ import org.slf4j.LoggerFactory;
  * <code>ImportContextImpl</code>...
  */
 public class ImportContextImpl implements ImportContext {
+    private static Logger log = LoggerFactory.getLogger(ImportContextImpl.class);
 
-	private static Logger log = LoggerFactory
-			.getLogger(ImportContextImpl.class);
-
-	private final IOListener ioListener;
-	private final Item importRoot;
-	private final String systemId;
-	private final File inputFile;
-
-	private InputContext inputCtx;
-	private boolean completed;
-
-	private final Detector detector;
-
-	private final MediaType type;
-
-	/**
-	 * Creates a new item import context. The specified InputStream is written
-	 * to a temporary file in order to avoid problems with multiple IOHandlers
-	 * that try to run the import but fail. The temporary file is deleted as
-	 * soon as this context is informed that the import has been completed and
-	 * it will not be used any more.
-	 *
-	 * @param importRoot
-	 * @param systemId
-	 * @param ctx
-	 *            input context, or <code>null</code>
-	 * @param stream
-	 *            document input stream, or <code>null</code>
-	 * @param ioListener
-	 * @param detector
-	 *            content type detector
-	 * @throws IOException
-	 * @see ImportContext#informCompleted(boolean)
-	 */
-	public ImportContextImpl(Item importRoot, String systemId,
-			InputContext ctx, InputStream stream, IOListener ioListener,
-			Detector detector) throws IOException {
-		this.importRoot = importRoot;
-		this.systemId = systemId;
-		this.inputCtx = ctx;
-		this.ioListener = (ioListener != null) ? ioListener
-				: new DefaultIOListener(log);
-		this.detector = detector;
-
-		Metadata metadata = new Metadata();
-		if (ctx != null && ctx.getContentType() != null) {
-			metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
-		}
-		if (systemId != null) {
-			metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
-		}
-		if (stream != null && !stream.markSupported()) {
-			stream = new BufferedInputStream(stream);
-		}
-		type = detector.detect(stream, metadata);
-
-		this.inputFile = IOUtil.getTempFile(stream);
-	}
-
-	/**
-	 * @see ImportContext#getIOListener()
-	 */
-	public IOListener getIOListener() {
-		return ioListener;
-	}
-
-	/**
-	 * @see ImportContext#getImportRoot()
-	 */
-	public Item getImportRoot() {
-		return importRoot;
-	}
-
-	/**
-	 * @see ImportContext#getDetector()
-	 */
-	public Detector getDetector() {
-		return detector;
-	}
-
-	/**
-	 * @see ImportContext#hasStream()
-	 */
-	public boolean hasStream() {
-		return inputFile != null;
-	}
-
-	/**
-	 * Returns a new <code>InputStream</code> to the temporary file created
-	 * during instanciation or <code>null</code>, if this context does not
-	 * provide a stream.
-	 *
-	 * @see ImportContext#getInputStream()
-	 * @see #hasStream()
-	 */
-	public InputStream getInputStream() {
-		checkCompleted();
-		InputStream in = null;
-		if (inputFile != null) {
-			try {
-				in = new FileInputStream(inputFile);
-			} catch (IOException e) {
-				// unexpected error... ignore and return null
-			}
-		}
-		return in;
-	}
-
-	/**
-	 * @see ImportContext#getSystemId()
-	 */
-	public String getSystemId() {
-		return systemId;
-	}
-
-	/**
-	 * @see ImportContext#getModificationTime()
-	 */
-	public long getModificationTime() {
-		return (inputCtx != null) ? inputCtx.getModificationTime() : new Date()
-				.getTime();
-	}
-
-	/**
-	 * @see ImportContext#getContentLanguage()
-	 */
-	public String getContentLanguage() {
-		return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
-	}
-
-	/**
-	 * @see ImportContext#getContentLength()
-	 */
-	public long getContentLength() {
-		long length = IOUtil.UNDEFINED_LENGTH;
-		if (inputCtx != null) {
-			length = inputCtx.getContentLength();
-		}
-		if (length < 0 && inputFile != null) {
-			length = inputFile.length();
-		}
-		if (length < 0) {
-			log.debug("Unable to determine content length -> default value = "
-					+ IOUtil.UNDEFINED_LENGTH);
-		}
-		return length;
-	}
-
-	/**
-	 * @see ImportContext#getMimeType()
-	 */
-	public String getMimeType() {
-		return IOUtil.getMimeType(type.toString());
-	}
-
-	/**
-	 * @see ImportContext#getEncoding()
-	 */
-	public String getEncoding() {
-		return IOUtil.getEncoding(type.toString());
-	}
-
-	/**
-	 * @see ImportContext#getProperty(Object)
-	 */
-	public Object getProperty(Object propertyName) {
-		return (inputCtx != null) ? inputCtx.getProperty(propertyName
-				.toString()) : null;
-	}
-
-	/**
-	 * @see ImportContext#informCompleted(boolean)
-	 */
-	public void informCompleted(boolean success) {
-		checkCompleted();
-		completed = true;
-		if (inputFile != null) {
-			inputFile.delete();
-		}
-	}
-
-	/**
-	 * @see ImportContext#isCompleted()
-	 */
-	public boolean isCompleted() {
-		return completed;
-	}
-
-	/**
-	 * @throws IllegalStateException
-	 *             if the context is already completed.
-	 * @see #isCompleted()
-	 * @see #informCompleted(boolean)
-	 */
-	private void checkCompleted() {
-		if (completed) {
-			throw new IllegalStateException(
-					"ImportContext has already been consumed.");
-		}
-	}
+    private final IOListener ioListener;
+    private final Item importRoot;
+    private final String systemId;
+    private final File inputFile;
+
+    private InputContext inputCtx;
+    private boolean completed;
+
+    private final Detector detector;
+
+    private final MediaType type;
+
+    /**
+     * Creates a new item import context. The specified InputStream is written
+     * to a temporary file in order to avoid problems with multiple IOHandlers
+     * that try to run the import but fail. The temporary file is deleted as
+     * soon as this context is informed that the import has been completed and
+     * it will not be used any more.
+     *
+     * @param importRoot
+     * @param systemId
+     * @param ctx        input context, or <code>null</code>
+     * @param stream     document input stream, or <code>null</code>
+     * @param ioListener
+     * @param detector   content type detector
+     * @throws IOException
+     * @see ImportContext#informCompleted(boolean)
+     */
+    public ImportContextImpl(Item importRoot, String systemId,
+                             InputContext ctx, InputStream stream, IOListener ioListener,
+                             Detector detector) throws IOException {
+        this.importRoot = importRoot;
+        this.systemId = systemId;
+        this.inputCtx = ctx;
+        this.ioListener = (ioListener != null) ? ioListener
+                : new DefaultIOListener(log);
+        this.detector = detector;
+
+        Metadata metadata = new Metadata();
+        if (ctx != null && ctx.getContentType() != null) {
+            metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+        }
+        if (systemId != null) {
+            metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+        }
+        if (stream != null && !stream.markSupported()) {
+            stream = new BufferedInputStream(stream);
+        }
+        type = detector.detect(stream, metadata);
+
+        this.inputFile = IOUtil.getTempFile(stream);
+    }
+
+    /**
+     * @see ImportContext#getIOListener()
+     */
+    public IOListener getIOListener() {
+        return ioListener;
+    }
+
+    /**
+     * @see ImportContext#getImportRoot()
+     */
+    public Item getImportRoot() {
+        return importRoot;
+    }
+
+    /**
+     * @see ImportContext#getDetector()
+     */
+    public Detector getDetector() {
+        return detector;
+    }
+
+    /**
+     * @see ImportContext#hasStream()
+     */
+    public boolean hasStream() {
+        return inputFile != null;
+    }
+
+    /**
+     * Returns a new <code>InputStream</code> to the temporary file created
+     * during instanciation or <code>null</code>, if this context does not
+     * provide a stream.
+     *
+     * @see ImportContext#getInputStream()
+     * @see #hasStream()
+     */
+    public InputStream getInputStream() {
+        checkCompleted();
+        InputStream in = null;
+        if (inputFile != null) {
+            try {
+                in = new FileInputStream(inputFile);
+            } catch (IOException e) {
+                // unexpected error... ignore and return null
+            }
+        }
+        return in;
+    }
+
+    /**
+     * @see ImportContext#getSystemId()
+     */
+    public String getSystemId() {
+        return systemId;
+    }
+
+    /**
+     * @see ImportContext#getModificationTime()
+     */
+    public long getModificationTime() {
+        return (inputCtx != null) ? inputCtx.getModificationTime() : new Date().getTime();
+    }
+
+    /**
+     * @see ImportContext#getContentLanguage()
+     */
+    public String getContentLanguage() {
+        return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+    }
+
+    /**
+     * @see ImportContext#getContentLength()
+     */
+    public long getContentLength() {
+        long length = IOUtil.UNDEFINED_LENGTH;
+        if (inputCtx != null) {
+            length = inputCtx.getContentLength();
+        }
+        if (length < 0 && inputFile != null) {
+            length = inputFile.length();
+        }
+        if (length < 0) {
+            log.debug("Unable to determine content length -> default value = "
+                    + IOUtil.UNDEFINED_LENGTH);
+        }
+        return length;
+    }
+
+    /**
+     * @see ImportContext#getMimeType()
+     */
+    public String getMimeType() {
+        return IOUtil.getMimeType(type.toString());
+    }
+
+    /**
+     * @see ImportContext#getEncoding()
+     */
+    public String getEncoding() {
+        return IOUtil.getEncoding(type.toString());
+    }
+
+    /**
+     * @see ImportContext#getProperty(Object)
+     */
+    public Object getProperty(Object propertyName) {
+        return (inputCtx != null) ? inputCtx.getProperty(propertyName.toString()) : null;
+    }
+
+    /**
+     * @see ImportContext#informCompleted(boolean)
+     */
+    public void informCompleted(boolean success) {
+        checkCompleted();
+        completed = true;
+        if (inputFile != null) {
+            inputFile.delete();
+        }
+    }
+
+    /**
+     * @see ImportContext#isCompleted()
+     */
+    public boolean isCompleted() {
+        return completed;
+    }
+
+    /**
+     * @throws IllegalStateException if the context is already completed.
+     * @see #isCompleted()
+     * @see #informCompleted(boolean)
+     */
+    private void checkCompleted() {
+        if (completed) {
+            throw new IllegalStateException("ImportContext has already been consumed.");
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,35 +28,31 @@ import org.apache.tika.parser.AutoDetect
 import org.apache.tika.parser.ParseContext;
 
 public class Language {
-
-	public static void languageDetection() throws IOException {
-		LanguageProfile profile = new LanguageProfile(
-				"Alla människor är födda fria och"
-						+ " lika i värde och rättigheter.");
-
-		LanguageIdentifier identifier = new LanguageIdentifier(profile);
-		System.out.println(identifier.getLanguage());
-	}
-
-	public static void languageDetectionWithWriter() throws IOException {
-		ProfilingWriter writer = new ProfilingWriter();
-		writer.append("Minden emberi lény");
-		writer.append(" szabadon születik és");
-		writer.append(" egyenlő méltósága és");
-		writer.append(" joga van.");
-
-		LanguageIdentifier identifier = writer.getLanguage();
-		System.out.println(identifier.getLanguage());
-		writer.close();
-
-	}
-
-	public static void languageDetectionWithHandler() throws Exception {
-		ProfilingHandler handler = new ProfilingHandler();
-		new AutoDetectParser().parse(System.in, handler, new Metadata(),
-				new ParseContext());
-
-		LanguageIdentifier identifier = handler.getLanguage();
-		System.out.println(identifier.getLanguage());
-	}
+    public static void languageDetection() throws IOException {
+        LanguageProfile profile = new LanguageProfile(
+                "Alla människor är födda fria och lika i värde och rättigheter.");
+
+        LanguageIdentifier identifier = new LanguageIdentifier(profile);
+        System.out.println(identifier.getLanguage());
+    }
+
+    public static void languageDetectionWithWriter() throws IOException {
+        ProfilingWriter writer = new ProfilingWriter();
+        writer.append("Minden emberi lény");
+        writer.append(" szabadon születik és");
+        writer.append(" egyenlő méltósága és");
+        writer.append(" joga van.");
+
+        LanguageIdentifier identifier = writer.getLanguage();
+        System.out.println(identifier.getLanguage());
+        writer.close();
+    }
+
+    public static void languageDetectionWithHandler() throws Exception {
+        ProfilingHandler handler = new ProfilingHandler();
+        new AutoDetectParser().parse(System.in, handler, new Metadata(), new ParseContext());
+
+        LanguageIdentifier identifier = handler.getLanguage();
+        System.out.println(identifier.getLanguage());
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -29,21 +32,19 @@ import org.xml.sax.SAXException;
 
 @SuppressWarnings("deprecation")
 public class LanguageDetectingParser extends DelegatingParser {
+    private static final long serialVersionUID = 4291320409396502774L;
 
-	private static final long serialVersionUID = 4291320409396502774L;
-
-	public void parse(InputStream stream, ContentHandler handler,
-			final Metadata metadata, ParseContext context) throws SAXException,
-			IOException, TikaException {
-		ProfilingHandler profiler = new ProfilingHandler();
-		ContentHandler tee = new TeeContentHandler(handler, profiler);
-
-		super.parse(stream, tee, metadata, context);
-
-		LanguageIdentifier identifier = profiler.getLanguage();
-		if (identifier.isReasonablyCertain()) {
-			metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
-		}
-	}
-
+    public void parse(InputStream stream, ContentHandler handler,
+                      final Metadata metadata, ParseContext context) throws SAXException,
+            IOException, TikaException {
+        ProfilingHandler profiler = new ProfilingHandler();
+        ContentHandler tee = new TeeContentHandler(handler, profiler);
+
+        super.parse(stream, tee, metadata, context);
+
+        LanguageIdentifier identifier = profiler.getLanguage();
+        if (identifier.isReasonablyCertain()) {
+            metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -44,171 +47,164 @@ import org.xml.sax.helpers.DefaultHandle
  */
 @SuppressWarnings("serial")
 public class LazyTextExtractorField extends AbstractField {
-
-	/**
-	 * The logger instance for this class.
-	 */
-	private static final Logger log = LoggerFactory
-			.getLogger(LazyTextExtractorField.class);
-
-	/**
-	 * The exception used to forcibly terminate the extraction process when the
-	 * maximum field length is reached.
-	 */
-	private static final SAXException STOP = new SAXException(
-			"max field length reached");
-
-	/**
-	 * The extracted text content of the given binary value. Set to non-null
-	 * when the text extraction task finishes.
-	 */
-	private volatile String extract = null;
-
-	/**
-	 * Creates a new <code>LazyTextExtractorField</code> with the given
-	 * <code>name</code>.
-	 *
-	 * @param name
-	 *            the name of the field.
-	 * @param reader
-	 *            the reader where to obtain the string from.
-	 * @param highlighting
-	 *            set to <code>true</code> to enable result highlighting support
-	 */
-	public LazyTextExtractorField(Parser parser, InternalValue value,
-			Metadata metadata, Executor executor, boolean highlighting,
-			int maxFieldLength) {
-		super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
-				Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
-						: TermVector.NO);
-		executor.execute(new ParsingTask(parser, value, metadata,
-				maxFieldLength));
-	}
-
-	/**
-	 * Returns the extracted text. This method blocks until the text extraction
-	 * task has been completed.
-	 *
-	 * @return the string value of this field
-	 */
-	public synchronized String stringValue() {
-		try {
-			while (!isExtractorFinished()) {
-				wait();
-			}
-			return extract;
-		} catch (InterruptedException e) {
-			log.error("Text extraction thread was interrupted", e);
-			return "";
-		}
-	}
-
-	/**
-	 * @return always <code>null</code>
-	 */
-	public Reader readerValue() {
-		return null;
-	}
-
-	/**
-	 * @return always <code>null</code>
-	 */
-	public byte[] binaryValue() {
-		return null;
-	}
-
-	/**
-	 * @return always <code>null</code>
-	 */
-	public TokenStream tokenStreamValue() {
-		return null;
-	}
-
-	/**
-	 * Checks whether the text extraction task has finished.
-	 *
-	 * @return <code>true</code> if the extracted text is available
-	 */
-	public boolean isExtractorFinished() {
-		return extract != null;
-	}
-
-	private synchronized void setExtractedText(String value) {
-		extract = value;
-		notify();
-	}
-
-	/**
-	 * Releases all resources associated with this field.
-	 */
-	public void dispose() {
-		// TODO: Cause the ContentHandler below to throw an exception
-	}
-
-	/**
-	 * The background task for extracting text from a binary value.
-	 */
-	private class ParsingTask extends DefaultHandler implements Runnable {
-
-		private final Parser parser;
-
-		private final InternalValue value;
-
-		private final Metadata metadata;
-
-		private final int maxFieldLength;
-
-		private final StringBuilder builder = new StringBuilder();
-
-		private final ParseContext context = new ParseContext();
-
-		// NOTE: not a part of Jackrabbit code, made
-		private final ContentHandler handler = new DefaultHandler();
-
-		public ParsingTask(Parser parser, InternalValue value,
-				Metadata metadata, int maxFieldLength) {
-			this.parser = parser;
-			this.value = value;
-			this.metadata = metadata;
-			this.maxFieldLength = maxFieldLength;
-		}
-
-		public void run() {
-			try {
-				try (InputStream stream = value.getStream()) {
-					parser.parse(stream, handler, metadata, context);
-				}
-			} catch (LinkageError e) {
-				// Capture and ignore
-			} catch (Throwable t) {
-				if (t != STOP) {
-					log.debug("Failed to extract text.", t);
-					setExtractedText("TextExtractionError");
-					return;
-				}
-			} finally {
-				value.discard();
-			}
-			setExtractedText(handler.toString());
-
-		}
-
-		@Override
-		public void characters(char[] ch, int start, int length)
-				throws SAXException {
-			builder.append(ch, start,
-					Math.min(length, maxFieldLength - builder.length()));
-			if (builder.length() >= maxFieldLength) {
-				throw STOP;
-			}
-		}
-
-		@Override
-		public void ignorableWhitespace(char[] ch, int start, int length)
-				throws SAXException {
-			characters(ch, start, length);
-		}
-
-	}
-
+    /**
+     * The logger instance for this class.
+     */
+    private static final Logger log = LoggerFactory.getLogger(LazyTextExtractorField.class);
+
+    /**
+     * The exception used to forcibly terminate the extraction process when the
+     * maximum field length is reached.
+     * <p>
+     * This shared exception instance shouldn't be logged, since its stack trace is meaningless.
+     */
+    private static final SAXException STOP = new SAXException("max field length reached");
+
+    /**
+     * The extracted text content of the given binary value. Set to non-null
+     * when the text extraction task finishes.
+     */
+    private volatile String extract = null;
+
+    /**
+     * Creates a new <code>LazyTextExtractorField</code> and submits the text
+     * extraction task for <code>value</code> to the given <code>executor</code>.
+     *
+     * @param parser       the parser handed to the text extraction task.
+     * @param value        the binary value whose stream is parsed.
+     * @param highlighting set to <code>true</code> to enable result highlighting support
+     */
+    public LazyTextExtractorField(Parser parser, InternalValue value,
+                                  Metadata metadata, Executor executor, boolean highlighting,
+                                  int maxFieldLength) {
+        super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+                Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
+                        : TermVector.NO);
+        executor.execute(new ParsingTask(parser, value, metadata,
+                maxFieldLength));
+    }
+
+    /**
+     * Returns the extracted text. This method blocks until the text extraction
+     * task has been completed.
+     *
+     * @return the string value of this field
+     */
+    public synchronized String stringValue() {
+        try {
+            while (!isExtractorFinished()) {
+                wait();
+            }
+            return extract;
+        } catch (InterruptedException e) {
+            log.error("Text extraction thread was interrupted", e);
+            return "";
+        }
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public Reader readerValue() {
+        return null;
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public byte[] binaryValue() {
+        return null;
+    }
+
+    /**
+     * @return always <code>null</code>
+     */
+    public TokenStream tokenStreamValue() {
+        return null;
+    }
+
+    /**
+     * Checks whether the text extraction task has finished.
+     *
+     * @return <code>true</code> if the extracted text is available
+     */
+    public boolean isExtractorFinished() {
+        return extract != null;
+    }
+
+    private synchronized void setExtractedText(String value) {
+        extract = value;
+        notify();
+    }
+
+    /**
+     * Releases all resources associated with this field.
+     */
+    public void dispose() {
+        // TODO: Cause the ContentHandler below to throw an exception
+    }
+
+    /**
+     * The background task for extracting text from a binary value.
+     */
+    private class ParsingTask extends DefaultHandler implements Runnable {
+        private final Parser parser;
+
+        private final InternalValue value;
+
+        private final Metadata metadata;
+
+        private final int maxFieldLength;
+
+        private final StringBuilder builder = new StringBuilder();
+
+        private final ParseContext context = new ParseContext();
+
+        // NOTE: this field is not a part of the Jackrabbit code.
+        private final ContentHandler handler = new DefaultHandler();
+
+        public ParsingTask(Parser parser, InternalValue value,
+                           Metadata metadata, int maxFieldLength) {
+            this.parser = parser;
+            this.value = value;
+            this.metadata = metadata;
+            this.maxFieldLength = maxFieldLength;
+        }
+
+        public void run() {
+            try {
+                try (InputStream stream = value.getStream()) {
+                    parser.parse(stream, handler, metadata, context);
+                }
+            } catch (LinkageError e) {
+                // Capture and ignore
+            } catch (Throwable t) {
+                if (t != STOP) {
+                    log.debug("Failed to extract text.", t);
+                    setExtractedText("TextExtractionError");
+                    return;
+                }
+            } finally {
+                value.discard();
+            }
+            setExtractedText(handler.toString());
+
+        }
+
+        @Override
+        public void characters(char[] ch, int start, int length)
+                throws SAXException {
+            builder.append(ch, start,
+                    Math.min(length, maxFieldLength - builder.length()));
+            if (builder.length() >= maxFieldLength) {
+                throw STOP;
+            }
+        }
+
+        @Override
+        public void ignorableWhitespace(char[] ch, int start, int length)
+                throws SAXException {
+            characters(ch, start, length);
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,8 +17,8 @@
 
 package org.apache.tika.example;
 
-/* */
 import java.io.File;
+
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Field.Index;
@@ -24,23 +27,19 @@ import org.apache.lucene.index.IndexWrit
 import org.apache.tika.Tika;
 
 public class LuceneIndexer {
+    private final Tika tika;
 
-	private final Tika tika;
-
-	private final IndexWriter writer;
-
-	public LuceneIndexer(Tika tika, IndexWriter writer) {
-		this.tika = tika;
-		this.writer = writer;
-	}
-
-	public void indexDocument(File file) throws Exception {
-		Document document = new Document();
-		document.add(new Field("filename", file.getName(), Store.YES,
-				Index.ANALYZED));
-		document.add(new Field("fulltext", tika.parseToString(file), Store.NO,
-				Index.ANALYZED));
-		writer.addDocument(document);
-	}
+    private final IndexWriter writer;
 
+    public LuceneIndexer(Tika tika, IndexWriter writer) {
+        this.tika = tika;
+        this.writer = writer;
+    }
+
+    public void indexDocument(File file) throws Exception {
+        Document document = new Document();
+        document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+        document.add(new Field("fulltext", tika.parseToString(file), Store.NO, Index.ANALYZED));
+        writer.addDocument(document);
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -30,35 +33,33 @@ import org.apache.tika.Tika;
 
 @SuppressWarnings("deprecation")
 public class LuceneIndexerExtended {
+    private final Tika tika;
 
-	private final IndexWriter writer;
-
-	private final Tika tika;
-
-	public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
-		this.writer = writer;
-		this.tika = tika;
-	}
-
-	public static void main(String[] args) throws Exception {
-		try (IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(
-				args[0])), new StandardAnalyzer(Version.LUCENE_30),
-				MaxFieldLength.UNLIMITED)) {
-			LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
-			for (int i = 1; i < args.length; i++) {
-				indexer.indexDocument(new File(args[i]));
-			}
-		}
-	}
-
-	public void indexDocument(File file) throws Exception {
-		try (Reader fulltext = tika.parse(file)) {
-			Document document = new Document();
-			document.add(new Field("filename", file.getName(), Store.YES,
-					Index.ANALYZED));
-			document.add(new Field("fulltext", fulltext));
-			writer.addDocument(document);
-		}
-	}
+    private final IndexWriter writer;
 
+    public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+        this.writer = writer;
+        this.tika = tika;
+    }
+
+    public static void main(String[] args) throws Exception {
+        try (IndexWriter writer = new IndexWriter(
+                new SimpleFSDirectory(new File(args[0])),
+                new StandardAnalyzer(Version.LUCENE_30),
+                MaxFieldLength.UNLIMITED)) {
+            LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
+            for (int i = 1; i < args.length; i++) {
+                indexer.indexDocument(new File(args[i]));
+            }
+        }
+    }
+
+    public void indexDocument(File file) throws Exception {
+        try (Reader fulltext = tika.parse(file)) {
+            Document document = new Document();
+            document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+            document.add(new Field("fulltext", fulltext));
+            writer.addDocument(document);
+        }
+    }
 }

Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
  *
- * http://www.apache.org/licenses/LICENSE-2.0
+ *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
@@ -21,38 +24,35 @@ import org.apache.tika.mime.MediaType;
 import org.apache.tika.mime.MediaTypeRegistry;
 
 public class MediaTypeExample {
+    public static void describeMediaType() {
+        MediaType type = MediaType.parse("text/plain; charset=UTF-8");
 
-	public static void describeMediaType() {
-
-		MediaType type = MediaType.parse("text/plain; charset=UTF-8");
-
-		System.out.println("type:    " + type.getType());
-		System.out.println("subtype: " + type.getSubtype());
-
-		Map<String, String> parameters = type.getParameters();
-		System.out.println("parameters:");
-		for (String name : parameters.keySet()) {
-			System.out.println("  " + name + "=" + parameters.get(name));
-		}
-	}
-
-	public static void listAllTypes() {
-		MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
-		for (MediaType type : registry.getTypes()) {
-			Set<MediaType> aliases = registry.getAliases(type);
-			System.out.println(type + ", also known as " + aliases);
-		}
-	}
-
-	public static void main(String[] args) throws Exception {
-		MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
-		MediaType type = MediaType.parse("image/svg+xml");
-		while (type != null) {
-			System.out.println(type);
-			type = registry.getSupertype(type);
-		}
-	}
+        System.out.println("type:    " + type.getType());
+        System.out.println("subtype: " + type.getSubtype());
 
+        Map<String, String> parameters = type.getParameters();
+        System.out.println("parameters:");
+        for (String name : parameters.keySet()) {
+            System.out.println("  " + name + "=" + parameters.get(name));
+        }
+    }
+
+    public static void listAllTypes() {
+        MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+        for (MediaType type : registry.getTypes()) {
+            Set<MediaType> aliases = registry.getAliases(type);
+            System.out.println(type + ", also known as " + aliases);
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+        MediaType type = MediaType.parse("image/svg+xml");
+        while (type != null) {
+            System.out.println(type);
+            type = registry.getSupertype(type);
+        }
+    }
 }