You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2015/05/04 23:52:53 UTC

svn commit: r1677694 [1/2] - in /tika/trunk/tika-example: ./ src/main/java/org/apache/tika/example/ src/main/resources/org/apache/tika/example/ src/test/java/org/apache/tika/example/

Author: mattmann
Date: Mon May  4 21:52:53 2015
New Revision: 1677694

URL: http://svn.apache.org/r1677694
Log:
TIKA-1562: Add examples from the Tika in Action book

Added:
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java   (with props)
    tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java   (with props)
    tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml   (with props)
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java   (with props)
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java   (with props)
    tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java   (with props)
Modified:
    tika/trunk/tika-example/pom.xml

Modified: tika/trunk/tika-example/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/pom.xml?rev=1677694&r1=1677693&r2=1677694&view=diff
==============================================================================
--- tika/trunk/tika-example/pom.xml (original)
+++ tika/trunk/tika-example/pom.xml Mon May  4 21:52:53 2015
@@ -56,6 +56,17 @@
   <!-- List of dependencies that we depend on for the examples. See the full list of Tika
        modules and how to use them at http://mvnrepository.com/artifact/org.apache.tika.-->
   <dependencies>
+	<dependency>
+	    <groupId>org.apache.tika</groupId>
+	    <artifactId>tika-app</artifactId>
+	    <version>${project.version}</version>
+	    <exclusions>
+	      <exclusion>
+	        <artifactId>tika-parsers</artifactId>
+	        <groupId>org.apache.tika</groupId>
+	      </exclusion>
+	    </exclusions>
+	</dependency>  
     <dependency>
       <groupId>org.apache.tika</groupId>
       <artifactId>tika-parsers</artifactId>
@@ -78,6 +89,36 @@
       <type>test-jar</type>
       <scope>test</scope>
     </dependency>
+	<dependency>
+	    <groupId>javax.jcr</groupId>
+	    <artifactId>jcr</artifactId>
+	    <version>2.0</version>
+	</dependency>
+	<dependency>
+	    <groupId>org.apache.jackrabbit</groupId>
+	    <artifactId>jackrabbit-jcr-server</artifactId>
+	    <version>2.3.6</version>
+	</dependency>
+    <dependency>
+        <groupId>org.apache.jackrabbit</groupId>
+        <artifactId>jackrabbit-core</artifactId>
+        <version>2.3.6</version>
+    </dependency>   	
+	<dependency>
+	    <groupId>org.apache.lucene</groupId>
+	    <artifactId>lucene-core</artifactId>
+	    <version>3.5.0</version>
+	</dependency>	
+	<dependency>
+	    <groupId>commons-io</groupId>
+	    <artifactId>commons-io</artifactId>
+	    <version>2.4</version>
+	</dependency>
+	<dependency>
+	    <groupId>org.springframework</groupId>
+	    <artifactId>spring-context</artifactId>
+	    <version>3.0.2.RELEASE</version>
+	</dependency>
     <dependency>
       <groupId>junit</groupId>
       <artifactId>junit</artifactId>

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java Mon May  4 21:52:53 2015
@@ -0,0 +1,55 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/** Examples of type detection with an explicit MIME database and with a custom detector. */
+public class AdvancedTypeDetector {
+
+	/** Resource path handed to {@link MimeTypesFactory} by both examples. */
+	private static final String MIME_CONFIG = "/org/apache/tika/mime/tika-mimetypes.xml";
+
+	/** Detects the type of the named resource with a facade built on the MIME database alone. */
+	public static String detectWithCustomConfig(String name) throws Exception {
+		return new Tika(MimeTypesFactory.create(MIME_CONFIG)).detect(name);
+	}
+
+	/**
+	 * Detects the type of the named resource with a composite made of a
+	 * metadata-override detector and the MIME database.
+	 */
+	public static String detectWithCustomDetector(String name) throws Exception {
+		Detector overrideDetector = new Detector() {
+			private static final long serialVersionUID = -5420638839201540749L;
+
+			public MediaType detect(InputStream input, Metadata metadata) {
+				// An explicit override in the metadata wins; otherwise fall back to OCTET_STREAM.
+				String override = metadata.get("my-custom-type-override");
+				return (override == null) ? MediaType.OCTET_STREAM : MediaType.parse(override);
+			}
+		};
+		Detector mimeDatabase = MimeTypesFactory.create(MIME_CONFIG);
+		return new Tika(new CompositeDetector(overrideDetector, mimeDatabase)).detect(name);
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java Mon May  4 21:52:53 2015
@@ -0,0 +1,49 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.net.URL;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/** Detects a custom prescription type using a MIME database loaded from a URL. */
+public class CustomMimeInfo {
+
+	/** Detects the sample prescription using only the custom MIME database. */
+	public static String customMimeInfo() throws Exception {
+		URL definitions = new URL("file:///path/to/prescription-type.xml");
+		Tika facade = new Tika(MimeTypesFactory.create(definitions));
+		return facade.detect("/path/to/prescription.xpd");
+	}
+
+	/** Detects the sample prescription using the database plus an {@link EncryptedPrescriptionDetector}. */
+	public static String customCompositeDetector() throws Exception {
+		URL definitions = new URL("file:///path/to/prescription-type.xml");
+		MimeTypes mimeDatabase = MimeTypesFactory.create(definitions);
+		EncryptedPrescriptionDetector decrypting = new EncryptedPrescriptionDetector();
+		Tika facade = new Tika(new CompositeDetector(mimeDatabase, decrypting));
+		return facade.detect("/path/to/tmp/prescription.xpd");
+	}
+
+	/** Prints the result of both detection examples to standard output. */
+	public static void main(String[] args) throws Exception {
+		System.out.println("customMimeInfo=" + customMimeInfo());
+		System.out.println("customCompositeDetector=" + customCompositeDetector());
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java Mon May  4 21:52:53 2015
@@ -0,0 +1,30 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import org.apache.tika.cli.TikaCLI;
+
+/**
+ *
+ * Print the supported Tika Metadata models and their fields.
+ *
+ */
+public class DescribeMetadata {
+
+	public static void main(String[] args) throws Exception {
+		TikaCLI.main(new String[] { "--list-met-models" });
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Mon May  4 21:52:53 2015
@@ -0,0 +1,139 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the long-format output of /bin/ls and stores the permissions, link
+ * count, owner, group, size, modification date and name of every listed entry
+ * as {@link Metadata} values, counting executables under "NumExecutables".
+ */
+public class DirListParser implements Parser {
+
+	private static final long serialVersionUID = 2717930544410610735L;
+
+	private static final Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
+			Arrays.asList(MediaType.TEXT_PLAIN));
+
+	/**
+	 * @return the media types this parser declares support for (plain text)
+	 * @see Parser#getSupportedTypes(ParseContext)
+	 */
+	public Set<MediaType> getSupportedTypes(ParseContext context) {
+		return SUPPORTED_TYPES;
+	}
+
+	/**
+	 * Parses the listing with a new, empty {@link ParseContext}.
+	 *
+	 * @see #parse(InputStream, ContentHandler, Metadata, ParseContext)
+	 */
+	public void parse(InputStream is, ContentHandler handler, Metadata metadata)
+			throws IOException, SAXException, TikaException {
+		this.parse(is, handler, metadata, new ParseContext());
+	}
+
+	/**
+	 * Reads the listing line by line and adds one group of metadata values per
+	 * entry. Lines with fewer than nine whitespace-separated tokens, i.e. lines
+	 * without a name column, are skipped. The handler and context are not used.
+	 *
+	 * @param is       the directory listing to read
+	 * @param handler  ignored; no SAX events are emitted
+	 * @param metadata receives the values of every parsed entry
+	 * @param context  ignored
+	 * @throws IOException   if the listing cannot be spooled to disk or read
+	 * @throws TikaException if the temporary resources cannot be disposed of
+	 */
+	public void parse(InputStream is, ContentHandler handler,
+			Metadata metadata, ParseContext context) throws IOException,
+			SAXException, TikaException {
+		// Track the spooled temporary file in "tmp" so it is cleaned up once the lines are read.
+		TemporaryResources tmp = new TemporaryResources();
+		List<String> lines;
+		try {
+			lines = FileUtils.readLines(TikaInputStream.get(is, tmp).getFile());
+		} finally {
+			tmp.dispose();
+		}
+		for (String line : lines) {
+			String[] fileToks = line.split("\\s+");
+			// Token 8 is the first part of the name. With only eight tokens the
+			// name is empty and trimming its trailing separator used to throw.
+			if (fileToks.length < 9) {
+				continue;
+			}
+			String lastModDate = fileToks[5] + " " + fileToks[6] + " " + fileToks[7];
+			// Names containing whitespace were split apart above; rejoin them.
+			StringBuilder fileName = new StringBuilder(fileToks[8]);
+			for (int i = 9; i < fileToks.length; i++) {
+				fileName.append(' ').append(fileToks[i]);
+			}
+			this.addMetadata(metadata, fileToks[0], fileToks[1], fileToks[2],
+					fileToks[3], fileToks[4], lastModDate, fileName.toString());
+		}
+	}
+
+	/** Parses a listing from standard input and prints the resulting counts. */
+	public static void main(String[] args) throws IOException, SAXException,
+			TikaException {
+		DirListParser parser = new DirListParser();
+		Metadata met = new Metadata();
+		parser.parse(System.in, new BodyContentHandler(), met);
+		System.out.println("Num files: " + met.getValues("Filename").length);
+		System.out.println("Num executables: " + met.get("NumExecutables"));
+	}
+
+	/**
+	 * Adds the values of one listing entry to the metadata and increments the
+	 * "NumExecutables" counter when the permissions contain an "x" but no "d".
+	 */
+	private void addMetadata(Metadata metadata, String filePerms,
+			String numHardLinks, String fileOwner, String fileOwnerGroup,
+			String fileSize, String lastModDate, String fileName) {
+		metadata.add("FilePermissions", filePerms);
+		metadata.add("NumHardLinks", numHardLinks);
+		metadata.add("FileOwner", fileOwner);
+		metadata.add("FileOwnerGroup", fileOwnerGroup);
+		metadata.add("FileSize", fileSize);
+		metadata.add("LastModifiedDate", lastModDate);
+		metadata.add("Filename", fileName);
+
+		if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
+			String current = metadata.get("NumExecutables");
+			int numExecs = (current == null) ? 0 : Integer.parseInt(current);
+			metadata.set("NumExecutables", String.valueOf(numExecs + 1));
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java Mon May  4 21:52:53 2015
@@ -0,0 +1,46 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+/** Grabs a PDF file from a URL and prints its {@link Metadata}. */
+public class DisplayMetInstance {
+
+	/** Parses the PDF behind the URL and returns its metadata, closing the stream afterwards. */
+	public static Metadata getMet(URL url) throws IOException, SAXException, TikaException {
+		Metadata met = new Metadata();
+		java.io.InputStream stream = url.openStream();
+		try {
+			new PDFParser().parse(stream, new BodyContentHandler(), met, new ParseContext());
+		} finally {
+			stream.close(); // release the connection even when parsing fails
+		}
+		return met;
+	}
+
+	/** Prints the metadata of the PDF whose URL is given as the first argument. */
+	public static void main(String[] args) throws Exception {
+		System.out.println(getMet(new URL(args[0])));
+	}
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java Mon May  4 21:52:53 2015
@@ -0,0 +1,62 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+import javax.xml.namespace.QName;
+
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.XmlRootExtractor;
+import org.apache.tika.io.LookaheadInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+public class EncryptedPrescriptionDetector implements Detector {
+
+	private static final long serialVersionUID = -1709652690773421147L;
+
+	public MediaType detect(InputStream stream, Metadata metadata)
+			throws IOException {
+		Key key = Pharmacy.getKey();
+		MediaType type = MediaType.OCTET_STREAM;
+
+		InputStream lookahead = new LookaheadInputStream(stream, 1024);
+		try {
+			Cipher cipher = Cipher.getInstance("RSA");
+
+			cipher.init(Cipher.DECRYPT_MODE, key);
+			InputStream decrypted = new CipherInputStream(lookahead, cipher);
+
+			QName name = new XmlRootExtractor().extractRootElement(decrypted);
+			if (name != null
+					&& "http://example.com/xpd".equals(name.getNamespaceURI())
+					&& "prescription".equals(name.getLocalPart())) {
+				type = MediaType.application("x-prescription");
+			}
+		} catch (GeneralSecurityException e) {
+			// unable to decrypt, fall through
+		} finally {
+			lookahead.close();
+		}
+		return type;
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java Mon May  4 21:52:53 2015
@@ -0,0 +1,60 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+import java.util.Collections;
+import java.util.Set;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/** Decrypts digital prescriptions and hands them to a {@link PrescriptionParser}. */
+public class EncryptedPrescriptionParser extends AbstractParser {
+
+	private static final long serialVersionUID = -7816987249611278541L;
+
+	/** @return the single supported type, <code>application/x-prescription</code> */
+	public Set<MediaType> getSupportedTypes(ParseContext context) {
+		MediaType prescription = MediaType.application("x-prescription");
+		return Collections.singleton(prescription);
+	}
+
+	/** Parses the RSA-decrypted stream; security failures are rethrown as {@link TikaException}. */
+	public void parse(InputStream stream, ContentHandler handler, Metadata metadata,
+			ParseContext context) throws IOException, SAXException, TikaException {
+		try {
+			Key key = Pharmacy.getKey();
+			Cipher rsa = Cipher.getInstance("RSA");
+			rsa.init(Cipher.DECRYPT_MODE, key);
+			InputStream plain = new CipherInputStream(stream, rsa);
+			new PrescriptionParser().parse(plain, handler, metadata, context);
+		} catch (GeneralSecurityException e) {
+			throw new TikaException("Unable to decrypt a digital prescription", e);
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java Mon May  4 21:52:53 2015
@@ -0,0 +1,242 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+
+import javax.jcr.Item;
+
+import org.apache.jackrabbit.server.io.DefaultIOListener;
+import org.apache.jackrabbit.server.io.IOListener;
+import org.apache.jackrabbit.server.io.IOUtil;
+import org.apache.jackrabbit.server.io.ImportContext;
+import org.apache.jackrabbit.webdav.io.InputContext;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * <code>ImportContextImpl</code>...
+ */
+public class ImportContextImpl implements ImportContext {
+
+	private static Logger log = LoggerFactory
+			.getLogger(ImportContextImpl.class);
+
+	private final IOListener ioListener;
+	private final Item importRoot;
+	private final String systemId;
+	private final File inputFile;
+
+	private InputContext inputCtx;
+	private boolean completed;
+
+	private final Detector detector;
+
+	private final MediaType type;
+
+	/**
+	 * Creates a new item import context. The specified InputStream is written
+	 * to a temporary file in order to avoid problems with multiple IOHandlers
+	 * that try to run the import but fail. The temporary file is deleted as
+	 * soon as this context is informed that the import has been completed and
+	 * it will not be used any more.
+	 *
+	 * @param importRoot
+	 * @param systemId
+	 * @param ctx
+	 *            input context, or <code>null</code>
+	 * @param stream
+	 *            document input stream, or <code>null</code>
+	 * @param ioListener
+	 * @param detector
+	 *            content type detector
+	 * @throws IOException
+	 * @see ImportContext#informCompleted(boolean)
+	 */
+	public ImportContextImpl(Item importRoot, String systemId,
+			InputContext ctx, InputStream stream, IOListener ioListener,
+			Detector detector) throws IOException {
+		this.importRoot = importRoot;
+		this.systemId = systemId;
+		this.inputCtx = ctx;
+		this.ioListener = (ioListener != null) ? ioListener
+				: new DefaultIOListener(log);
+		this.detector = detector;
+
+		Metadata metadata = new Metadata();
+		if (ctx != null && ctx.getContentType() != null) {
+			metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+		}
+		if (systemId != null) {
+			metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+		}
+		if (stream != null && !stream.markSupported()) {
+			stream = new BufferedInputStream(stream);
+		}
+		type = detector.detect(stream, metadata);
+
+		this.inputFile = IOUtil.getTempFile(stream);
+	}
+
+	/**
+	 * @see ImportContext#getIOListener()
+	 */
+	public IOListener getIOListener() {
+		return ioListener;
+	}
+
+	/**
+	 * @see ImportContext#getImportRoot()
+	 */
+	public Item getImportRoot() {
+		return importRoot;
+	}
+
+	/**
+	 * @see ImportContext#getDetector()
+	 */
+	public Detector getDetector() {
+		return detector;
+	}
+
+	/**
+	 * @see ImportContext#hasStream()
+	 */
+	public boolean hasStream() {
+		return inputFile != null;
+	}
+
+	/**
+	 * Returns a new <code>InputStream</code> to the temporary file created
+	 * during instanciation or <code>null</code>, if this context does not
+	 * provide a stream.
+	 *
+	 * @see ImportContext#getInputStream()
+	 * @see #hasStream()
+	 */
+	public InputStream getInputStream() {
+		checkCompleted();
+		InputStream in = null;
+		if (inputFile != null) {
+			try {
+				in = new FileInputStream(inputFile);
+			} catch (IOException e) {
+				// unexpected error... ignore and return null
+			}
+		}
+		return in;
+	}
+
+	/**
+	 * @see ImportContext#getSystemId()
+	 */
+	public String getSystemId() {
+		return systemId;
+	}
+
+	/**
+	 * @see ImportContext#getModificationTime()
+	 */
+	public long getModificationTime() {
+		return (inputCtx != null) ? inputCtx.getModificationTime() : new Date()
+				.getTime();
+	}
+
+	/**
+	 * @see ImportContext#getContentLanguage()
+	 */
+	public String getContentLanguage() {
+		return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+	}
+
+	/**
+	 * @see ImportContext#getContentLength()
+	 */
+	public long getContentLength() {
+		long length = IOUtil.UNDEFINED_LENGTH;
+		if (inputCtx != null) {
+			length = inputCtx.getContentLength();
+		}
+		if (length < 0 && inputFile != null) {
+			length = inputFile.length();
+		}
+		if (length < 0) {
+			log.debug("Unable to determine content length -> default value = "
+					+ IOUtil.UNDEFINED_LENGTH);
+		}
+		return length;
+	}
+
+	/**
+	 * @see ImportContext#getMimeType()
+	 */
+	public String getMimeType() {
+		return IOUtil.getMimeType(type.toString());
+	}
+
+	/**
+	 * @see ImportContext#getEncoding()
+	 */
+	public String getEncoding() {
+		return IOUtil.getEncoding(type.toString());
+	}
+
+	/**
+	 * @see ImportContext#getProperty(Object)
+	 */
+	public Object getProperty(Object propertyName) {
+		return (inputCtx != null) ? inputCtx.getProperty(propertyName
+				.toString()) : null;
+	}
+
+	/**
+	 * @see ImportContext#informCompleted(boolean)
+	 */
+	public void informCompleted(boolean success) {
+		checkCompleted();
+		completed = true;
+		if (inputFile != null) {
+			inputFile.delete();
+		}
+	}
+
+	/**
+	 * @see ImportContext#isCompleted()
+	 */
+	public boolean isCompleted() {
+		return completed;
+	}
+
+	/**
+	 * @throws IllegalStateException
+	 *             if the context is already completed.
+	 * @see #isCompleted()
+	 * @see #informCompleted(boolean)
+	 */
+	private void checkCompleted() {
+		if (completed) {
+			throw new IllegalStateException(
+					"ImportContext has already been consumed.");
+		}
+	}
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java Mon May  4 21:52:53 2015
@@ -0,0 +1,79 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.language.ProfilingWriter;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Demonstrates three ways of feeding text to Tika's language identification:
+ * a ready-made profile, an incremental writer and a SAX content handler.
+ */
+public class Language {
+
+	/**
+	 * Profiles a fixed Swedish sentence and prints the language code that
+	 * {@link LanguageIdentifier} reports for it.
+	 */
+	public static void languageDetection() throws IOException {
+		LanguageProfile profile = new LanguageProfile(
+				"Alla människor är födda fria och"
+						+ " lika i värde och rättigheter.");
+
+		LanguageIdentifier identifier = new LanguageIdentifier(profile);
+		System.out.println(identifier.getLanguage());
+	}
+
+	/**
+	 * Streams a Hungarian sentence into a {@link ProfilingWriter} piece by
+	 * piece and prints the detected language code.
+	 */
+	public static void languageDetectionWithWriter() throws IOException {
+		ProfilingWriter writer = new ProfilingWriter();
+		try {
+			writer.append("Minden emberi lény");
+			writer.append(" szabadon születik és");
+			writer.append(" egyenlő méltósága és");
+			writer.append(" joga van.");
+
+			LanguageIdentifier identifier = writer.getLanguage();
+			System.out.println(identifier.getLanguage());
+		} finally {
+			// Close even when appending or detection fails.
+			writer.close();
+		}
+	}
+
+	/**
+	 * Parses the document arriving on standard input with an
+	 * {@link AutoDetectParser}, profiling the SAX events it emits, and prints
+	 * the detected language code.
+	 */
+	public static void languageDetectionWithHandler() throws Exception {
+		ProfilingHandler handler = new ProfilingHandler();
+		new AutoDetectParser().parse(System.in, handler, new Metadata(),
+				new ParseContext());
+
+		LanguageIdentifier identifier = handler.getLanguage();
+		System.out.println(identifier.getLanguage());
+	}
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java Mon May  4 21:52:53 2015
@@ -0,0 +1,60 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.DelegatingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser decorator that profiles the SAX events produced by the inherited
+ * parse and records the detected language in the document metadata.
+ */
+@SuppressWarnings("deprecation")
+public class LanguageDetectingParser extends DelegatingParser {
+
+	private static final long serialVersionUID = 4291320409396502774L;
+
+	/**
+	 * Runs the inherited parse while copying every SAX event to a
+	 * {@link ProfilingHandler}; afterwards stores the language under
+	 * {@link Metadata#LANGUAGE}, but only when the identifier is reasonably
+	 * certain of its guess.
+	 */
+	@Override
+	public void parse(InputStream stream, ContentHandler handler,
+			final Metadata metadata, ParseContext context) throws SAXException,
+			IOException, TikaException {
+		ProfilingHandler languageProfiler = new ProfilingHandler();
+		super.parse(stream, new TeeContentHandler(handler, languageProfiler),
+				metadata, context);
+
+		LanguageIdentifier detected = languageProfiler.getLanguage();
+		if (!detected.isReasonablyCertain()) {
+			return; // leave the metadata untouched on a weak guess
+		}
+		metadata.set(Metadata.LANGUAGE, detected.getLanguage());
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java Mon May  4 21:52:53 2015
@@ -0,0 +1,249 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.concurrent.Executor;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * <code>LazyTextExtractorField</code> implements a Lucene field whose String
+ * value is extracted in the background from a binary {@link InternalValue} by
+ * a Tika {@link Parser}. In addition this class provides a method to find out
+ * whether the extraction process is already finished.
+ *
+ * @see #isExtractorFinished()
+ */
+@SuppressWarnings("serial")
+public class LazyTextExtractorField extends AbstractField {
+
+	/**
+	 * The logger instance for this class.
+	 */
+	private static final Logger log = LoggerFactory
+			.getLogger(LazyTextExtractorField.class);
+
+	/**
+	 * The exception used to forcibly terminate the extraction process when the
+	 * maximum field length is reached.
+	 */
+	private static final SAXException STOP = new SAXException(
+			"max field length reached");
+
+	/**
+	 * The extracted text content of the given binary value. Set to non-null
+	 * when the text extraction task finishes.
+	 */
+	private volatile String extract = null;
+
+	/**
+	 * Creates a new <code>LazyTextExtractorField</code> and hands the text
+	 * extraction task to the given executor.
+	 *
+	 * @param parser
+	 *            the parser used to extract text from the value
+	 * @param value
+	 *            the binary value to parse; discarded when the task ends
+	 * @param metadata
+	 *            the metadata passed to the parser
+	 * @param executor
+	 *            the executor that runs the extraction task
+	 * @param highlighting
+	 *            set to <code>true</code> to enable result highlighting support
+	 * @param maxFieldLength
+	 *            the maximum number of characters to extract
+	 */
+	public LazyTextExtractorField(Parser parser, InternalValue value,
+			Metadata metadata, Executor executor, boolean highlighting,
+			int maxFieldLength) {
+		super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+				Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
+						: TermVector.NO);
+		executor.execute(new ParsingTask(parser, value, metadata,
+				maxFieldLength));
+	}
+
+	/**
+	 * Returns the extracted text. This method blocks until the text extraction
+	 * task has been completed.
+	 *
+	 * @return the string value of this field, or an empty string if the
+	 *         calling thread is interrupted while waiting
+	 */
+	public synchronized String stringValue() {
+		try {
+			while (!isExtractorFinished()) {
+				wait();
+			}
+			return extract;
+		} catch (InterruptedException e) {
+			// Restore the flag so callers can still observe the interrupt.
+			Thread.currentThread().interrupt();
+			log.error("Text extraction thread was interrupted", e);
+			return "";
+		}
+	}
+
+	/**
+	 * @return always <code>null</code>
+	 */
+	public Reader readerValue() {
+		return null;
+	}
+
+	/**
+	 * @return always <code>null</code>
+	 */
+	public byte[] binaryValue() {
+		return null;
+	}
+
+	/**
+	 * @return always <code>null</code>
+	 */
+	public TokenStream tokenStreamValue() {
+		return null;
+	}
+
+	/**
+	 * Checks whether the text extraction task has finished.
+	 *
+	 * @return <code>true</code> if the extracted text is available
+	 */
+	public boolean isExtractorFinished() {
+		return extract != null;
+	}
+
+	/**
+	 * Publishes the extraction result and wakes every thread blocked in
+	 * {@link #stringValue()}.
+	 */
+	private synchronized void setExtractedText(String value) {
+		extract = value;
+		notifyAll();
+	}
+
+	/**
+	 * Tells whether the given failure is, or was caused by, the {@link #STOP}
+	 * marker, i.e. the parse ended only because the length limit was hit.
+	 */
+	private static boolean isStop(Throwable failure) {
+		for (Throwable t = failure; t != null; t = t.getCause()) {
+			if (t == STOP) {
+				return true;
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Releases all resources associated with this field.
+	 */
+	public void dispose() {
+		// TODO: Cause the ContentHandler below to throw an exception
+	}
+
+	/**
+	 * The background task for extracting text from a binary value. The task is
+	 * its own SAX handler: character events are collected in a builder until
+	 * the maximum field length is reached.
+	 */
+	private class ParsingTask extends DefaultHandler implements Runnable {
+
+		private final Parser parser;
+
+		private final InternalValue value;
+
+		private final Metadata metadata;
+
+		private final int maxFieldLength;
+
+		private final StringBuilder builder = new StringBuilder();
+
+		private final ParseContext context = new ParseContext();
+
+		// Forwards the XHTML body events to this task, so that characters()
+		// below actually receives the extracted text.
+		private final ContentHandler handler = new BodyContentHandler(this);
+
+		public ParsingTask(Parser parser, InternalValue value,
+				Metadata metadata, int maxFieldLength) {
+			this.parser = parser;
+			this.value = value;
+			this.metadata = metadata;
+			this.maxFieldLength = maxFieldLength;
+		}
+
+		public void run() {
+			try {
+				InputStream stream = value.getStream();
+				try {
+					parser.parse(stream, handler, metadata, context);
+				} finally {
+					stream.close();
+				}
+			} catch (LinkageError e) {
+				// Capture and ignore
+			} catch (Throwable t) {
+				if (!isStop(t)) {
+					log.debug("Failed to extract text.", t);
+					setExtractedText("TextExtractionError");
+					return;
+				}
+			} finally {
+				value.discard();
+			}
+			// Publish the collected text, not the handler's identity string.
+			setExtractedText(builder.toString());
+		}
+
+		@Override
+		public void characters(char[] ch, int start, int length)
+				throws SAXException {
+			// Never pass a negative count to append(), even for a
+			// non-positive maxFieldLength.
+			int remaining = Math.max(0, maxFieldLength - builder.length());
+			builder.append(ch, start, Math.min(length, remaining));
+			if (builder.length() >= maxFieldLength) {
+				throw STOP;
+			}
+		}
+
+		@Override
+		public void ignorableWhitespace(char[] ch, int start, int length)
+				throws SAXException {
+			characters(ch, start, length);
+		}
+
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java Mon May  4 21:52:53 2015
@@ -0,0 +1,61 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+
+/**
+ * Minimal indexer that puts a file's name and its Tika-extracted text into a
+ * Lucene index.
+ */
+public class LuceneIndexer {
+
+	private final Tika tika;
+
+	private final IndexWriter writer;
+
+	/**
+	 * @param tika   facade used to extract the text of each file
+	 * @param writer index writer that receives the documents
+	 */
+	public LuceneIndexer(Tika tika, IndexWriter writer) {
+		this.tika = tika;
+		this.writer = writer;
+	}
+
+	/**
+	 * Adds one document for the file: a stored, analyzed "filename" field and
+	 * an analyzed but unstored "fulltext" field holding the extracted text.
+	 */
+	public void indexDocument(File file) throws Exception {
+		Field name = new Field("filename", file.getName(), Store.YES,
+				Index.ANALYZED);
+		Field text = new Field("fulltext", tika.parseToString(file), Store.NO,
+				Index.ANALYZED);
+
+		Document document = new Document();
+		document.add(name);
+		document.add(text);
+		writer.addDocument(document);
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java Mon May  4 21:52:53 2015
@@ -0,0 +1,99 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.util.Version;
+import org.apache.tika.Tika;
+
+/**
+ * Variant of {@link LuceneIndexer} that hands the extracted text to the
+ * index through a {@link Reader} instead of first building one large String.
+ */
+@SuppressWarnings("deprecation")
+public class LuceneIndexerExtended {
+
+	private final IndexWriter writer;
+
+	private final Tika tika;
+
+	/**
+	 * @param writer index writer that receives the documents
+	 * @param tika   facade used to extract the text of each file
+	 */
+	public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+		this.writer = writer;
+		this.tika = tika;
+	}
+
+	/**
+	 * Indexes the given files into a file-system index.
+	 *
+	 * @param args
+	 *            <code>args[0]</code> is the index directory; every further
+	 *            argument is a file to index
+	 */
+	public static void main(String[] args) throws Exception {
+		if (args.length < 1) {
+			System.err.println("Usage: LuceneIndexerExtended"
+					+ " <index directory> [file ...]");
+			return;
+		}
+
+		IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(
+				args[0])), new StandardAnalyzer(Version.LUCENE_30),
+				MaxFieldLength.UNLIMITED);
+		try {
+			// Exercise the Reader-based indexDocument of this class rather
+			// than the String-based one of LuceneIndexer.
+			LuceneIndexerExtended indexer = new LuceneIndexerExtended(writer,
+					new Tika());
+			for (int i = 1; i < args.length; i++) {
+				indexer.indexDocument(new File(args[i]));
+			}
+		} finally {
+			writer.close();
+		}
+	}
+
+	/**
+	 * Adds one document for the file: a stored, analyzed "filename" field and
+	 * a "fulltext" field backed by the reader that Tika returns for the file.
+	 * The reader is closed once the document has been added or adding failed.
+	 */
+	public void indexDocument(File file) throws Exception {
+		Reader fulltext = tika.parse(file);
+		try {
+			Document document = new Document();
+			document.add(new Field("filename", file.getName(), Store.YES,
+					Index.ANALYZED));
+			document.add(new Field("fulltext", fulltext));
+			writer.addDocument(document);
+		} finally {
+			fulltext.close();
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java Mon May  4 21:52:53 2015
@@ -0,0 +1,71 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+/**
+ * Small demonstrations of {@link MediaType} and {@link MediaTypeRegistry}.
+ */
+public class MediaTypeExample {
+
+	/**
+	 * Parses "text/plain; charset=UTF-8" and prints its type, subtype and
+	 * parameters.
+	 */
+	public static void describeMediaType() {
+		MediaType type = MediaType.parse("text/plain; charset=UTF-8");
+
+		System.out.println("type:    " + type.getType());
+		System.out.println("subtype: " + type.getSubtype());
+
+		System.out.println("parameters:");
+		for (Map.Entry<String, String> parameter : type.getParameters()
+				.entrySet()) {
+			System.out.println("  " + parameter.getKey() + "="
+					+ parameter.getValue());
+		}
+	}
+
+	/**
+	 * Prints every type of the default registry together with its aliases.
+	 */
+	public static void listAllTypes() {
+		MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+		for (MediaType type : registry.getTypes()) {
+			Set<MediaType> otherNames = registry.getAliases(type);
+			System.out.println(type + ", also known as " + otherNames);
+		}
+	}
+
+	/**
+	 * Prints "image/svg+xml" and then walks up the default registry's
+	 * supertype chain, printing each ancestor until there is none left.
+	 */
+	public static void main(String[] args) throws Exception {
+		MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+		for (MediaType type = MediaType.parse("image/svg+xml"); type != null;
+				type = registry.getSupertype(type)) {
+			System.out.println(type);
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java Mon May  4 21:52:53 2015
@@ -0,0 +1,113 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.Date;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+
+/**
+ * Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata.
+ */
+@SuppressWarnings("deprecation")
+public class MetadataAwareLuceneIndexer {
+
+	private final Tika tika;
+
+	private final IndexWriter writer;
+
+	/**
+	 * @param writer index writer that receives the documents
+	 * @param tika   facade used to parse files and detect their type
+	 */
+	public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) {
+		this.writer = writer;
+		this.tika = tika;
+	}
+
+	/**
+	 * Indexes only the metadata that parsing reports for the file.
+	 */
+	public void indexContentSpecificMet(File file) throws Exception {
+		indexMetadata(file, new Metadata());
+	}
+
+	/**
+	 * Seeds the metadata with hand-picked Dublin Core style values (creator,
+	 * date, format, source, subject, rights) before parsing, then indexes the
+	 * combined metadata.
+	 */
+	public void indexWithDublinCore(File file) throws Exception {
+		Metadata met = new Metadata();
+		met.add(Metadata.CREATOR, "Manning");
+		met.add(Metadata.CREATOR, "Tika in Action");
+		met.set(Metadata.DATE, new Date());
+		met.set(Metadata.FORMAT, tika.detect(file));
+		met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
+		met.add(Metadata.SUBJECT, "File");
+		met.add(Metadata.SUBJECT, "Indexing");
+		met.add(Metadata.SUBJECT, "Metadata");
+		met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public",
+				"private"), "public");
+		indexMetadata(file, met);
+	}
+
+	/**
+	 * Parses the file into the given metadata and then adds a single document
+	 * holding one stored, analyzed field per metadata value. Nothing is added
+	 * when the metadata ends up empty.
+	 */
+	private void indexMetadata(File file, Metadata met) throws Exception {
+		InputStream is = new FileInputStream(file);
+		try {
+			// NOTE(review): the reader is never consumed, so this relies on
+			// the metadata being filled by the time parse() returns -
+			// confirm against the Tika facade documentation.
+			Reader reader = tika.parse(is, met);
+			try {
+				Document document = new Document();
+				boolean hasFields = false;
+				for (String key : met.names()) {
+					for (String val : met.getValues(key)) {
+						document.add(new Field(key, val, Store.YES,
+								Index.ANALYZED));
+						hasFields = true;
+					}
+				}
+				// Add the document once, after all fields are collected.
+				if (hasFields) {
+					writer.addDocument(document);
+				}
+			} finally {
+				reader.close();
+			}
+		} finally {
+			is.close();
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon May  4 21:52:53 2015
@@ -0,0 +1,100 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.InputStream;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Demonstrates how to call the different components within Tika: its
+ * {@link Detector} framework (aka MIME identification and repository), its
+ * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ */
+@SuppressWarnings("deprecation")
+public class MyFirstTika {
+
+	/**
+	 * Prints the detected MIME type (three different ways), the language, the
+	 * parsed metadata and the extracted text of one file.
+	 *
+	 * @param args
+	 *            <code>args[0]</code> is the path of the file to examine
+	 */
+	public static void main(String[] args) throws Exception {
+		if (args.length < 1) {
+			System.err.println("Usage: MyFirstTika <file>");
+			return;
+		}
+
+		String filename = args[0];
+		File file = new File(filename);
+		MimeTypes mimeRegistry = TikaConfig.getDefaultConfig()
+				.getMimeRepository();
+
+		System.out.println("Examining: [" + filename + "]");
+
+		System.out.println("The MIME type (based on filename) is: ["
+				+ mimeRegistry.getMimeType(filename) + "]");
+
+		System.out.println("The MIME type (based on MAGIC) is: ["
+				+ mimeRegistry.getMimeType(file) + "]");
+
+		Detector mimeDetector = (Detector) mimeRegistry;
+		InputStream detectStream = file.toURI().toURL().openStream();
+		try {
+			System.out
+					.println("The MIME type (based on the Detector interface) is: ["
+							+ mimeDetector.detect(detectStream, new Metadata())
+							+ "]");
+		} finally {
+			detectStream.close();
+		}
+
+		LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
+				FileUtils.readFileToString(file)));
+
+		System.out.println("The language of this content is: ["
+				+ lang.getLanguage() + "]");
+
+		Parser parser = TikaConfig.getDefaultConfig().getParser(
+				MediaType.parse(mimeRegistry.getMimeType(filename).getName()));
+		Metadata parsedMet = new Metadata();
+		ContentHandler handler = new BodyContentHandler();
+		InputStream parseStream = file.toURI().toURL().openStream();
+		try {
+			parser.parse(parseStream, handler, parsedMet, new ParseContext());
+		} finally {
+			parseStream.close();
+		}
+
+		System.out.println("Parsed Metadata: ");
+		System.out.println(parsedMet);
+		System.out.println("Parsed Text: ");
+		System.out.println(handler.toString());
+	}
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java Mon May  4 21:52:53 2015
@@ -0,0 +1,44 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.security.Key;
+
+/**
+ * Static, process-wide holder for a single {@link Key}. Access is not
+ * synchronized.
+ */
+public class Pharmacy {
+
+	/** The currently registered key; <code>null</code> until one is set. */
+	private static Key key;
+
+	/**
+	 * @return the key last passed to {@link #setKey(Key)}, or
+	 *         <code>null</code> if none has been set
+	 */
+	public static Key getKey() {
+		return Pharmacy.key;
+	}
+
+	/**
+	 * @param key
+	 *            the key to hand out from now on; may be <code>null</code>
+	 */
+	public static void setKey(Key key) {
+		Pharmacy.key = key;
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java Mon May  4 21:52:53 2015
@@ -0,0 +1,67 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xml.ElementMetadataHandler;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * XML parser for the "x-prescription+xml" application type that additionally
+ * routes the <code>doctor</code> and <code>patient</code> elements of the xpd
+ * namespace to metadata handlers.
+ */
+public class PrescriptionParser extends XMLParser {
+
+	private static final long serialVersionUID = 7690682277511967388L;
+
+	/**
+	 * Wraps the inherited content handler in a tee that also feeds two
+	 * {@link ElementMetadataHandler}s: one configured for <code>doctor</code>
+	 * elements under the name "xpd:doctor", one for <code>patient</code>
+	 * elements under the name "xpd:patient".
+	 */
+	@Override
+	protected ContentHandler getContentHandler(ContentHandler handler,
+			Metadata metadata, ParseContext context) {
+		final String namespace = "http://example.com/2011/xpd";
+
+		ContentHandler doctorCapture = new ElementMetadataHandler(namespace,
+				"doctor", metadata, "xpd:doctor");
+		ContentHandler patientCapture = new ElementMetadataHandler(namespace,
+				"patient", metadata, "xpd:patient");
+
+		ContentHandler xmlHandler = super.getContentHandler(handler, metadata,
+				context);
+		return new TeeContentHandler(xmlHandler, doctorCapture, patientCapture);
+	}
+
+	/**
+	 * @return a singleton set with the "x-prescription+xml" application type
+	 */
+	@Override
+	public Set<MediaType> getSupportedTypes(ParseContext context) {
+		MediaType prescription = MediaType.application("x-prescription+xml");
+		return Collections.singleton(prescription);
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java Mon May  4 21:52:53 2015
@@ -0,0 +1,148 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.apache.jackrabbit.util.ISO8601;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6
+ * to output an RSS 2.0 feed of the files whose indexed date falls within the
+ * last few minutes (see {@link #RECENT_WINDOW_MINUTES}).
+ *
+ * Instances are not safe for concurrent use: the shared
+ * {@link SimpleDateFormat} is not thread-safe.
+ */
+@SuppressWarnings("deprecation")
+public class RecentFiles {
+
+	/** Width, in minutes, of the "recent" window queried by generateRSS. */
+	private static final int RECENT_WINDOW_MINUTES = 5;
+
+	/** Maximum number of items placed in the generated feed. */
+	private static final int MAX_ITEMS = 20;
+
+	// RSS 2.0 dates follow RFC 822, which requires English day and month
+	// names, so the formatter is pinned to Locale.US instead of the default.
+	private final SimpleDateFormat rssDateFormat = new SimpleDateFormat(
+			"E, dd MMM yyyy HH:mm:ss z", Locale.US);
+
+	/**
+	 * Generates the complete RSS document for the given Lucene index.
+	 *
+	 * @param indexFile directory holding the Lucene index to query
+	 * @return the RSS document as a string
+	 * @throws CorruptIndexException if the index is corrupt
+	 * @throws IOException if the index cannot be read
+	 */
+	public String generateRSS(File indexFile) throws CorruptIndexException,
+			IOException {
+		StringBuilder output = new StringBuilder();
+		output.append(getRSSHeaders());
+
+		IndexReader reader = IndexReader.open(new SimpleFSDirectory(indexFile));
+		try {
+			IndexSearcher searcher = new IndexSearcher(reader);
+			try {
+				GregorianCalendar gc = new java.util.GregorianCalendar(
+						TimeZone.getDefault(), Locale.getDefault());
+				gc.setTime(new Date());
+				String nowDateTime = ISO8601.format(gc);
+				gc.add(java.util.GregorianCalendar.MINUTE,
+						-RECENT_WINDOW_MINUTES);
+				String windowStart = ISO8601.format(gc);
+
+				// NOTE(review): the field name must match whatever the
+				// indexer used when storing the date -- confirm.
+				TermRangeQuery query = new TermRangeQuery(
+						Metadata.DATE.toString(), windowStart, nowDateTime,
+						true, true);
+				TopScoreDocCollector collector = TopScoreDocCollector.create(
+						MAX_ITEMS, true);
+				searcher.search(query, collector);
+				ScoreDoc[] hits = collector.topDocs().scoreDocs;
+				for (int i = 0; i < hits.length; i++) {
+					Document doc = searcher.doc(hits[i].doc);
+					output.append(getRSSItem(doc));
+				}
+			} finally {
+				// Close the searcher first; the nested finally guarantees
+				// the reader is closed even if this close fails.
+				searcher.close();
+			}
+		} finally {
+			reader.close();
+		}
+
+		output.append(getRSSFooters());
+		return output.toString();
+	}
+
+	/**
+	 * Renders one indexed document as an RSS <code>item</code> element.
+	 * The <code>pubDate</code> element is omitted when the document has no
+	 * date field or the stored value cannot be parsed.
+	 *
+	 * @param doc the Lucene document to render
+	 * @return the serialized item element
+	 */
+	public String getRSSItem(Document doc) {
+		StringBuilder output = new StringBuilder();
+		output.append("<item>");
+		String source = doc.get(DublinCore.SOURCE.getName());
+		// RSS 2.0 spells the guid attribute "isPermaLink".
+		output.append(emitTag("guid", source, "isPermaLink", "true"));
+		output.append(emitTag("title", doc.get(Metadata.TITLE), null, null));
+		output.append(emitTag("link", source, null, null));
+		output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
+		for (String topic : doc.getValues(Metadata.SUBJECT)) {
+			output.append(emitTag("category", topic, null, null));
+		}
+		String indexedDate = doc.get(Metadata.DATE.toString());
+		java.util.Calendar published = (indexedDate == null) ? null
+				: ISO8601.parse(indexedDate);
+		if (published != null) {
+			// DateFormat.format(Object) accepts only Date or Number, so the
+			// parsed Calendar must be converted with getTime().
+			output.append(emitTag("pubDate",
+					rssDateFormat.format(published.getTime()), null, null));
+		}
+		output.append(emitTag("description", doc.get(Metadata.TITLE), null,
+				null));
+		output.append("</item>");
+		return output.toString();
+	}
+
+	/**
+	 * Returns the XML declaration, the opening <code>rss</code> and
+	 * <code>channel</code> elements and the channel-level metadata.
+	 *
+	 * @return the leading part of the RSS document
+	 */
+	public String getRSSHeaders() {
+		StringBuilder output = new StringBuilder();
+		// The XML declaration must be terminated by "?>".
+		output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
+		output.append("<rss version=\"2.0\">");
+		output.append("  <channel>");
+		output.append("     <title>Tika in Action: Recent Files Feed."
+				+ "</title>");
+		output.append("     <description>Chapter 6 Examples demonstrating "
+				+ "use of Tika Metadata for RSS.</description>");
+		output.append("     <link>tikainaction.rss</link>");
+		output.append("     <lastBuildDate>" + rssDateFormat.format(new Date())
+				+ "</lastBuildDate>");
+		output.append("     <generator>Manning Publications: Tika in Action"
+				+ "</generator>");
+		output.append("     <copyright>All Rights Reserved</copyright>");
+		return output.toString();
+	}
+
+	/**
+	 * Returns the closing tags matching the elements opened by
+	 * {@link #getRSSHeaders()}.
+	 *
+	 * @return the trailing part of the RSS document
+	 */
+	public String getRSSFooters() {
+		StringBuilder output = new StringBuilder();
+		output.append("   </channel>");
+		output.append("</rss>");
+		return output.toString();
+	}
+
+	/**
+	 * Serializes a single element with optional attribute. Text and
+	 * attribute values are XML-escaped; a null value yields empty content.
+	 *
+	 * @param tagName element name
+	 * @param value element text, may be null
+	 * @param attributeName attribute name, or null for no attribute
+	 * @param attributeValue attribute value, used only with a name
+	 * @return the serialized element
+	 */
+	private String emitTag(String tagName, String value, String attributeName,
+			String attributeValue) {
+		StringBuilder output = new StringBuilder();
+		output.append("<");
+		output.append(tagName);
+		if (attributeName != null) {
+			output.append(" ");
+			output.append(attributeName);
+			output.append("=\"");
+			output.append(escapeXml(attributeValue));
+			output.append("\"");
+		}
+		output.append(">");
+		output.append(escapeXml(value));
+		output.append("</");
+		output.append(tagName);
+		output.append(">");
+		return output.toString();
+	}
+
+	/**
+	 * Escapes the five XML special characters so arbitrary metadata values
+	 * cannot break the feed markup. Null is mapped to the empty string.
+	 */
+	private static String escapeXml(String text) {
+		if (text == null) {
+			return "";
+		}
+		StringBuilder escaped = new StringBuilder(text.length());
+		for (int i = 0; i < text.length(); i++) {
+			char c = text.charAt(i);
+			switch (c) {
+			case '&':
+				escaped.append("&amp;");
+				break;
+			case '<':
+				escaped.append("&lt;");
+				break;
+			case '>':
+				escaped.append("&gt;");
+				break;
+			case '"':
+				escaped.append("&quot;");
+				break;
+			case '\'':
+				escaped.append("&apos;");
+				break;
+			default:
+				escaped.append(c);
+			}
+		}
+		return escaped.toString();
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Mon May  4 21:52:53 2015
@@ -0,0 +1,141 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.Link;
+import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Demonstrates Tika and its ability to sense symlinks: lists the versions
+ * installed in a deployment area, skipping symbolic links and entries whose
+ * name starts with "current", and reports the second-highest version name
+ * as the rollback target.
+ */
+@SuppressWarnings("deprecation")
+public class RollbackSoftware {
+
+	/**
+	 * Command-line entry point.
+	 *
+	 * @param args a single argument: the deployment area directory
+	 */
+	public static void main(String[] args) throws Exception {
+		if (args.length < 1) {
+			System.err.println("Usage: RollbackSoftware <deployment-area>");
+			return;
+		}
+		RollbackSoftware r = new RollbackSoftware();
+		r.rollback(new File(args[0]));
+	}
+
+	/**
+	 * Determines the version to roll back to and reports it.
+	 *
+	 * @param deployArea directory containing one entry per installed version
+	 * @throws IOException if the area cannot be listed or holds fewer than
+	 *             two versions
+	 * @throws SAXException if emitting the link events fails
+	 * @throws TikaException declared by the parser interface
+	 */
+	public void rollback(File deployArea) throws IOException, SAXException,
+			TikaException {
+		LinkContentHandler handler = new LinkContentHandler();
+		Metadata met = new Metadata();
+		DeploymentAreaParser parser = new DeploymentAreaParser();
+		// The path is handed to the parser as a stream; use an explicit
+		// charset on both ends so it does not depend on the platform default.
+		parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath(),
+				"UTF-8"), handler, met);
+		List<Link> links = handler.getLinks();
+		if (links.size() < 2) {
+			throw new IOException("Must have installed at least 2 versions!");
+		}
+		// NOTE(review): names are compared lexicographically, so "1.10"
+		// sorts before "1.9" -- confirm this matches the naming scheme.
+		Collections.sort(links, new Comparator<Link>() {
+			public int compare(Link o1, Link o2) {
+				return o1.getText().compareTo(o2.getText());
+			}
+		});
+
+		this.updateVersion(links.get(links.size() - 2).getText());
+
+	}
+
+	/** Prints the version being rolled back to. */
+	private void updateVersion(String version) {
+		System.out.println("Rolling back to version: [" + version + "]");
+	}
+
+	/**
+	 * Parser whose input stream carries the path of a deployment area and
+	 * whose output is one XHTML link per non-symlink entry found there.
+	 */
+	class DeploymentAreaParser implements Parser {
+
+		private static final long serialVersionUID = -2356647405087933468L;
+
+		/*
+		 * (non-Javadoc)
+		 *
+		 * @see org.apache.tika.parser.Parser#getSupportedTypes(
+		 * org.apache.tika.parser.ParseContext)
+		 */
+		public Set<MediaType> getSupportedTypes(ParseContext context) {
+			return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays
+					.asList(MediaType.TEXT_PLAIN)));
+		}
+
+		/*
+		 * (non-Javadoc)
+		 *
+		 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+		 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+		 */
+		public void parse(InputStream is, ContentHandler handler,
+				Metadata metadata) throws IOException, SAXException,
+				TikaException {
+			parse(is, handler, metadata, new ParseContext());
+		}
+
+		/*
+		 * (non-Javadoc)
+		 *
+		 * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+		 * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+		 * org.apache.tika.parser.ParseContext)
+		 */
+		public void parse(InputStream is, ContentHandler handler,
+				Metadata metadata, ParseContext context) throws IOException,
+				SAXException, TikaException {
+
+			File deployArea = new File(IOUtils.toString(is, "UTF-8"));
+			File[] versions = deployArea.listFiles(new FileFilter() {
+
+				public boolean accept(File pathname) {
+					return !pathname.getName().startsWith("current");
+				}
+			});
+			// File.listFiles returns null when the path is not a directory
+			// or an I/O error occurs; fail clearly instead of with an NPE.
+			if (versions == null) {
+				throw new IOException("Unable to list deployment area: "
+						+ deployArea);
+			}
+
+			XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+					metadata);
+			xhtml.startDocument();
+			for (File v : versions) {
+				if (isSymlink(v)) {
+					continue;
+				}
+				xhtml.startElement("a", "href", v.toURI().toURL()
+						.toExternalForm());
+				xhtml.characters(v.getName());
+				xhtml.endElement("a");
+			}
+			// Balance startDocument so the handler sees a complete document.
+			xhtml.endDocument();
+
+		}
+
+	}
+
+	/**
+	 * Tells whether the last path component of the given file is a symbolic
+	 * link. The parent directory is canonicalized first so that a symlink
+	 * higher up in the path does not make every child look like a link.
+	 *
+	 * @param f the file to test
+	 * @return true if the entry itself resolves to a different location
+	 * @throws IOException if a canonical path cannot be computed
+	 */
+	private boolean isSymlink(File f) throws IOException {
+		File parent = f.getParentFile();
+		File inCanonicalDir = (parent == null) ? f : new File(
+				parent.getCanonicalFile(), f.getName());
+		return !inCanonicalDir.getCanonicalFile().equals(
+				inCanonicalDir.getAbsoluteFile());
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java Mon May  4 21:52:53 2015
@@ -0,0 +1,34 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import org.apache.tika.Tika;
+
+/**
+ * Prints the plain-text content that Tika extracts from each file named on
+ * the command line.
+ */
+public class SimpleTextExtractor {
+
+	/**
+	 * @param args paths of the files to extract text from
+	 */
+	public static void main(String[] args) throws Exception {
+		// A default-configured facade is enough for plain text extraction
+		Tika extractor = new Tika();
+
+		// Handle the arguments one by one, in the order given
+		for (int i = 0; i < args.length; i++) {
+			File input = new File(args[i]);
+			System.out.print(extractor.parseToString(input));
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java Mon May  4 21:52:53 2015
@@ -0,0 +1,32 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.tika.Tika;
+
+/**
+ * Prints the media type that Tika detects for each file named on the command
+ * line, one "path: type" line per file.
+ */
+public class SimpleTypeDetector {
+
+	/**
+	 * @param args paths of the files whose type should be detected
+	 */
+	public static void main(String[] args) throws Exception {
+		Tika detector = new Tika();
+
+		for (int i = 0; i < args.length; i++) {
+			String path = args[i];
+			String mediaType = detector.detect(new File(path));
+			System.out.println(path + ": " + mediaType);
+		}
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
------------------------------------------------------------------------------
    svn:executable = *

Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java Mon May  4 21:52:53 2015
@@ -0,0 +1,38 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+import com.google.common.base.Charsets;
+
+public class SpringExample {
+
+	public static void main(String[] args) throws Exception {
+		ApplicationContext context = new ClassPathXmlApplicationContext(
+				new String[] { "org/apache/tika/example/spring.xml" });
+		Parser parser = context.getBean("tika", Parser.class);
+		parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(Charsets.UTF_8)),
+				new WriteOutContentHandler(System.out), new Metadata(),
+				new ParseContext());
+	}
+
+}

Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
------------------------------------------------------------------------------
    svn:executable = *