You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2015/05/04 23:52:53 UTC
svn commit: r1677694 [1/2] - in /tika/trunk/tika-example: ./
src/main/java/org/apache/tika/example/
src/main/resources/org/apache/tika/example/
src/test/java/org/apache/tika/example/
Author: mattmann
Date: Mon May 4 21:52:53 2015
New Revision: 1677694
URL: http://svn.apache.org/r1677694
Log:
TIKA-1562: Add examples from the Tika in Action book
Added:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java (with props)
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java (with props)
tika/trunk/tika-example/src/main/resources/org/apache/tika/example/spring.xml (with props)
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java (with props)
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTextExtractorTest.java (with props)
tika/trunk/tika-example/src/test/java/org/apache/tika/example/SimpleTypeDetectorTest.java (with props)
Modified:
tika/trunk/tika-example/pom.xml
Modified: tika/trunk/tika-example/pom.xml
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/pom.xml?rev=1677694&r1=1677693&r2=1677694&view=diff
==============================================================================
--- tika/trunk/tika-example/pom.xml (original)
+++ tika/trunk/tika-example/pom.xml Mon May 4 21:52:53 2015
@@ -56,6 +56,17 @@
<!-- List of dependencies that we depend on for the examples. See the full list of Tika
modules and how to use them at http://mvnrepository.com/artifact/org.apache.tika.-->
<dependencies>
+ <dependency>
+ <groupId>org.apache.tika</groupId>
+ <artifactId>tika-app</artifactId>
+ <version>${project.version}</version>
+ <exclusions>
+ <exclusion>
+ <artifactId>tika-parsers</artifactId>
+ <groupId>org.apache.tika</groupId>
+ </exclusion>
+ </exclusions>
+ </dependency>
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
@@ -78,6 +89,36 @@
<type>test-jar</type>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>javax.jcr</groupId>
+ <artifactId>jcr</artifactId>
+ <version>2.0</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.jackrabbit</groupId>
+ <artifactId>jackrabbit-jcr-server</artifactId>
+ <version>2.3.6</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.jackrabbit</groupId>
+ <artifactId>jackrabbit-core</artifactId>
+ <version>2.3.6</version>
+ </dependency>
+ <dependency>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-core</artifactId>
+ <version>3.5.0</version>
+ </dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.4</version>
+ </dependency>
+ <dependency>
+ <groupId>org.springframework</groupId>
+ <artifactId>spring-context</artifactId>
+ <version>3.0.2.RELEASE</version>
+ </dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java Mon May 4 21:52:53 2015
@@ -0,0 +1,55 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/** Demonstrates type detection with an explicit MIME database and a custom detector. */
+public class AdvancedTypeDetector {
+
+    /** Detects the type of {@code name} with the database loaded from tika-mimetypes.xml. */
+    public static String detectWithCustomConfig(String name) throws Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Tika tika = new Tika(MimeTypesFactory.create(config));
+        return tika.detect(name);
+    }
+
+    /**
+     * Detects the type of {@code name} with a metadata-override detector
+     * combined with the database loaded from tika-mimetypes.xml.
+     */
+    public static String detectWithCustomDetector(String name) throws Exception {
+        String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+        Detector detector = MimeTypesFactory.create(config);
+        Detector custom = new Detector() {
+            private static final long serialVersionUID = -5420638839201540749L;
+            public MediaType detect(InputStream input, Metadata metadata) {
+                String override = metadata.get("my-custom-type-override");
+                MediaType parsed = (override == null) ? null : MediaType.parse(override);
+                // parse() yields null for a malformed value; a detector must not.
+                return (parsed == null) ? MediaType.OCTET_STREAM : parsed;
+            }
+        };
+        Tika tika = new Tika(new CompositeDetector(custom, detector));
+        return tika.detect(name);
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java Mon May 4 21:52:53 2015
@@ -0,0 +1,49 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.net.URL;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.CompositeDetector;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.mime.MimeTypesFactory;
+
+/** Detects prescription documents with the help of a custom MIME type database. */
+public class CustomMimeInfo {
+
+    /** Detects the sample file's type using only the custom type database. */
+    public static String customMimeInfo() throws Exception {
+        URL definitions = new URL("file:///path/to/prescription-type.xml");
+        MimeTypes mimeTypes = MimeTypesFactory.create(definitions);
+        return new Tika(mimeTypes).detect("/path/to/prescription.xpd");
+    }
+
+    /** Detects the sample file's type with the database plus the encrypted detector. */
+    public static String customCompositeDetector() throws Exception {
+        URL definitions = new URL("file:///path/to/prescription-type.xml");
+        MimeTypes mimeTypes = MimeTypesFactory.create(definitions);
+        CompositeDetector composite = new CompositeDetector(mimeTypes,
+                new EncryptedPrescriptionDetector());
+        return new Tika(composite).detect("/path/to/tmp/prescription.xpd");
+    }
+
+    /** Prints the result of both detection variants, one after the other. */
+    public static void main(String[] args) throws Exception {
+        System.out.println("customMimeInfo=" + customMimeInfo());
+        String composite = customCompositeDetector();
+        System.out.println("customCompositeDetector=" + composite);
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java Mon May 4 21:52:53 2015
@@ -0,0 +1,30 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import org.apache.tika.cli.TikaCLI;
+
+/**
+ *
+ * Print the supported Tika Metadata models and their fields.
+ *
+ */
+public class DescribeMetadata {
+
+ public static void main(String[] args) throws Exception {
+ TikaCLI.main(new String[] { "--list-met-models" });
+ }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Mon May 4 21:52:53 2015
@@ -0,0 +1,139 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TikaInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parses the output of /bin/ls and counts the number of files and the number of
+ * executables using Tika. Every accepted line adds one value to each per-file
+ * metadata key, while "NumExecutables" holds a running count.
+ */
+public class DirListParser implements Parser {
+
+    private static final long serialVersionUID = 2717930544410610735L;
+
+    /** Fewest tokens of a usable line: eight attribute tokens plus the name. */
+    private static final int MIN_TOKENS = 9;
+
+    private static final Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
+            Arrays.asList(MediaType.TEXT_PLAIN));
+
+    /**
+     * Returns the media types handled by this parser (plain text only).
+     *
+     * @see org.apache.tika.parser.Parser#getSupportedTypes(
+     *      org.apache.tika.parser.ParseContext)
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /** Parses the listing with a fresh, empty {@link ParseContext}. */
+    public void parse(InputStream is, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        this.parse(is, handler, metadata, new ParseContext());
+    }
+
+    /**
+     * Reads the listing line by line and records the columns of every line
+     * that has at least {@value #MIN_TOKENS} whitespace-separated tokens;
+     * shorter lines are skipped. The handler and context are not used.
+     *
+     * @throws IOException if the input cannot be spooled or read
+     * @throws TikaException if the temporary file cannot be cleaned up
+     * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+     *      org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+     *      org.apache.tika.parser.ParseContext)
+     */
+    public void parse(InputStream is, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        // Track the spooled temporary file so that it is removed afterwards
+        // without closing the caller's stream.
+        org.apache.tika.io.TemporaryResources tmp =
+                new org.apache.tika.io.TemporaryResources();
+        try {
+            List<String> lines = FileUtils.readLines(
+                    TikaInputStream.get(is, tmp).getFile());
+            for (String line : lines) {
+                String[] fileToks = line.split("\\s+");
+                // Without a ninth token there is no file name to record.
+                if (fileToks.length < MIN_TOKENS) {
+                    continue;
+                }
+                String lastModDate = fileToks[5] + " " + fileToks[6] + " "
+                        + fileToks[7];
+                // Names containing blanks were split apart; rejoin them.
+                StringBuilder fileName = new StringBuilder(fileToks[8]);
+                for (int i = 9; i < fileToks.length; i++) {
+                    fileName.append(' ').append(fileToks[i]);
+                }
+                this.addMetadata(metadata, fileToks[0], fileToks[1],
+                        fileToks[2], fileToks[3], fileToks[4], lastModDate,
+                        fileName.toString());
+            }
+        } finally {
+            tmp.dispose();
+        }
+    }
+
+    /** Reads a listing from standard input and prints the two counts. */
+    public static void main(String[] args) throws IOException, SAXException,
+            TikaException {
+        DirListParser parser = new DirListParser();
+        Metadata met = new Metadata();
+        parser.parse(System.in, new BodyContentHandler(), met);
+
+        System.out.println("Num files: " + met.getValues("Filename").length);
+        System.out.println("Num executables: " + met.get("NumExecutables"));
+    }
+
+    /** Adds one file's columns to the metadata and updates the executable count. */
+    private void addMetadata(Metadata metadata, String filePerms,
+            String numHardLinks, String fileOwner, String fileOwnerGroup,
+            String fileSize, String lastModDate, String fileName) {
+        metadata.add("FilePermissions", filePerms);
+        metadata.add("NumHardLinks", numHardLinks);
+        metadata.add("FileOwner", fileOwner);
+        metadata.add("FileOwnerGroup", fileOwnerGroup);
+        metadata.add("FileSize", fileSize);
+        metadata.add("LastModifiedDate", lastModDate);
+        metadata.add("Filename", fileName);
+
+        // Executable means: has an "x" permission and is not a directory.
+        if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
+            String current = metadata.get("NumExecutables");
+            int numExecs = (current == null) ? 1 : Integer.parseInt(current) + 1;
+            metadata.set("NumExecutables", String.valueOf(numExecs));
+        }
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java Mon May 4 21:52:53 2015
@@ -0,0 +1,46 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.SAXException;
+
+/** Grabs a PDF file from a URL and prints its {@link Metadata}. */
+public class DisplayMetInstance {
+
+    /** Parses the PDF at {@code url} and returns its metadata; the stream is always closed. */
+    public static Metadata getMet(URL url) throws IOException, SAXException, TikaException {
+        Metadata met = new Metadata();
+        java.io.InputStream stream = url.openStream();
+        try {
+            new PDFParser().parse(stream, new BodyContentHandler(), met, new ParseContext());
+        } finally {
+            stream.close();
+        }
+        return met;
+    }
+
+    /** Prints the metadata of the PDF whose URL is the first argument. */
+    public static void main(String[] args) throws Exception {
+        System.out.println(DisplayMetInstance.getMet(new URL(args[0])));
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java Mon May 4 21:52:53 2015
@@ -0,0 +1,62 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+import javax.xml.namespace.QName;
+
+import org.apache.tika.detect.Detector;
+import org.apache.tika.detect.XmlRootExtractor;
+import org.apache.tika.io.LookaheadInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+
+public class EncryptedPrescriptionDetector implements Detector {
+
+ private static final long serialVersionUID = -1709652690773421147L;
+
+ public MediaType detect(InputStream stream, Metadata metadata)
+ throws IOException {
+ Key key = Pharmacy.getKey();
+ MediaType type = MediaType.OCTET_STREAM;
+
+ InputStream lookahead = new LookaheadInputStream(stream, 1024);
+ try {
+ Cipher cipher = Cipher.getInstance("RSA");
+
+ cipher.init(Cipher.DECRYPT_MODE, key);
+ InputStream decrypted = new CipherInputStream(lookahead, cipher);
+
+ QName name = new XmlRootExtractor().extractRootElement(decrypted);
+ if (name != null
+ && "http://example.com/xpd".equals(name.getNamespaceURI())
+ && "prescription".equals(name.getLocalPart())) {
+ type = MediaType.application("x-prescription");
+ }
+ } catch (GeneralSecurityException e) {
+ // unable to decrypt, fall through
+ } finally {
+ lookahead.close();
+ }
+ return type;
+ }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java Mon May 4 21:52:53 2015
@@ -0,0 +1,60 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.security.GeneralSecurityException;
+import java.security.Key;
+import java.util.Collections;
+import java.util.Set;
+
+import javax.crypto.Cipher;
+import javax.crypto.CipherInputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.parser.ParseContext;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/** Parser for RSA-encrypted documents of type application/x-prescription. */
+public class EncryptedPrescriptionParser extends AbstractParser {
+
+    private static final long serialVersionUID = -7816987249611278541L;
+
+    /** Returns the single supported type, application/x-prescription. */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        MediaType prescription = MediaType.application("x-prescription");
+        return Collections.singleton(prescription);
+    }
+
+    /** Decrypts the stream with the pharmacy key and delegates to {@link PrescriptionParser}. */
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context) throws IOException,
+            SAXException, TikaException {
+        try {
+            Key pharmacyKey = Pharmacy.getKey();
+            Cipher rsa = Cipher.getInstance("RSA");
+            rsa.init(Cipher.DECRYPT_MODE, pharmacyKey);
+            InputStream plain = new CipherInputStream(stream, rsa);
+            new PrescriptionParser().parse(plain, handler, metadata, context);
+        } catch (GeneralSecurityException e) {
+            throw new TikaException("Unable to decrypt a digital prescription", e);
+        }
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java Mon May 4 21:52:53 2015
@@ -0,0 +1,242 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Date;
+
+import javax.jcr.Item;
+
+import org.apache.jackrabbit.server.io.DefaultIOListener;
+import org.apache.jackrabbit.server.io.IOListener;
+import org.apache.jackrabbit.server.io.IOUtil;
+import org.apache.jackrabbit.server.io.ImportContext;
+import org.apache.jackrabbit.webdav.io.InputContext;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@link ImportContext} that spools its input to a temporary file and detects its type with Tika.
+ */
+public class ImportContextImpl implements ImportContext {
+
+ private static Logger log = LoggerFactory
+ .getLogger(ImportContextImpl.class);
+
+ private final IOListener ioListener;
+ private final Item importRoot;
+ private final String systemId;
+ private final File inputFile;
+
+ private InputContext inputCtx;
+ private boolean completed;
+
+ private final Detector detector;
+ // Media type detected in the constructor from the stream and its metadata.
+ private final MediaType type;
+
+ /**
+ * Creates a new item import context. The specified InputStream is written
+ * to a temporary file in order to avoid problems with multiple IOHandlers
+ * that try to run the import but fail. The temporary file is deleted as
+ * soon as this context is informed that the import has been completed and
+ * it will not be used any more.
+ *
+ * @param importRoot root item of the import, as returned by getImportRoot()
+ * @param systemId system id; if non-null, also given to the detector as resource name
+ * @param ctx
+ * input context, or <code>null</code>
+ * @param stream
+ * document input stream, or <code>null</code>
+ * @param ioListener listener to use, or <code>null</code> for a DefaultIOListener
+ * @param detector
+ * content type detector
+ * @throws IOException presumably if type detection or writing the temporary file fails
+ * @see ImportContext#informCompleted(boolean)
+ */
+ public ImportContextImpl(Item importRoot, String systemId,
+ InputContext ctx, InputStream stream, IOListener ioListener,
+ Detector detector) throws IOException {
+ this.importRoot = importRoot;
+ this.systemId = systemId;
+ this.inputCtx = ctx;
+ this.ioListener = (ioListener != null) ? ioListener
+ : new DefaultIOListener(log);
+ this.detector = detector;
+ // Hints for the detector: declared content type and resource name.
+ Metadata metadata = new Metadata();
+ if (ctx != null && ctx.getContentType() != null) {
+ metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+ }
+ if (systemId != null) {
+ metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+ }
+ if (stream != null && !stream.markSupported()) {
+ stream = new BufferedInputStream(stream);
+ }
+ type = detector.detect(stream, metadata);
+ // Detection comes first; only then is the stream handed to IOUtil.getTempFile.
+ this.inputFile = IOUtil.getTempFile(stream);
+ }
+
+ /**
+ * Returns the listener in use; see {@link ImportContext#getIOListener()}.
+ */
+ public IOListener getIOListener() {
+ return ioListener;
+ }
+
+ /**
+ * Returns the import root given to the constructor; see {@link ImportContext#getImportRoot()}.
+ */
+ public Item getImportRoot() {
+ return importRoot;
+ }
+
+ /**
+ * Returns the detector given to the constructor; see {@link ImportContext#getDetector()}.
+ */
+ public Detector getDetector() {
+ return detector;
+ }
+
+ /**
+ * Tells whether a temporary input file was created; see {@link ImportContext#hasStream()}.
+ */
+ public boolean hasStream() {
+ return inputFile != null;
+ }
+
+ /**
+ * Returns a new <code>InputStream</code> to the temporary file created
+ * during instantiation or <code>null</code>, if this context does not
+ * provide a stream or the file cannot be opened. Fails with an
+ * IllegalStateException once the context has been completed.
+ * @see ImportContext#getInputStream()
+ * @see #hasStream()
+ */
+ public InputStream getInputStream() {
+ checkCompleted();
+ InputStream in = null;
+ if (inputFile != null) {
+ try {
+ in = new FileInputStream(inputFile);
+ } catch (IOException e) {
+ // unexpected error... ignore and return null
+ }
+ }
+ return in;
+ }
+
+ /**
+ * Returns the system id given to the constructor; see {@link ImportContext#getSystemId()}.
+ */
+ public String getSystemId() {
+ return systemId;
+ }
+
+ /**
+ * Input context's modification time, else the current time; see {@link ImportContext#getModificationTime()}.
+ */
+ public long getModificationTime() {
+ return (inputCtx != null) ? inputCtx.getModificationTime() : new Date()
+ .getTime();
+ }
+
+ /**
+ * Input context's content language, or null without a context; see {@link ImportContext#getContentLanguage()}.
+ */
+ public String getContentLanguage() {
+ return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+ }
+
+ /**
+ * Input context's length or, if negative or absent, the temporary file's; see {@link ImportContext#getContentLength()}.
+ */
+ public long getContentLength() {
+ long length = IOUtil.UNDEFINED_LENGTH;
+ if (inputCtx != null) {
+ length = inputCtx.getContentLength();
+ }
+ if (length < 0 && inputFile != null) {
+ length = inputFile.length();
+ }
+ if (length < 0) {
+ log.debug("Unable to determine content length -> default value = "
+ + IOUtil.UNDEFINED_LENGTH);
+ }
+ return length;
+ }
+
+ /**
+ * MIME type derived from the detected media type; see {@link ImportContext#getMimeType()}.
+ */
+ public String getMimeType() {
+ return IOUtil.getMimeType(type.toString());
+ }
+
+ /**
+ * Encoding derived from the detected media type; see {@link ImportContext#getEncoding()}.
+ */
+ public String getEncoding() {
+ return IOUtil.getEncoding(type.toString());
+ }
+
+ /**
+ * Property lookup in the input context, or null without a context; see {@link ImportContext#getProperty(Object)}.
+ */
+ public Object getProperty(Object propertyName) {
+ return (inputCtx != null) ? inputCtx.getProperty(propertyName
+ .toString()) : null;
+ }
+
+ /**
+ * Marks the context completed and deletes the temporary file; see {@link ImportContext#informCompleted(boolean)}.
+ */
+ public void informCompleted(boolean success) {
+ checkCompleted();
+ completed = true;
+ if (inputFile != null) {
+ inputFile.delete();
+ }
+ }
+
+ /**
+ * Tells whether informCompleted has been called; see {@link ImportContext#isCompleted()}.
+ */
+ public boolean isCompleted() {
+ return completed;
+ }
+
+ /**
+ * @throws IllegalStateException
+ * if the context is already completed.
+ * @see #isCompleted()
+ * @see #informCompleted(boolean)
+ */
+ private void checkCompleted() {
+ if (completed) {
+ throw new IllegalStateException(
+ "ImportContext has already been consumed.");
+ }
+ }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java Mon May 4 21:52:53 2015
@@ -0,0 +1,59 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.language.ProfilingWriter;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+
+/**
+ * Small demonstrations of Tika's language identification classes. Each method
+ * prints the identified language code to standard output.
+ */
+public class Language {
+
+    /**
+     * Builds a {@link LanguageProfile} from a fixed Swedish sentence and
+     * prints the language reported by a {@link LanguageIdentifier}.
+     *
+     * @throws IOException
+     *             declared for API compatibility with callers
+     */
+    public static void languageDetection() throws IOException {
+        LanguageProfile profile = new LanguageProfile(
+                "Alla människor är födda fria och"
+                        + " lika i värde och rättigheter.");
+
+        LanguageIdentifier identifier = new LanguageIdentifier(profile);
+        System.out.println(identifier.getLanguage());
+    }
+
+    /**
+     * Feeds a Hungarian sentence piecewise into a {@link ProfilingWriter} and
+     * prints the language the writer reports.
+     *
+     * @throws IOException
+     *             if appending to or closing the writer fails
+     */
+    public static void languageDetectionWithWriter() throws IOException {
+        ProfilingWriter writer = new ProfilingWriter();
+        try {
+            writer.append("Minden emberi lény");
+            writer.append(" szabadon születik és");
+            // \u0151 is the Hungarian "o with double acute"; written as an
+            // escape so the literal survives any source-file encoding.
+            writer.append(" egyenl\u0151 méltósága és");
+            writer.append(" joga van.");
+
+            LanguageIdentifier identifier = writer.getLanguage();
+            System.out.println(identifier.getLanguage());
+        } finally {
+            // Close the writer even when appending or identification fails.
+            writer.close();
+        }
+    }
+
+    /**
+     * Parses whatever arrives on <code>System.in</code> with an
+     * {@link AutoDetectParser}, profiling the character content through a
+     * {@link ProfilingHandler}, and prints the language the handler reports.
+     *
+     * @throws Exception
+     *             if reading or parsing the input fails
+     */
+    public static void languageDetectionWithHandler() throws Exception {
+        ProfilingHandler handler = new ProfilingHandler();
+        new AutoDetectParser().parse(System.in, handler, new Metadata(),
+                new ParseContext());
+
+        LanguageIdentifier identifier = handler.getLanguage();
+        System.out.println(identifier.getLanguage());
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java Mon May 4 21:52:53 2015
@@ -0,0 +1,49 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.ProfilingHandler;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.DelegatingParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * A delegating parser that additionally profiles the parsed character content
+ * and records the identified language in the document metadata.
+ */
+@SuppressWarnings("deprecation")
+public class LanguageDetectingParser extends DelegatingParser {
+
+    private static final long serialVersionUID = 4291320409396502774L;
+
+    /**
+     * Delegates parsing to the superclass while also sending every SAX event
+     * to a {@link ProfilingHandler}. After parsing, the language is written
+     * to {@link Metadata#LANGUAGE} only when the identifier reports that it
+     * is reasonably certain.
+     *
+     * @param stream
+     *            the document stream handed to the delegate parser
+     * @param handler
+     *            the caller's content handler; it still receives all events
+     * @param metadata
+     *            document metadata, possibly updated with the language
+     * @param context
+     *            the parse context handed to the delegate parser
+     */
+    @Override
+    public void parse(InputStream stream, ContentHandler handler,
+            final Metadata metadata, ParseContext context)
+            throws SAXException, IOException, TikaException {
+        ProfilingHandler languageProfiler = new ProfilingHandler();
+
+        // Fan events out to both the caller's handler and the profiler.
+        super.parse(stream, new TeeContentHandler(handler, languageProfiler),
+                metadata, context);
+
+        LanguageIdentifier detected = languageProfiler.getLanguage();
+        if (!detected.isReasonablyCertain()) {
+            return;
+        }
+        metadata.set(Metadata.LANGUAGE, detected.getLanguage());
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java Mon May 4 21:52:53 2015
@@ -0,0 +1,217 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.InputStream;
+import java.io.Reader;
+import java.util.concurrent.Executor;
+
+import org.apache.jackrabbit.core.query.lucene.FieldNames;
+import org.apache.jackrabbit.core.value.InternalValue;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.document.AbstractField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.Field.TermVector;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+import org.xml.sax.helpers.DefaultHandler;
+
+/**
+ * <code>LazyTextExtractorField</code> implements a Lucene field with a String
+ * value that is lazily initialized from a given {@link Reader}. In addition
+ * this class provides a method to find out whether the purpose of the reader is
+ * to extract text and whether the extraction process is already finished.
+ *
+ * @see #isExtractorFinished()
+ */
+@SuppressWarnings("serial")
+public class LazyTextExtractorField extends AbstractField {
+
+ /**
+ * The logger instance for this class.
+ */
+ private static final Logger log = LoggerFactory
+ .getLogger(LazyTextExtractorField.class);
+
+ /**
+ * The exception used to forcibly terminate the extraction process when the
+ * maximum field length is reached.
+ */
+ private static final SAXException STOP = new SAXException(
+ "max field length reached");
+
+ /**
+ * The extracted text content of the given binary value. Set to non-null
+ * when the text extraction task finishes.
+ */
+ private volatile String extract = null;
+
+ /**
+ * Creates a new <code>LazyTextExtractorField</code> with the given
+ * <code>name</code>.
+ *
+ * @param name
+ * the name of the field.
+ * @param reader
+ * the reader where to obtain the string from.
+ * @param highlighting
+ * set to <code>true</code> to enable result highlighting support
+ */
+ public LazyTextExtractorField(Parser parser, InternalValue value,
+ Metadata metadata, Executor executor, boolean highlighting,
+ int maxFieldLength) {
+ super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+ Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
+ : TermVector.NO);
+ executor.execute(new ParsingTask(parser, value, metadata,
+ maxFieldLength));
+ }
+
+ /**
+ * Returns the extracted text. This method blocks until the text extraction
+ * task has been completed.
+ *
+ * @return the string value of this field
+ */
+ public synchronized String stringValue() {
+ try {
+ while (!isExtractorFinished()) {
+ wait();
+ }
+ return extract;
+ } catch (InterruptedException e) {
+ log.error("Text extraction thread was interrupted", e);
+ return "";
+ }
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public Reader readerValue() {
+ return null;
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public byte[] binaryValue() {
+ return null;
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public TokenStream tokenStreamValue() {
+ return null;
+ }
+
+ /**
+ * Checks whether the text extraction task has finished.
+ *
+ * @return <code>true</code> if the extracted text is available
+ */
+ public boolean isExtractorFinished() {
+ return extract != null;
+ }
+
+ private synchronized void setExtractedText(String value) {
+ extract = value;
+ notify();
+ }
+
+ /**
+ * Releases all resources associated with this field.
+ */
+ public void dispose() {
+ // TODO: Cause the ContentHandler below to throw an exception
+ }
+
+ /**
+ * The background task for extracting text from a binary value.
+ */
+ private class ParsingTask extends DefaultHandler implements Runnable {
+
+ private final Parser parser;
+
+ private final InternalValue value;
+
+ private final Metadata metadata;
+
+ private final int maxFieldLength;
+
+ private final StringBuilder builder = new StringBuilder();
+
+ private final ParseContext context = new ParseContext();
+
+ // NOTE: not a part of Jackrabbit code, made
+ private final ContentHandler handler = new DefaultHandler();
+
+ public ParsingTask(Parser parser, InternalValue value,
+ Metadata metadata, int maxFieldLength) {
+ this.parser = parser;
+ this.value = value;
+ this.metadata = metadata;
+ this.maxFieldLength = maxFieldLength;
+ }
+
+ public void run() {
+ try {
+ InputStream stream = value.getStream();
+ try {
+ parser.parse(stream, handler, metadata, context);
+ } finally {
+ stream.close();
+ }
+ } catch (LinkageError e) {
+ // Capture and ignore
+ } catch (Throwable t) {
+ if (t != STOP) {
+ log.debug("Failed to extract text.", t);
+ setExtractedText("TextExtractionError");
+ return;
+ }
+ } finally {
+ value.discard();
+ }
+ setExtractedText(handler.toString());
+
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ builder.append(ch, start,
+ Math.min(length, maxFieldLength - builder.length()));
+ if (builder.length() >= maxFieldLength) {
+ throw STOP;
+ }
+ }
+
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ characters(ch, start, length);
+ }
+
+ }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java Mon May 4 21:52:53 2015
@@ -0,0 +1,46 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+/* */
+import java.io.File;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+
+/**
+ * Minimal indexer that stores a file's name and its Tika-extracted text in a
+ * Lucene index.
+ */
+public class LuceneIndexer {
+
+    private final Tika tika;
+
+    private final IndexWriter writer;
+
+    /**
+     * @param tika
+     *            the Tika facade used to extract text
+     * @param writer
+     *            the index writer that receives the documents
+     */
+    public LuceneIndexer(Tika tika, IndexWriter writer) {
+        this.tika = tika;
+        this.writer = writer;
+    }
+
+    /**
+     * Adds one document for the given file: a stored, analyzed
+     * <code>filename</code> field and an unstored, analyzed
+     * <code>fulltext</code> field holding the result of
+     * <code>tika.parseToString(file)</code>.
+     *
+     * @param file
+     *            the file to index
+     * @throws Exception
+     *             if text extraction or index writing fails
+     */
+    public void indexDocument(File file) throws Exception {
+        Document doc = new Document();
+
+        Field nameField = new Field("filename", file.getName(), Store.YES,
+                Index.ANALYZED);
+        doc.add(nameField);
+
+        String text = tika.parseToString(file);
+        Field textField = new Field("fulltext", text, Store.NO,
+                Index.ANALYZED);
+        doc.add(textField);
+
+        writer.addDocument(doc);
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java Mon May 4 21:52:53 2015
@@ -0,0 +1,70 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.Reader;
+
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriter.MaxFieldLength;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.lucene.util.Version;
+import org.apache.tika.Tika;
+
+/**
+ * Variant of the basic Lucene indexer that feeds the extracted text to Lucene
+ * through a {@link Reader} instead of a fully materialized String.
+ */
+@SuppressWarnings("deprecation")
+public class LuceneIndexerExtended {
+
+    private final IndexWriter writer;
+
+    private final Tika tika;
+
+    /**
+     * @param writer
+     *            the index writer that receives the documents
+     * @param tika
+     *            the Tika facade used to extract text
+     */
+    public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+        this.writer = writer;
+        this.tika = tika;
+    }
+
+    /**
+     * Indexes the given files into the index directory.
+     *
+     * @param args
+     *            <code>args[0]</code> is the index directory, every further
+     *            argument is a file to index
+     * @throws Exception
+     *             if opening the index or indexing a file fails
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Usage: LuceneIndexerExtended"
+                    + " <index directory> [file ...]");
+            return;
+        }
+        IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(
+                args[0])), new StandardAnalyzer(Version.LUCENE_30),
+                MaxFieldLength.UNLIMITED);
+        try {
+            // Use this class's own Reader-based indexer rather than the
+            // basic LuceneIndexer, so the example exercises indexDocument()
+            // below.
+            LuceneIndexerExtended indexer = new LuceneIndexerExtended(writer,
+                    new Tika());
+            for (int i = 1; i < args.length; i++) {
+                indexer.indexDocument(new File(args[i]));
+            }
+        } finally {
+            writer.close();
+        }
+    }
+
+    /**
+     * Adds one document for the given file: a stored, analyzed
+     * <code>filename</code> field and a <code>fulltext</code> field backed by
+     * the Reader returned from <code>tika.parse(file)</code>. The Reader is
+     * closed once the document has been added.
+     *
+     * @param file
+     *            the file to index
+     * @throws Exception
+     *             if text extraction or index writing fails
+     */
+    public void indexDocument(File file) throws Exception {
+        Reader fulltext = tika.parse(file);
+        try {
+            Document document = new Document();
+            document.add(new Field("filename", file.getName(), Store.YES,
+                    Index.ANALYZED));
+            document.add(new Field("fulltext", fulltext));
+            writer.addDocument(document);
+        } finally {
+            fulltext.close();
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java Mon May 4 21:52:53 2015
@@ -0,0 +1,58 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MediaTypeRegistry;
+
+/**
+ * Console demonstrations of {@link MediaType} and {@link MediaTypeRegistry}.
+ */
+public class MediaTypeExample {
+
+    /**
+     * Parses <code>text/plain; charset=UTF-8</code> and prints its type,
+     * subtype and each parameter as <code>name=value</code>.
+     */
+    public static void describeMediaType() {
+        MediaType parsed = MediaType.parse("text/plain; charset=UTF-8");
+
+        System.out.println("type: " + parsed.getType());
+        System.out.println("subtype: " + parsed.getSubtype());
+
+        System.out.println("parameters:");
+        for (Map.Entry<String, String> param : parsed.getParameters()
+                .entrySet()) {
+            System.out.println(" " + param.getKey() + "=" + param.getValue());
+        }
+    }
+
+    /**
+     * Prints every type known to the default registry together with the set
+     * of aliases the registry reports for it.
+     */
+    public static void listAllTypes() {
+        MediaTypeRegistry defaults = MediaTypeRegistry.getDefaultRegistry();
+
+        for (MediaType known : defaults.getTypes()) {
+            System.out.println(known + ", also known as "
+                    + defaults.getAliases(known));
+        }
+    }
+
+    /**
+     * Prints <code>image/svg+xml</code> followed by each supertype the
+     * default registry reports, until no further supertype exists.
+     *
+     * @param args
+     *            ignored
+     */
+    public static void main(String[] args) throws Exception {
+        MediaTypeRegistry defaults = MediaTypeRegistry.getDefaultRegistry();
+
+        for (MediaType current = MediaType.parse("image/svg+xml");
+                current != null;
+                current = defaults.getSupertype(current)) {
+            System.out.println(current);
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java Mon May 4 21:52:53 2015
@@ -0,0 +1,93 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.Date;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.tika.Tika;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.Property;
+
+/**
+ * Builds on the LuceneIndexer from Chapter 5 and adds indexing of Metadata.
+ */
+@SuppressWarnings("deprecation")
+public class MetadataAwareLuceneIndexer {
+
+    private Tika tika;
+
+    private IndexWriter writer;
+
+    /**
+     * @param writer
+     *            the index writer that receives the documents
+     * @param tika
+     *            the Tika facade used to parse files
+     */
+    public MetadataAwareLuceneIndexer(IndexWriter writer, Tika tika) {
+        this.writer = writer;
+        this.tika = tika;
+    }
+
+    /**
+     * Indexes the given file as a single document made up only of the
+     * metadata that parsing puts into a fresh {@link Metadata} object.
+     *
+     * @param file
+     *            the file to parse and index
+     * @throws Exception
+     *             if reading, parsing or index writing fails
+     */
+    public void indexContentSpecificMet(File file) throws Exception {
+        indexMetadata(file, new Metadata());
+    }
+
+    /**
+     * Indexes the given file as a single document whose metadata is seeded
+     * with a fixed set of creator, date, format, source, subject and rights
+     * values before the file is parsed.
+     *
+     * @param file
+     *            the file to parse and index
+     * @throws Exception
+     *             if type detection, reading, parsing or index writing fails
+     */
+    public void indexWithDublinCore(File file) throws Exception {
+        Metadata met = new Metadata();
+        met.add(Metadata.CREATOR, "Manning");
+        met.add(Metadata.CREATOR, "Tika in Action");
+        met.set(Metadata.DATE, new Date());
+        met.set(Metadata.FORMAT, tika.detect(file));
+        met.set(DublinCore.SOURCE, file.toURI().toURL().toString());
+        met.add(Metadata.SUBJECT, "File");
+        met.add(Metadata.SUBJECT, "Indexing");
+        met.add(Metadata.SUBJECT, "Metadata");
+        met.set(Property.externalClosedChoise(Metadata.RIGHTS, "public",
+                "private"), "public");
+        indexMetadata(file, met);
+    }
+
+    /**
+     * Parses the file into the given metadata object and adds exactly one
+     * document that holds one stored, analyzed field per metadata value.
+     */
+    private void indexMetadata(File file, Metadata met) throws Exception {
+        InputStream is = new FileInputStream(file);
+        try {
+            // Only the metadata is used; the Reader returned by parse() is
+            // not consumed here.
+            tika.parse(is, met);
+            Document document = new Document();
+            for (String key : met.names()) {
+                for (String val : met.getValues(key)) {
+                    document.add(new Field(key, val, Store.YES,
+                            Index.ANALYZED));
+                }
+            }
+            // Add the document once, after all fields are collected; adding
+            // it inside the loop would index it once per metadata name.
+            writer.addDocument(document);
+        } finally {
+            is.close();
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java Mon May 4 21:52:53 2015
@@ -0,0 +1,79 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.language.LanguageIdentifier;
+import org.apache.tika.language.LanguageProfile;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.mime.MimeTypes;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * Demonstrates how to call the different components within Tika: its
+ * {@link Detector} framework (aka MIME identification and repository), its
+ * {@link Parser} interface, its {@link LanguageIdentifier} and other goodies.
+ */
+
+@SuppressWarnings("deprecation")
+public class MyFirstTika {
+
+    /**
+     * Examines one file and prints its MIME type (by name, by content and via
+     * the Detector interface), its identified language, and the metadata and
+     * body text produced by the parser selected for its name-based type.
+     *
+     * @param args
+     *            <code>args[0]</code> is the path of the file to examine
+     * @throws Exception
+     *             if the file cannot be read, detected or parsed
+     */
+    public static void main(String[] args) throws Exception {
+        String filename = args[0];
+        MimeTypes mimeRegistry = TikaConfig.getDefaultConfig()
+                .getMimeRepository();
+
+        System.out.println("Examining: [" + filename + "]");
+
+        System.out.println("The MIME type (based on filename) is: ["
+                + mimeRegistry.getMimeType(filename) + "]");
+
+        System.out.println("The MIME type (based on MAGIC) is: ["
+                + mimeRegistry.getMimeType(new File(filename)) + "]");
+
+        Detector mimeDetector = (Detector) mimeRegistry;
+        MediaType detected;
+        java.io.InputStream detectStream = new File(filename).toURI().toURL()
+                .openStream();
+        try {
+            detected = mimeDetector.detect(detectStream, new Metadata());
+        } finally {
+            // Release the file handle once detection is done.
+            detectStream.close();
+        }
+        System.out
+                .println("The MIME type (based on the Detector interface) is: ["
+                        + detected + "]");
+
+        LanguageIdentifier lang = new LanguageIdentifier(new LanguageProfile(
+                FileUtils.readFileToString(new File(filename))));
+
+        System.out.println("The language of this content is: ["
+                + lang.getLanguage() + "]");
+
+        Parser parser = TikaConfig.getDefaultConfig().getParser(
+                MediaType.parse(mimeRegistry.getMimeType(filename).getName()));
+        Metadata parsedMet = new Metadata();
+        ContentHandler handler = new BodyContentHandler();
+        java.io.InputStream parseStream = new File(filename).toURI().toURL()
+                .openStream();
+        try {
+            parser.parse(parseStream, handler, parsedMet, new ParseContext());
+        } finally {
+            // Release the file handle even when parsing fails.
+            parseStream.close();
+        }
+
+        System.out.println("Parsed Metadata: ");
+        System.out.println(parsedMet);
+        System.out.println("Parsed Text: ");
+        System.out.println(handler.toString());
+
+    }
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java Mon May 4 21:52:53 2015
@@ -0,0 +1,31 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.security.Key;
+
+/**
+ * Static holder for a single {@link Key} shared across the example code.
+ */
+public class Pharmacy {
+
+    /** The currently registered key; <code>null</code> until one is set. */
+    private static Key key = null;
+
+    /**
+     * @return the key most recently passed to {@link #setKey(Key)}, or
+     *         <code>null</code> if none has been set
+     */
+    public static Key getKey() {
+        return Pharmacy.key;
+    }
+
+    /**
+     * Replaces the shared key.
+     *
+     * @param key
+     *            the new key; may be <code>null</code>
+     */
+    public static void setKey(Key key) {
+        Pharmacy.key = key;
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java Mon May 4 21:52:53 2015
@@ -0,0 +1,52 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.util.Collections;
+import java.util.Set;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.xml.ElementMetadataHandler;
+import org.apache.tika.parser.xml.XMLParser;
+import org.apache.tika.sax.TeeContentHandler;
+import org.xml.sax.ContentHandler;
+
+/**
+ * XML parser for the <code>application/x-prescription+xml</code> type that
+ * additionally copies the doctor and patient elements into the metadata.
+ */
+public class PrescriptionParser extends XMLParser {
+
+    private static final long serialVersionUID = 7690682277511967388L;
+
+    /**
+     * Wraps the superclass's content handler in a tee that also feeds two
+     * {@link ElementMetadataHandler}s: one mapping the <code>doctor</code>
+     * element to <code>xpd:doctor</code> and one mapping the
+     * <code>patient</code> element to <code>xpd:patient</code>, both in the
+     * <code>http://example.com/2011/xpd</code> namespace.
+     */
+    @Override
+    protected ContentHandler getContentHandler(ContentHandler handler,
+            Metadata metadata, ParseContext context) {
+        final String namespace = "http://example.com/2011/xpd";
+
+        ContentHandler doctorHandler = new ElementMetadataHandler(namespace,
+                "doctor", metadata, "xpd:doctor");
+        ContentHandler patientHandler = new ElementMetadataHandler(namespace,
+                "patient", metadata, "xpd:patient");
+        ContentHandler base = super.getContentHandler(handler, metadata,
+                context);
+
+        return new TeeContentHandler(base, doctorHandler, patientHandler);
+    }
+
+    /**
+     * @return a singleton set holding
+     *         <code>application/x-prescription+xml</code>
+     */
+    @Override
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        MediaType prescription = MediaType.application("x-prescription+xml");
+        return Collections.singleton(prescription);
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java Mon May 4 21:52:53 2015
@@ -0,0 +1,148 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.IOException;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.Locale;
+import java.util.TimeZone;
+
+import org.apache.jackrabbit.util.ISO8601;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.TermRangeQuery;
+import org.apache.lucene.search.TopScoreDocCollector;
+import org.apache.lucene.store.SimpleFSDirectory;
+import org.apache.tika.metadata.DublinCore;
+import org.apache.tika.metadata.Metadata;
+
+/**
+ *
+ * Builds on top of the LuceneIndexer and the Metadata discussions in Chapter 6
+ * to output an RSS feed of files crawled by the LuceneIndexer within the last
+ * five minutes.
+ *
+ * Instances are not safe for concurrent use: the shared
+ * {@link SimpleDateFormat} is not thread-safe.
+ */
+@SuppressWarnings("deprecation")
+public class RecentFiles {
+
+    /** Size of the look-back window of the feed, in minutes. */
+    private static final int WINDOW_MINUTES = 5;
+
+    /** Maximum number of index hits turned into feed items. */
+    private static final int MAX_ITEMS = 20;
+
+    // RSS 2.0 dates follow RFC 822, which requires English day and month
+    // names, so the format must not depend on the default locale.
+    private final SimpleDateFormat rssDateFormat = new SimpleDateFormat(
+            "E, dd MMM yyyy HH:mm:ss z", Locale.ENGLISH);
+
+    /**
+     * Generates an RSS document listing the documents in the given Lucene
+     * index whose date field falls within the last five minutes.
+     *
+     * @param indexFile directory holding the Lucene index
+     * @return the complete RSS document
+     * @throws CorruptIndexException if the index is corrupt
+     * @throws IOException if the index cannot be read
+     */
+    public String generateRSS(File indexFile) throws CorruptIndexException,
+            IOException {
+        StringBuilder output = new StringBuilder();
+        output.append(getRSSHeaders());
+
+        // Resources are closed in reverse order of acquisition, each in its
+        // own finally block so a failing close() cannot leak the others.
+        SimpleFSDirectory directory = new SimpleFSDirectory(indexFile);
+        try {
+            IndexReader reader = IndexReader.open(directory);
+            try {
+                IndexSearcher searcher = new IndexSearcher(reader);
+                try {
+                    appendRecentItems(searcher, output);
+                } finally {
+                    searcher.close();
+                }
+            } finally {
+                reader.close();
+            }
+        } finally {
+            directory.close();
+        }
+
+        output.append(getRSSFooters());
+        return output.toString();
+    }
+
+    /**
+     * Runs the "last five minutes" range query and appends one RSS item per
+     * hit to the given buffer.
+     */
+    private void appendRecentItems(IndexSearcher searcher, StringBuilder output)
+            throws IOException {
+        GregorianCalendar gc = new GregorianCalendar(TimeZone.getDefault(),
+                Locale.getDefault());
+        gc.setTime(new Date());
+        String nowDateTime = ISO8601.format(gc);
+        gc.add(GregorianCalendar.MINUTE, -WINDOW_MINUTES);
+        String windowStart = ISO8601.format(gc);
+
+        TermRangeQuery query = new TermRangeQuery(Metadata.DATE.toString(),
+                windowStart, nowDateTime, true, true);
+        TopScoreDocCollector collector = TopScoreDocCollector.create(
+                MAX_ITEMS, true);
+        searcher.search(query, collector);
+        for (ScoreDoc hit : collector.topDocs().scoreDocs) {
+            output.append(getRSSItem(searcher.doc(hit.doc)));
+        }
+    }
+
+    /**
+     * Renders a single indexed document as an RSS <code>item</code> element.
+     * Fields missing from the document produce empty elements; the
+     * <code>pubDate</code> element is left out when the document has no
+     * parseable date.
+     *
+     * @param doc the Lucene document to render
+     * @return the <code>item</code> element as a string
+     */
+    public String getRSSItem(Document doc) {
+        StringBuilder output = new StringBuilder();
+        output.append("<item>");
+        // The RSS 2.0 attribute name is case-sensitive: isPermaLink.
+        output.append(emitTag("guid", doc.get(DublinCore.SOURCE.getName()),
+                "isPermaLink", "true"));
+        output.append(emitTag("title", doc.get(Metadata.TITLE), null, null));
+        output.append(emitTag("link", doc.get(DublinCore.SOURCE.getName()),
+                null, null));
+        output.append(emitTag("author", doc.get(Metadata.CREATOR), null, null));
+        for (String topic : doc.getValues(Metadata.SUBJECT)) {
+            output.append(emitTag("category", topic, null, null));
+        }
+        String pubDate = formatPubDate(doc.get(Metadata.DATE.toString()));
+        if (pubDate != null) {
+            output.append(emitTag("pubDate", pubDate, null, null));
+        }
+        output.append(emitTag("description", doc.get(Metadata.TITLE), null,
+                null));
+        output.append("</item>");
+        return output.toString();
+    }
+
+    /**
+     * Converts an ISO 8601 date string into the RSS date format.
+     *
+     * @return the formatted date, or <code>null</code> when the input is
+     *         missing or cannot be parsed
+     */
+    private String formatPubDate(String isoDate) {
+        if (isoDate == null) {
+            return null;
+        }
+        java.util.Calendar parsed = ISO8601.parse(isoDate);
+        if (parsed == null) {
+            return null;
+        }
+        // DateFormat.format(Object) accepts only Date and Number, so the
+        // Calendar must be converted to a Date first.
+        return rssDateFormat.format(parsed.getTime());
+    }
+
+    /**
+     * Returns the XML declaration, the opening <code>rss</code> and
+     * <code>channel</code> elements and the channel level metadata.
+     */
+    public String getRSSHeaders() {
+        StringBuilder output = new StringBuilder();
+        output.append("<?xml version=\"1.0\" encoding=\"utf-8\"?>");
+        output.append("<rss version=\"2.0\">");
+        output.append(" <channel>");
+        output.append(" <title>Tika in Action: Recent Files Feed."
+                + "</title>");
+        output.append(" <description>Chapter 6 Examples demonstrating "
+                + "use of Tika Metadata for RSS.</description>");
+        output.append(" <link>tikainaction.rss</link>");
+        output.append(" <lastBuildDate>" + rssDateFormat.format(new Date())
+                + "</lastBuildDate>");
+        output.append(" <generator>Manning Publications: Tika in Action"
+                + "</generator>");
+        output.append(" <copyright>All Rights Reserved</copyright>");
+        return output.toString();
+    }
+
+    /**
+     * Returns the closing tags matching the elements opened by
+     * {@link #getRSSHeaders()}.
+     */
+    public String getRSSFooters() {
+        StringBuilder output = new StringBuilder();
+        output.append(" </channel>");
+        output.append("</rss>");
+        return output.toString();
+    }
+
+    /**
+     * Builds a single XML element with optional attribute. Both the
+     * attribute value and the element content are XML-escaped; a
+     * <code>null</code> value yields an empty element.
+     */
+    private String emitTag(String tagName, String value, String attributeName,
+            String attributeValue) {
+        StringBuilder output = new StringBuilder();
+        output.append("<");
+        output.append(tagName);
+        if (attributeName != null) {
+            output.append(" ");
+            output.append(attributeName);
+            output.append("=\"");
+            output.append(escapeXml(attributeValue));
+            output.append("\"");
+        }
+        output.append(">");
+        output.append(escapeXml(value));
+        output.append("</");
+        output.append(tagName);
+        output.append(">");
+        return output.toString();
+    }
+
+    /**
+     * Escapes the five XML special characters; <code>null</code> becomes the
+     * empty string.
+     */
+    private static String escapeXml(String text) {
+        if (text == null) {
+            return "";
+        }
+        StringBuilder escaped = new StringBuilder(text.length() + 16);
+        for (int i = 0; i < text.length(); i++) {
+            char c = text.charAt(i);
+            switch (c) {
+            case '&':
+                escaped.append("&amp;");
+                break;
+            case '<':
+                escaped.append("&lt;");
+                break;
+            case '>':
+                escaped.append("&gt;");
+                break;
+            case '"':
+                escaped.append("&quot;");
+                break;
+            case '\'':
+                escaped.append("&apos;");
+                break;
+            default:
+                escaped.append(c);
+            }
+        }
+        return escaped.toString();
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java Mon May 4 21:52:53 2015
@@ -0,0 +1,141 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileFilter;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import org.apache.commons.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.Link;
+import org.apache.tika.sax.LinkContentHandler;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Demonstrates Tika and its ability to sense symlinks.
+ */
+@SuppressWarnings("deprecation")
+public class RollbackSoftware {
+
+ public static void main(String[] args) throws Exception {
+ RollbackSoftware r = new RollbackSoftware();
+ r.rollback(new File(args[0]));
+ }
+
+ public void rollback(File deployArea) throws IOException, SAXException,
+ TikaException {
+ LinkContentHandler handler = new LinkContentHandler();
+ Metadata met = new Metadata();
+ DeploymentAreaParser parser = new DeploymentAreaParser();
+ parser.parse(IOUtils.toInputStream(deployArea.getAbsolutePath()),
+ handler, met);
+ List<Link> links = handler.getLinks();
+ if (links.size() < 2)
+ throw new IOException("Must have installed at least 2 versions!");
+ Collections.sort(links, new Comparator<Link>() {
+ public int compare(Link o1, Link o2) {
+ return o1.getText().compareTo(o2.getText());
+ }
+ });
+
+ this.updateVersion(links.get(links.size() - 2).getText());
+
+ }
+
+ private void updateVersion(String version) {
+ System.out.println("Rolling back to version: [" + version + "]");
+ }
+
+ class DeploymentAreaParser implements Parser {
+
+ private static final long serialVersionUID = -2356647405087933468L;
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#getSupportedTypes(
+ * org.apache.tika.parser.ParseContext)
+ */
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return Collections.unmodifiableSet(new HashSet<MediaType>(Arrays
+ .asList(MediaType.TEXT_PLAIN)));
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+ * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+ */
+ public void parse(InputStream is, ContentHandler handler,
+ Metadata metadata) throws IOException, SAXException,
+ TikaException {
+ parse(is, handler, metadata, new ParseContext());
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+ * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+ * org.apache.tika.parser.ParseContext)
+ */
+
+ public void parse(InputStream is, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+
+ File deployArea = new File(IOUtils.toString(is));
+ File[] versions = deployArea.listFiles(new FileFilter() {
+
+ public boolean accept(File pathname) {
+ return !pathname.getName().startsWith("current");
+ }
+ });
+
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+ metadata);
+ xhtml.startDocument();
+ for (File v : versions) {
+ if (isSymlink(v))
+ continue;
+ xhtml.startElement("a", "href", v.toURI().toURL().toExternalForm());
+ xhtml.characters(v.getName());
+ xhtml.endElement("a");
+ }
+
+ }
+
+ }
+
+ private boolean isSymlink(File f) throws IOException {
+ return !f.getAbsolutePath().equals(f.getCanonicalPath());
+ }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java Mon May 4 21:52:53 2015
@@ -0,0 +1,34 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+import org.apache.tika.Tika;
+
+/**
+ * Command line example that prints the text content extracted from each
+ * file named on the command line.
+ */
+public class SimpleTextExtractor {
+
+    /**
+     * @param args paths of the files to extract text from
+     */
+    public static void main(String[] args) throws Exception {
+        // A Tika facade using the default configuration
+        final Tika extractor = new Tika();
+
+        // Handle the files in command line order, writing the extracted
+        // text to standard output
+        for (int i = 0; i < args.length; i++) {
+            File input = new File(args[i]);
+            System.out.print(extractor.parseToString(input));
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java Mon May 4 21:52:53 2015
@@ -0,0 +1,32 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.File;
+
+import org.apache.tika.Tika;
+
+/**
+ * Command line example that prints the detected media type of each file
+ * named on the command line, one "path: type" line per file.
+ */
+public class SimpleTypeDetector {
+
+    /**
+     * @param args paths of the files whose type should be detected
+     */
+    public static void main(String[] args) throws Exception {
+        final Tika detector = new Tika();
+
+        for (int i = 0; i < args.length; i++) {
+            String path = args[i];
+            String detected = detector.detect(new File(path));
+            System.out.println(path + ": " + detected);
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
------------------------------------------------------------------------------
svn:executable = *
Added: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java?rev=1677694&view=auto
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java (added)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java Mon May 4 21:52:53 2015
@@ -0,0 +1,38 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.example;
+
+import java.io.ByteArrayInputStream;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.WriteOutContentHandler;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+
+import com.google.common.base.Charsets;
+
+/**
+ * Looks up the parser bean named <code>tika</code> from the Spring
+ * configuration in <code>org/apache/tika/example/spring.xml</code> and uses
+ * it to parse a short piece of text, writing the result to standard output.
+ */
+public class SpringExample {
+
+    public static void main(String[] args) throws Exception {
+        // Declared with the concrete type so that the context can be closed
+        // once parsing is done.
+        ClassPathXmlApplicationContext context = new ClassPathXmlApplicationContext(
+                new String[] { "org/apache/tika/example/spring.xml" });
+        try {
+            Parser parser = context.getBean("tika", Parser.class);
+            parser.parse(new ByteArrayInputStream("Hello, World!".getBytes(Charsets.UTF_8)),
+                    new WriteOutContentHandler(System.out), new Metadata(),
+                    new ParseContext());
+        } finally {
+            context.close();
+        }
+    }
+
+}
Propchange: tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
------------------------------------------------------------------------------
svn:executable = *