You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by gr...@apache.org on 2015/09/21 19:19:42 UTC
svn commit: r1704368 [1/2] - in /tika/trunk/tika-example/src:
main/java/org/apache/tika/example/ test/java/org/apache/tika/example/
Author: grossws
Date: Mon Sep 21 17:19:26 2015
New Revision: 1704368
URL: http://svn.apache.org/viewvc?rev=1704368&view=rev
Log:
Fix license headers and reformat in tika-example
Apache License v2.0 headers are changed to the recommended form[1] (as used in o.a.tika.Tika).
Reformat from tabs to 4 spaces.
Added paragraphs in javadoc.
Trivial migrations to Java 7 (like the diamond operator).
Removed some extra lines to have consistent formatting in examples.
[1]: http://www.apache.org/legal/src-headers.html#headers
Modified:
tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MetadataAwareLuceneIndexer.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/MyFirstTika.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ParsingExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/Pharmacy.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/PrescriptionParser.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RecentFiles.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/RollbackSoftware.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTextExtractor.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SimpleTypeDetector.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/SpringExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TIAParsingExample.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/TrecDocumentGenerator.java
tika/trunk/tika-example/src/main/java/org/apache/tika/example/ZipListFiles.java
tika/trunk/tika-example/src/test/java/org/apache/tika/example/AdvancedTypeDetectorTest.java
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/AdvancedTypeDetector.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -24,32 +27,30 @@ import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MimeTypesFactory;
public class AdvancedTypeDetector {
-
- public static String detectWithCustomConfig(String name) throws Exception {
- String config = "/org/apache/tika/mime/tika-mimetypes.xml";
- Tika tika = new Tika(MimeTypesFactory.create(config));
- return tika.detect(name);
- }
-
- public static String detectWithCustomDetector(String name) throws Exception {
- String config = "/org/apache/tika/mime/tika-mimetypes.xml";
- Detector detector = MimeTypesFactory.create(config);
-
- Detector custom = new Detector() {
- private static final long serialVersionUID = -5420638839201540749L;
-
- public MediaType detect(InputStream input, Metadata metadata) {
- String type = metadata.get("my-custom-type-override");
- if (type != null) {
- return MediaType.parse(type);
- } else {
- return MediaType.OCTET_STREAM;
- }
- }
- };
-
- Tika tika = new Tika(new CompositeDetector(custom, detector));
- return tika.detect(name);
- }
-
+ public static String detectWithCustomConfig(String name) throws Exception {
+ String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+ Tika tika = new Tika(MimeTypesFactory.create(config));
+ return tika.detect(name);
+ }
+
+ public static String detectWithCustomDetector(String name) throws Exception {
+ String config = "/org/apache/tika/mime/tika-mimetypes.xml";
+ Detector detector = MimeTypesFactory.create(config);
+
+ Detector custom = new Detector() {
+ private static final long serialVersionUID = -5420638839201540749L;
+
+ public MediaType detect(InputStream input, Metadata metadata) {
+ String type = metadata.get("my-custom-type-override");
+ if (type != null) {
+ return MediaType.parse(type);
+ } else {
+ return MediaType.OCTET_STREAM;
+ }
+ }
+ };
+
+ Tika tika = new Tika(new CompositeDetector(custom, detector));
+ return tika.detect(name);
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ContentHandlerExample.java Mon Sep 21 17:19:26 2015
@@ -14,6 +14,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
package org.apache.tika.example;
import java.io.IOException;
@@ -36,7 +37,7 @@ import org.xml.sax.SAXException;
/**
* Examples of using different Content Handlers to
- * get different parts of the file's contents
+ * get different parts of the file's contents
*/
public class ContentHandlerExample {
/**
@@ -67,10 +68,10 @@ public class ContentHandlerExample {
return handler.toString();
}
}
-
+
/**
* Example of extracting just the body as HTML, without the
- * head part, as a string
+ * head part, as a string
*/
public String parseBodyToHTML() throws IOException, SAXException, TikaException {
ContentHandler handler = new BodyContentHandler(
@@ -83,16 +84,15 @@ public class ContentHandlerExample {
return handler.toString();
}
}
-
+
/**
* Example of extracting just one part of the document's body,
- * as HTML as a string, excluding the rest
+ * as HTML as a string, excluding the rest
*/
public String parseOnePartToHTML() throws IOException, SAXException, TikaException {
// Only get things under html -> body -> div (class=header)
XPathParser xhtmlParser = new XPathParser("xhtml", XHTMLContentHandler.XHTML);
- Matcher divContentMatcher = xhtmlParser.parse(
- "/xhtml:html/xhtml:body/xhtml:div/descendant::node()");
+ Matcher divContentMatcher = xhtmlParser.parse("/xhtml:html/xhtml:body/xhtml:div/descendant::node()");
ContentHandler handler = new MatchingContentHandler(
new ToXMLContentHandler(), divContentMatcher);
@@ -103,25 +103,26 @@ public class ContentHandlerExample {
return handler.toString();
}
}
-
+
protected final int MAXIMUM_TEXT_CHUNK_SIZE = 40;
+
/**
* Example of extracting the plain text in chunks, with each chunk
- * of no more than a certain maximum size
+ * of no more than a certain maximum size
*/
public List<String> parseToPlainTextChunks() throws IOException, SAXException, TikaException {
- final List<String> chunks = new ArrayList<String>();
+ final List<String> chunks = new ArrayList<>();
chunks.add("");
ContentHandlerDecorator handler = new ContentHandlerDecorator() {
@Override
public void characters(char[] ch, int start, int length) {
- String lastChunk = chunks.get(chunks.size()-1);
+ String lastChunk = chunks.get(chunks.size() - 1);
String thisStr = new String(ch, start, length);
-
- if (lastChunk.length()+length > MAXIMUM_TEXT_CHUNK_SIZE) {
+
+ if (lastChunk.length() + length > MAXIMUM_TEXT_CHUNK_SIZE) {
chunks.add(thisStr);
} else {
- chunks.set(chunks.size()-1, lastChunk+thisStr);
+ chunks.set(chunks.size() - 1, lastChunk + thisStr);
}
}
};
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/CustomMimeInfo.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -22,28 +25,25 @@ import org.apache.tika.mime.MimeTypes;
import org.apache.tika.mime.MimeTypesFactory;
public class CustomMimeInfo {
-
- public static String customMimeInfo() throws Exception {
- String path = "file:///path/to/prescription-type.xml";
- MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
- Tika tika = new Tika(typeDatabase);
- String type = tika.detect("/path/to/prescription.xpd");
- return type;
- }
-
- public static String customCompositeDetector() throws Exception {
- String path = "file:///path/to/prescription-type.xml";
- MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
- Tika tika = new Tika(new CompositeDetector(typeDatabase,
- new EncryptedPrescriptionDetector()));
- String type = tika.detect("/path/to/tmp/prescription.xpd");
- return type;
- }
-
- public static void main(String[] args) throws Exception {
- System.out.println("customMimeInfo=" + customMimeInfo());
- System.out.println("customCompositeDetector="
- + customCompositeDetector());
- }
-
+ public static String customMimeInfo() throws Exception {
+ String path = "file:///path/to/prescription-type.xml";
+ MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+ Tika tika = new Tika(typeDatabase);
+ String type = tika.detect("/path/to/prescription.xpd");
+ return type;
+ }
+
+ public static String customCompositeDetector() throws Exception {
+ String path = "file:///path/to/prescription-type.xml";
+ MimeTypes typeDatabase = MimeTypesFactory.create(new URL(path));
+ Tika tika = new Tika(new CompositeDetector(typeDatabase,
+ new EncryptedPrescriptionDetector()));
+ String type = tika.detect("/path/to/tmp/prescription.xpd");
+ return type;
+ }
+
+ public static void main(String[] args) throws Exception {
+ System.out.println("customMimeInfo=" + customMimeInfo());
+ System.out.println("customCompositeDetector=" + customCompositeDetector());
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DescribeMetadata.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -17,14 +20,10 @@ package org.apache.tika.example;
import org.apache.tika.cli.TikaCLI;
/**
- *
* Print the supported Tika Metadata models and their fields.
- *
*/
public class DescribeMetadata {
-
- public static void main(String[] args) throws Exception {
- TikaCLI.main(new String[] { "--list-met-models" });
- }
-
+ public static void main(String[] args) throws Exception {
+ TikaCLI.main(new String[]{"--list-met-models"});
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DirListParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -16,7 +19,7 @@ package org.apache.tika.example;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Arrays;
+import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@@ -40,101 +43,101 @@ import static java.nio.charset.StandardC
*/
public class DirListParser implements Parser {
- private static final long serialVersionUID = 2717930544410610735L;
+ private static final long serialVersionUID = 2717930544410610735L;
- private static Set<MediaType> SUPPORTED_TYPES = new HashSet<MediaType>(
- Arrays.asList(MediaType.TEXT_PLAIN));
+ private static Set<MediaType> SUPPORTED_TYPES = new HashSet<>(
+ Collections.singletonList(MediaType.TEXT_PLAIN));
- /*
- * (non-Javadoc)
- *
- * @see org.apache.tika.parser.Parser#getSupportedTypes(
- * org.apache.tika.parser.ParseContext)
- */
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return SUPPORTED_TYPES;
- }
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
- */
- public void parse(InputStream is, ContentHandler handler, Metadata metadata)
- throws IOException, SAXException, TikaException {
- this.parse(is, handler, metadata, new ParseContext());
- }
-
- /*
- * (non-Javadoc)
- *
- * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
- * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
- * org.apache.tika.parser.ParseContext)
- */
- public void parse(InputStream is, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
-
- List<String> lines = FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
- for (String line : lines) {
- String[] fileToks = line.split("\\s+");
- if (fileToks.length < 8)
- continue;
- String filePermissions = fileToks[0];
- String numHardLinks = fileToks[1];
- String fileOwner = fileToks[2];
- String fileOwnerGroup = fileToks[3];
- String fileSize = fileToks[4];
- StringBuffer lastModDate = new StringBuffer();
- lastModDate.append(fileToks[5]);
- lastModDate.append(" ");
- lastModDate.append(fileToks[6]);
- lastModDate.append(" ");
- lastModDate.append(fileToks[7]);
- StringBuffer fileName = new StringBuffer();
- for (int i = 8; i < fileToks.length; i++) {
- fileName.append(fileToks[i]);
- fileName.append(" ");
- }
- fileName.deleteCharAt(fileName.length() - 1);
- this.addMetadata(metadata, filePermissions, numHardLinks,
- fileOwner, fileOwnerGroup, fileSize,
- lastModDate.toString(), fileName.toString());
- }
- }
-
- public static void main(String[] args) throws IOException, SAXException,
- TikaException {
- DirListParser parser = new DirListParser();
- Metadata met = new Metadata();
- parser.parse(System.in, new BodyContentHandler(), met);
-
- System.out.println("Num files: " + met.getValues("Filename").length);
- System.out.println("Num executables: " + met.get("NumExecutables"));
- }
-
- private void addMetadata(Metadata metadata, String filePerms,
- String numHardLinks, String fileOwner, String fileOwnerGroup,
- String fileSize, String lastModDate, String fileName) {
- metadata.add("FilePermissions", filePerms);
- metadata.add("NumHardLinks", numHardLinks);
- metadata.add("FileOwner", fileOwner);
- metadata.add("FileOwnerGroup", fileOwnerGroup);
- metadata.add("FileSize", fileSize);
- metadata.add("LastModifiedDate", lastModDate);
- metadata.add("Filename", fileName);
-
- if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
- if (metadata.get("NumExecutables") != null) {
- int numExecs = Integer.valueOf(metadata.get("NumExecutables"));
- numExecs++;
- metadata.set("NumExecutables", String.valueOf(numExecs));
- } else {
- metadata.set("NumExecutables", "1");
- }
- }
- }
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#getSupportedTypes(
+ * org.apache.tika.parser.ParseContext)
+ */
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return SUPPORTED_TYPES;
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+ * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata)
+ */
+ public void parse(InputStream is, ContentHandler handler, Metadata metadata)
+ throws IOException, SAXException, TikaException {
+ this.parse(is, handler, metadata, new ParseContext());
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see org.apache.tika.parser.Parser#parse(java.io.InputStream,
+ * org.xml.sax.ContentHandler, org.apache.tika.metadata.Metadata,
+ * org.apache.tika.parser.ParseContext)
+ */
+ public void parse(InputStream is, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+
+ List<String> lines = FileUtils.readLines(TikaInputStream.get(is).getFile(), UTF_8);
+ for (String line : lines) {
+ String[] fileToks = line.split("\\s+");
+ if (fileToks.length < 8)
+ continue;
+ String filePermissions = fileToks[0];
+ String numHardLinks = fileToks[1];
+ String fileOwner = fileToks[2];
+ String fileOwnerGroup = fileToks[3];
+ String fileSize = fileToks[4];
+ StringBuilder lastModDate = new StringBuilder();
+ lastModDate.append(fileToks[5]);
+ lastModDate.append(" ");
+ lastModDate.append(fileToks[6]);
+ lastModDate.append(" ");
+ lastModDate.append(fileToks[7]);
+ StringBuilder fileName = new StringBuilder();
+ for (int i = 8; i < fileToks.length; i++) {
+ fileName.append(fileToks[i]);
+ fileName.append(" ");
+ }
+ fileName.deleteCharAt(fileName.length() - 1);
+ this.addMetadata(metadata, filePermissions, numHardLinks,
+ fileOwner, fileOwnerGroup, fileSize,
+ lastModDate.toString(), fileName.toString());
+ }
+ }
+
+ public static void main(String[] args) throws IOException, SAXException,
+ TikaException {
+ DirListParser parser = new DirListParser();
+ Metadata met = new Metadata();
+ parser.parse(System.in, new BodyContentHandler(), met);
+
+ System.out.println("Num files: " + met.getValues("Filename").length);
+ System.out.println("Num executables: " + met.get("NumExecutables"));
+ }
+
+ private void addMetadata(Metadata metadata, String filePerms,
+ String numHardLinks, String fileOwner, String fileOwnerGroup,
+ String fileSize, String lastModDate, String fileName) {
+ metadata.add("FilePermissions", filePerms);
+ metadata.add("NumHardLinks", numHardLinks);
+ metadata.add("FileOwner", fileOwner);
+ metadata.add("FileOwnerGroup", fileOwnerGroup);
+ metadata.add("FileSize", fileSize);
+ metadata.add("LastModifiedDate", lastModDate);
+ metadata.add("Filename", fileName);
+
+ if (filePerms.indexOf("x") != -1 && filePerms.indexOf("d") == -1) {
+ if (metadata.get("NumExecutables") != null) {
+ int numExecs = Integer.valueOf(metadata.get("NumExecutables"));
+ numExecs++;
+ metadata.set("NumExecutables", String.valueOf(numExecs));
+ } else {
+ metadata.set("NumExecutables", "1");
+ }
+ }
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DisplayMetInstance.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -28,19 +31,16 @@ import org.xml.sax.SAXException;
* Grabs a PDF file from a URL and prints its {@link Metadata}
*/
public class DisplayMetInstance {
-
- public static Metadata getMet(URL url) throws IOException, SAXException,
- TikaException {
- Metadata met = new Metadata();
- PDFParser parser = new PDFParser();
- parser.parse(url.openStream(), new BodyContentHandler(), met,
- new ParseContext());
- return met;
- }
-
- public static void main(String[] args) throws Exception {
- Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
- System.out.println(met);
- }
-
+ public static Metadata getMet(URL url) throws IOException, SAXException,
+ TikaException {
+ Metadata met = new Metadata();
+ PDFParser parser = new PDFParser();
+ parser.parse(url.openStream(), new BodyContentHandler(), met, new ParseContext());
+ return met;
+ }
+
+ public static void main(String[] args) throws Exception {
+ Metadata met = DisplayMetInstance.getMet(new URL(args[0]));
+ System.out.println(met);
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/DumpTikaConfigExample.java Mon Sep 21 17:19:26 2015
@@ -1,4 +1,3 @@
-package org.apache.tika.example;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -16,13 +15,7 @@ package org.apache.tika.example;
* limitations under the License.
*/
-import javax.xml.parsers.DocumentBuilder;
-import javax.xml.parsers.DocumentBuilderFactory;
-import javax.xml.transform.OutputKeys;
-import javax.xml.transform.Transformer;
-import javax.xml.transform.TransformerFactory;
-import javax.xml.transform.dom.DOMSource;
-import javax.xml.transform.stream.StreamResult;
+package org.apache.tika.example;
import java.io.File;
import java.io.FileOutputStream;
@@ -36,6 +29,13 @@ import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.DefaultDetector;
@@ -65,9 +65,7 @@ import static java.nio.charset.StandardC
* for your custom mime types.
*/
public class DumpTikaConfigExample {
-
/**
- *
* @param config config file to dump
* @param writer writer to which to write
* @throws Exception
@@ -104,8 +102,7 @@ public class DumpTikaConfigExample {
Translator translator = config.getTranslator();
if (translator instanceof DefaultTranslator) {
Node mimeComment = doc.createComment(
- "for example: "+
- "<translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
+ "for example: <translator class=\"org.apache.tika.language.translate.GoogleTranslator\"/>");
rootElement.appendChild(mimeComment);
} else {
Element translatorElement = doc.createElement("translator");
@@ -125,7 +122,7 @@ public class DumpTikaConfigExample {
Element detectorsElement = doc.createElement("detectors");
if (detector instanceof DefaultDetector) {
- List<Detector> children = ((DefaultDetector)detector).getDetectors();
+ List<Detector> children = ((DefaultDetector) detector).getDetectors();
for (Detector d : children) {
Element detectorElement = doc.createElement("detector");
detectorElement.setAttribute("class", d.getClass().getCanonicalName());
@@ -147,9 +144,9 @@ public class DumpTikaConfigExample {
Parser child = e.getValue();
String className = e.getKey();
parserElement.setAttribute("class", className);
- Set<MediaType> types = new TreeSet<MediaType>();
+ Set<MediaType> types = new TreeSet<>();
types.addAll(child.getSupportedTypes(context));
- for (MediaType type : types){
+ for (MediaType type : types) {
Element mimeElement = doc.createElement("mime");
mimeElement.appendChild(doc.createTextNode(type.toString()));
parserElement.appendChild(mimeElement);
@@ -160,10 +157,10 @@ public class DumpTikaConfigExample {
}
- private Map<String, Parser> getConcreteParsers(Parser parentParser)throws TikaException, IOException {
- Map<String, Parser> parsers = new TreeMap<String, Parser>();
+ private Map<String, Parser> getConcreteParsers(Parser parentParser) throws TikaException, IOException {
+ Map<String, Parser> parsers = new TreeMap<>();
if (parentParser instanceof CompositeParser) {
- addParsers((CompositeParser)parentParser, parsers);
+ addParsers((CompositeParser) parentParser, parsers);
} else {
addParser(parentParser, parsers);
}
@@ -173,7 +170,7 @@ public class DumpTikaConfigExample {
private void addParsers(CompositeParser p, Map<String, Parser> parsers) {
for (Parser child : p.getParsers().values()) {
if (child instanceof CompositeParser) {
- addParsers((CompositeParser)child, parsers);
+ addParsers((CompositeParser) child, parsers);
} else {
addParser(child, parsers);
}
@@ -185,12 +182,10 @@ public class DumpTikaConfigExample {
}
/**
- *
* @param args outputFile, outputEncoding, if args is empty, this prints to console
* @throws Exception
*/
public static void main(String[] args) throws Exception {
-
Charset encoding = UTF_8;
Writer writer = null;
if (args.length > 0) {
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionDetector.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -18,7 +21,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.security.GeneralSecurityException;
import java.security.Key;
-
import javax.crypto.Cipher;
import javax.crypto.CipherInputStream;
import javax.xml.namespace.QName;
@@ -30,30 +32,28 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
public class EncryptedPrescriptionDetector implements Detector {
+ private static final long serialVersionUID = -1709652690773421147L;
- private static final long serialVersionUID = -1709652690773421147L;
-
- public MediaType detect(InputStream stream, Metadata metadata)
- throws IOException {
- Key key = Pharmacy.getKey();
- MediaType type = MediaType.OCTET_STREAM;
-
- try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
- Cipher cipher = Cipher.getInstance("RSA");
-
- cipher.init(Cipher.DECRYPT_MODE, key);
- InputStream decrypted = new CipherInputStream(lookahead, cipher);
-
- QName name = new XmlRootExtractor().extractRootElement(decrypted);
- if (name != null
- && "http://example.com/xpd".equals(name.getNamespaceURI())
- && "prescription".equals(name.getLocalPart())) {
- type = MediaType.application("x-prescription");
- }
- } catch (GeneralSecurityException e) {
- // unable to decrypt, fall through
- }
- return type;
- }
-
+ public MediaType detect(InputStream stream, Metadata metadata)
+ throws IOException {
+ Key key = Pharmacy.getKey();
+ MediaType type = MediaType.OCTET_STREAM;
+
+ try (InputStream lookahead = new LookaheadInputStream(stream, 1024)) {
+ Cipher cipher = Cipher.getInstance("RSA");
+
+ cipher.init(Cipher.DECRYPT_MODE, key);
+ InputStream decrypted = new CipherInputStream(lookahead, cipher);
+
+ QName name = new XmlRootExtractor().extractRootElement(decrypted);
+ if (name != null
+ && "http://example.com/xpd".equals(name.getNamespaceURI())
+ && "prescription".equals(name.getLocalPart())) {
+ type = MediaType.application("x-prescription");
+ }
+ } catch (GeneralSecurityException e) {
+ // unable to decrypt, fall through
+ }
+ return type;
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/EncryptedPrescriptionParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.security.GeneralSecurityExce
import java.security.Key;
import java.util.Collections;
import java.util.Set;
-
import javax.crypto.Cipher;
import javax.crypto.CipherInputStream;
@@ -33,28 +35,26 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class EncryptedPrescriptionParser extends AbstractParser {
+ private static final long serialVersionUID = -7816987249611278541L;
- private static final long serialVersionUID = -7816987249611278541L;
-
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws IOException,
- SAXException, TikaException {
- try {
- Key key = Pharmacy.getKey();
- Cipher cipher = Cipher.getInstance("RSA");
- cipher.init(Cipher.DECRYPT_MODE, key);
- InputStream decrypted = new CipherInputStream(stream, cipher);
-
- new PrescriptionParser().parse(decrypted, handler, metadata,
- context);
- } catch (GeneralSecurityException e) {
- throw new TikaException("Unable to decrypt a digital prescription",
- e);
- }
- }
-
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- return Collections.singleton(MediaType.application("x-prescription"));
- }
-
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws IOException,
+ SAXException, TikaException {
+ try {
+ Key key = Pharmacy.getKey();
+ Cipher cipher = Cipher.getInstance("RSA");
+ cipher.init(Cipher.DECRYPT_MODE, key);
+ InputStream decrypted = new CipherInputStream(stream, cipher);
+
+ new PrescriptionParser().parse(decrypted, handler, metadata,
+ context);
+ } catch (GeneralSecurityException e) {
+ throw new TikaException("Unable to decrypt a digital prescription",
+ e);
+ }
+ }
+
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ return Collections.singleton(MediaType.application("x-prescription"));
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ExtractEmbeddedFiles.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,7 +17,6 @@
package org.apache.tika.example;
-
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
@@ -37,9 +39,8 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class ExtractEmbeddedFiles {
-
private Parser parser = new AutoDetectParser();
- private Detector detector = ((AutoDetectParser)parser).getDetector();
+ private Detector detector = ((AutoDetectParser) parser).getDetector();
private TikaConfig config = TikaConfig.getDefaultConfig();
public void extract(InputStream is, Path outputDir) throws SAXException, TikaException, IOException {
@@ -87,7 +88,7 @@ public class ExtractEmbeddedFiles {
//now try to figure out the right extension for the embedded file
MediaType contentType = detector.detect(stream, metadata);
- if (name.indexOf('.')==-1 && contentType!=null) {
+ if (name.indexOf('.') == -1 && contentType != null) {
try {
name += config.getMimeRepository().forName(
contentType.toString()).getExtension();
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/GrabPhoneNumbersExample.java Mon Sep 21 17:19:26 2015
@@ -1,10 +1,12 @@
-package org.apache.tika.example;
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -12,6 +14,14 @@ package org.apache.tika.example;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+
+package org.apache.tika.example;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.HashSet;
+
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
@@ -19,15 +29,10 @@ import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.PhoneExtractingContentHandler;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.util.HashSet;
-
/**
* Class to demonstrate how to use the {@link org.apache.tika.sax.PhoneExtractingContentHandler}
* to get a list of all of the phone numbers from every file in a directory.
- *
+ * <p>
* You can run this main method by running
* <code>
* mvn exec:java -Dexec.mainClass="org.apache.tika.example.GrabPhoneNumbersExample" -Dexec.args="/path/to/directory"
@@ -38,7 +43,7 @@ public class GrabPhoneNumbersExample {
private static HashSet<String> phoneNumbers = new HashSet<String>();
private static int failedFiles, successfulFiles = 0;
- public static void main(String[] args){
+ public static void main(String[] args) {
if (args.length != 1) {
System.err.println("Usage `java GrabPhoneNumbers [corpus]");
return;
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/ImportContextImpl.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -20,7 +23,6 @@ import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;
-
import javax.jcr.Item;
import org.apache.jackrabbit.server.io.DefaultIOListener;
@@ -38,205 +40,196 @@ import org.slf4j.LoggerFactory;
* <code>ImportContextImpl</code>...
*/
public class ImportContextImpl implements ImportContext {
+ private static Logger log = LoggerFactory.getLogger(ImportContextImpl.class);
- private static Logger log = LoggerFactory
- .getLogger(ImportContextImpl.class);
-
- private final IOListener ioListener;
- private final Item importRoot;
- private final String systemId;
- private final File inputFile;
-
- private InputContext inputCtx;
- private boolean completed;
-
- private final Detector detector;
-
- private final MediaType type;
-
- /**
- * Creates a new item import context. The specified InputStream is written
- * to a temporary file in order to avoid problems with multiple IOHandlers
- * that try to run the import but fail. The temporary file is deleted as
- * soon as this context is informed that the import has been completed and
- * it will not be used any more.
- *
- * @param importRoot
- * @param systemId
- * @param ctx
- * input context, or <code>null</code>
- * @param stream
- * document input stream, or <code>null</code>
- * @param ioListener
- * @param detector
- * content type detector
- * @throws IOException
- * @see ImportContext#informCompleted(boolean)
- */
- public ImportContextImpl(Item importRoot, String systemId,
- InputContext ctx, InputStream stream, IOListener ioListener,
- Detector detector) throws IOException {
- this.importRoot = importRoot;
- this.systemId = systemId;
- this.inputCtx = ctx;
- this.ioListener = (ioListener != null) ? ioListener
- : new DefaultIOListener(log);
- this.detector = detector;
-
- Metadata metadata = new Metadata();
- if (ctx != null && ctx.getContentType() != null) {
- metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
- }
- if (systemId != null) {
- metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
- }
- if (stream != null && !stream.markSupported()) {
- stream = new BufferedInputStream(stream);
- }
- type = detector.detect(stream, metadata);
-
- this.inputFile = IOUtil.getTempFile(stream);
- }
-
- /**
- * @see ImportContext#getIOListener()
- */
- public IOListener getIOListener() {
- return ioListener;
- }
-
- /**
- * @see ImportContext#getImportRoot()
- */
- public Item getImportRoot() {
- return importRoot;
- }
-
- /**
- * @see ImportContext#getDetector()
- */
- public Detector getDetector() {
- return detector;
- }
-
- /**
- * @see ImportContext#hasStream()
- */
- public boolean hasStream() {
- return inputFile != null;
- }
-
- /**
- * Returns a new <code>InputStream</code> to the temporary file created
- * during instanciation or <code>null</code>, if this context does not
- * provide a stream.
- *
- * @see ImportContext#getInputStream()
- * @see #hasStream()
- */
- public InputStream getInputStream() {
- checkCompleted();
- InputStream in = null;
- if (inputFile != null) {
- try {
- in = new FileInputStream(inputFile);
- } catch (IOException e) {
- // unexpected error... ignore and return null
- }
- }
- return in;
- }
-
- /**
- * @see ImportContext#getSystemId()
- */
- public String getSystemId() {
- return systemId;
- }
-
- /**
- * @see ImportContext#getModificationTime()
- */
- public long getModificationTime() {
- return (inputCtx != null) ? inputCtx.getModificationTime() : new Date()
- .getTime();
- }
-
- /**
- * @see ImportContext#getContentLanguage()
- */
- public String getContentLanguage() {
- return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
- }
-
- /**
- * @see ImportContext#getContentLength()
- */
- public long getContentLength() {
- long length = IOUtil.UNDEFINED_LENGTH;
- if (inputCtx != null) {
- length = inputCtx.getContentLength();
- }
- if (length < 0 && inputFile != null) {
- length = inputFile.length();
- }
- if (length < 0) {
- log.debug("Unable to determine content length -> default value = "
- + IOUtil.UNDEFINED_LENGTH);
- }
- return length;
- }
-
- /**
- * @see ImportContext#getMimeType()
- */
- public String getMimeType() {
- return IOUtil.getMimeType(type.toString());
- }
-
- /**
- * @see ImportContext#getEncoding()
- */
- public String getEncoding() {
- return IOUtil.getEncoding(type.toString());
- }
-
- /**
- * @see ImportContext#getProperty(Object)
- */
- public Object getProperty(Object propertyName) {
- return (inputCtx != null) ? inputCtx.getProperty(propertyName
- .toString()) : null;
- }
-
- /**
- * @see ImportContext#informCompleted(boolean)
- */
- public void informCompleted(boolean success) {
- checkCompleted();
- completed = true;
- if (inputFile != null) {
- inputFile.delete();
- }
- }
-
- /**
- * @see ImportContext#isCompleted()
- */
- public boolean isCompleted() {
- return completed;
- }
-
- /**
- * @throws IllegalStateException
- * if the context is already completed.
- * @see #isCompleted()
- * @see #informCompleted(boolean)
- */
- private void checkCompleted() {
- if (completed) {
- throw new IllegalStateException(
- "ImportContext has already been consumed.");
- }
- }
+ private final IOListener ioListener;
+ private final Item importRoot;
+ private final String systemId;
+ private final File inputFile;
+
+ private InputContext inputCtx;
+ private boolean completed;
+
+ private final Detector detector;
+
+ private final MediaType type;
+
+ /**
+ * Creates a new item import context. The specified InputStream is written
+ * to a temporary file in order to avoid problems with multiple IOHandlers
+ * that try to run the import but fail. The temporary file is deleted as
+ * soon as this context is informed that the import has been completed and
+ * it will not be used any more.
+ *
+ * @param importRoot
+ * @param systemId
+ * @param ctx input context, or <code>null</code>
+ * @param stream document input stream, or <code>null</code>
+ * @param ioListener
+ * @param detector content type detector
+ * @throws IOException
+ * @see ImportContext#informCompleted(boolean)
+ */
+ public ImportContextImpl(Item importRoot, String systemId,
+ InputContext ctx, InputStream stream, IOListener ioListener,
+ Detector detector) throws IOException {
+ this.importRoot = importRoot;
+ this.systemId = systemId;
+ this.inputCtx = ctx;
+ this.ioListener = (ioListener != null) ? ioListener
+ : new DefaultIOListener(log);
+ this.detector = detector;
+
+ Metadata metadata = new Metadata();
+ if (ctx != null && ctx.getContentType() != null) {
+ metadata.set(Metadata.CONTENT_TYPE, ctx.getContentType());
+ }
+ if (systemId != null) {
+ metadata.set(Metadata.RESOURCE_NAME_KEY, systemId);
+ }
+ if (stream != null && !stream.markSupported()) {
+ stream = new BufferedInputStream(stream);
+ }
+ type = detector.detect(stream, metadata);
+
+ this.inputFile = IOUtil.getTempFile(stream);
+ }
+
+ /**
+ * @see ImportContext#getIOListener()
+ */
+ public IOListener getIOListener() {
+ return ioListener;
+ }
+
+ /**
+ * @see ImportContext#getImportRoot()
+ */
+ public Item getImportRoot() {
+ return importRoot;
+ }
+
+ /**
+ * @see ImportContext#getDetector()
+ */
+ public Detector getDetector() {
+ return detector;
+ }
+
+ /**
+ * @see ImportContext#hasStream()
+ */
+ public boolean hasStream() {
+ return inputFile != null;
+ }
+
+ /**
+ * Returns a new <code>InputStream</code> to the temporary file created
+ * during instanciation or <code>null</code>, if this context does not
+ * provide a stream.
+ *
+ * @see ImportContext#getInputStream()
+ * @see #hasStream()
+ */
+ public InputStream getInputStream() {
+ checkCompleted();
+ InputStream in = null;
+ if (inputFile != null) {
+ try {
+ in = new FileInputStream(inputFile);
+ } catch (IOException e) {
+ // unexpected error... ignore and return null
+ }
+ }
+ return in;
+ }
+
+ /**
+ * @see ImportContext#getSystemId()
+ */
+ public String getSystemId() {
+ return systemId;
+ }
+
+ /**
+ * @see ImportContext#getModificationTime()
+ */
+ public long getModificationTime() {
+ return (inputCtx != null) ? inputCtx.getModificationTime() : new Date().getTime();
+ }
+
+ /**
+ * @see ImportContext#getContentLanguage()
+ */
+ public String getContentLanguage() {
+ return (inputCtx != null) ? inputCtx.getContentLanguage() : null;
+ }
+
+ /**
+ * @see ImportContext#getContentLength()
+ */
+ public long getContentLength() {
+ long length = IOUtil.UNDEFINED_LENGTH;
+ if (inputCtx != null) {
+ length = inputCtx.getContentLength();
+ }
+ if (length < 0 && inputFile != null) {
+ length = inputFile.length();
+ }
+ if (length < 0) {
+ log.debug("Unable to determine content length -> default value = "
+ + IOUtil.UNDEFINED_LENGTH);
+ }
+ return length;
+ }
+
+ /**
+ * @see ImportContext#getMimeType()
+ */
+ public String getMimeType() {
+ return IOUtil.getMimeType(type.toString());
+ }
+
+ /**
+ * @see ImportContext#getEncoding()
+ */
+ public String getEncoding() {
+ return IOUtil.getEncoding(type.toString());
+ }
+
+ /**
+ * @see ImportContext#getProperty(Object)
+ */
+ public Object getProperty(Object propertyName) {
+ return (inputCtx != null) ? inputCtx.getProperty(propertyName.toString()) : null;
+ }
+
+ /**
+ * @see ImportContext#informCompleted(boolean)
+ */
+ public void informCompleted(boolean success) {
+ checkCompleted();
+ completed = true;
+ if (inputFile != null) {
+ inputFile.delete();
+ }
+ }
+
+ /**
+ * @see ImportContext#isCompleted()
+ */
+ public boolean isCompleted() {
+ return completed;
+ }
+
+ /**
+ * @throws IllegalStateException if the context is already completed.
+ * @see #isCompleted()
+ * @see #informCompleted(boolean)
+ */
+ private void checkCompleted() {
+ if (completed) {
+ throw new IllegalStateException("ImportContext has already been consumed.");
+ }
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/Language.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -25,35 +28,31 @@ import org.apache.tika.parser.AutoDetect
import org.apache.tika.parser.ParseContext;
public class Language {
-
- public static void languageDetection() throws IOException {
- LanguageProfile profile = new LanguageProfile(
- "Alla människor är födda fria och"
- + " lika i värde och rättigheter.");
-
- LanguageIdentifier identifier = new LanguageIdentifier(profile);
- System.out.println(identifier.getLanguage());
- }
-
- public static void languageDetectionWithWriter() throws IOException {
- ProfilingWriter writer = new ProfilingWriter();
- writer.append("Minden emberi lény");
- writer.append(" szabadon születik és");
- writer.append(" egyenlő méltósága és");
- writer.append(" joga van.");
-
- LanguageIdentifier identifier = writer.getLanguage();
- System.out.println(identifier.getLanguage());
- writer.close();
-
- }
-
- public static void languageDetectionWithHandler() throws Exception {
- ProfilingHandler handler = new ProfilingHandler();
- new AutoDetectParser().parse(System.in, handler, new Metadata(),
- new ParseContext());
-
- LanguageIdentifier identifier = handler.getLanguage();
- System.out.println(identifier.getLanguage());
- }
+ public static void languageDetection() throws IOException {
+ LanguageProfile profile = new LanguageProfile(
+ "Alla människor är födda fria och lika i värde och rättigheter.");
+
+ LanguageIdentifier identifier = new LanguageIdentifier(profile);
+ System.out.println(identifier.getLanguage());
+ }
+
+ public static void languageDetectionWithWriter() throws IOException {
+ ProfilingWriter writer = new ProfilingWriter();
+ writer.append("Minden emberi lény");
+ writer.append(" szabadon születik és");
+ writer.append(" egyenlő méltósága és");
+ writer.append(" joga van.");
+
+ LanguageIdentifier identifier = writer.getLanguage();
+ System.out.println(identifier.getLanguage());
+ writer.close();
+ }
+
+ public static void languageDetectionWithHandler() throws Exception {
+ ProfilingHandler handler = new ProfilingHandler();
+ new AutoDetectParser().parse(System.in, handler, new Metadata(), new ParseContext());
+
+ LanguageIdentifier identifier = handler.getLanguage();
+ System.out.println(identifier.getLanguage());
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LanguageDetectingParser.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -29,21 +32,19 @@ import org.xml.sax.SAXException;
@SuppressWarnings("deprecation")
public class LanguageDetectingParser extends DelegatingParser {
+ private static final long serialVersionUID = 4291320409396502774L;
- private static final long serialVersionUID = 4291320409396502774L;
-
- public void parse(InputStream stream, ContentHandler handler,
- final Metadata metadata, ParseContext context) throws SAXException,
- IOException, TikaException {
- ProfilingHandler profiler = new ProfilingHandler();
- ContentHandler tee = new TeeContentHandler(handler, profiler);
-
- super.parse(stream, tee, metadata, context);
-
- LanguageIdentifier identifier = profiler.getLanguage();
- if (identifier.isReasonablyCertain()) {
- metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
- }
- }
-
+ public void parse(InputStream stream, ContentHandler handler,
+ final Metadata metadata, ParseContext context) throws SAXException,
+ IOException, TikaException {
+ ProfilingHandler profiler = new ProfilingHandler();
+ ContentHandler tee = new TeeContentHandler(handler, profiler);
+
+ super.parse(stream, tee, metadata, context);
+
+ LanguageIdentifier identifier = profiler.getLanguage();
+ if (identifier.isReasonablyCertain()) {
+ metadata.set(Metadata.LANGUAGE, identifier.getLanguage());
+ }
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LazyTextExtractorField.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -44,171 +47,164 @@ import org.xml.sax.helpers.DefaultHandle
*/
@SuppressWarnings("serial")
public class LazyTextExtractorField extends AbstractField {
-
- /**
- * The logger instance for this class.
- */
- private static final Logger log = LoggerFactory
- .getLogger(LazyTextExtractorField.class);
-
- /**
- * The exception used to forcibly terminate the extraction process when the
- * maximum field length is reached.
- */
- private static final SAXException STOP = new SAXException(
- "max field length reached");
-
- /**
- * The extracted text content of the given binary value. Set to non-null
- * when the text extraction task finishes.
- */
- private volatile String extract = null;
-
- /**
- * Creates a new <code>LazyTextExtractorField</code> with the given
- * <code>name</code>.
- *
- * @param name
- * the name of the field.
- * @param reader
- * the reader where to obtain the string from.
- * @param highlighting
- * set to <code>true</code> to enable result highlighting support
- */
- public LazyTextExtractorField(Parser parser, InternalValue value,
- Metadata metadata, Executor executor, boolean highlighting,
- int maxFieldLength) {
- super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
- Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
- : TermVector.NO);
- executor.execute(new ParsingTask(parser, value, metadata,
- maxFieldLength));
- }
-
- /**
- * Returns the extracted text. This method blocks until the text extraction
- * task has been completed.
- *
- * @return the string value of this field
- */
- public synchronized String stringValue() {
- try {
- while (!isExtractorFinished()) {
- wait();
- }
- return extract;
- } catch (InterruptedException e) {
- log.error("Text extraction thread was interrupted", e);
- return "";
- }
- }
-
- /**
- * @return always <code>null</code>
- */
- public Reader readerValue() {
- return null;
- }
-
- /**
- * @return always <code>null</code>
- */
- public byte[] binaryValue() {
- return null;
- }
-
- /**
- * @return always <code>null</code>
- */
- public TokenStream tokenStreamValue() {
- return null;
- }
-
- /**
- * Checks whether the text extraction task has finished.
- *
- * @return <code>true</code> if the extracted text is available
- */
- public boolean isExtractorFinished() {
- return extract != null;
- }
-
- private synchronized void setExtractedText(String value) {
- extract = value;
- notify();
- }
-
- /**
- * Releases all resources associated with this field.
- */
- public void dispose() {
- // TODO: Cause the ContentHandler below to throw an exception
- }
-
- /**
- * The background task for extracting text from a binary value.
- */
- private class ParsingTask extends DefaultHandler implements Runnable {
-
- private final Parser parser;
-
- private final InternalValue value;
-
- private final Metadata metadata;
-
- private final int maxFieldLength;
-
- private final StringBuilder builder = new StringBuilder();
-
- private final ParseContext context = new ParseContext();
-
- // NOTE: not a part of Jackrabbit code, made
- private final ContentHandler handler = new DefaultHandler();
-
- public ParsingTask(Parser parser, InternalValue value,
- Metadata metadata, int maxFieldLength) {
- this.parser = parser;
- this.value = value;
- this.metadata = metadata;
- this.maxFieldLength = maxFieldLength;
- }
-
- public void run() {
- try {
- try (InputStream stream = value.getStream()) {
- parser.parse(stream, handler, metadata, context);
- }
- } catch (LinkageError e) {
- // Capture and ignore
- } catch (Throwable t) {
- if (t != STOP) {
- log.debug("Failed to extract text.", t);
- setExtractedText("TextExtractionError");
- return;
- }
- } finally {
- value.discard();
- }
- setExtractedText(handler.toString());
-
- }
-
- @Override
- public void characters(char[] ch, int start, int length)
- throws SAXException {
- builder.append(ch, start,
- Math.min(length, maxFieldLength - builder.length()));
- if (builder.length() >= maxFieldLength) {
- throw STOP;
- }
- }
-
- @Override
- public void ignorableWhitespace(char[] ch, int start, int length)
- throws SAXException {
- characters(ch, start, length);
- }
-
- }
-
+ /**
+ * The logger instance for this class.
+ */
+ private static final Logger log = LoggerFactory.getLogger(LazyTextExtractorField.class);
+
+ /**
+ * The exception used to forcibly terminate the extraction process when the
+ * maximum field length is reached.
+ * <p>
+ * Such exceptions shouldn't be used in logging since its stack trace is meaningless.
+ */
+ private static final SAXException STOP = new SAXException("max field length reached");
+
+ /**
+ * The extracted text content of the given binary value. Set to non-null
+ * when the text extraction task finishes.
+ */
+ private volatile String extract = null;
+
+ /**
+ * Creates a new <code>LazyTextExtractorField</code> with the given
+ * <code>name</code>.
+ *
+ * @param name the name of the field.
+ * @param reader the reader where to obtain the string from.
+ * @param highlighting set to <code>true</code> to enable result highlighting support
+ */
+ public LazyTextExtractorField(Parser parser, InternalValue value,
+ Metadata metadata, Executor executor, boolean highlighting,
+ int maxFieldLength) {
+ super(FieldNames.FULLTEXT, highlighting ? Store.YES : Store.NO,
+ Field.Index.ANALYZED, highlighting ? TermVector.WITH_OFFSETS
+ : TermVector.NO);
+ executor.execute(new ParsingTask(parser, value, metadata,
+ maxFieldLength));
+ }
+
+ /**
+ * Returns the extracted text. This method blocks until the text extraction
+ * task has been completed.
+ *
+ * @return the string value of this field
+ */
+ public synchronized String stringValue() {
+ try {
+ while (!isExtractorFinished()) {
+ wait();
+ }
+ return extract;
+ } catch (InterruptedException e) {
+ log.error("Text extraction thread was interrupted", e);
+ return "";
+ }
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public Reader readerValue() {
+ return null;
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public byte[] binaryValue() {
+ return null;
+ }
+
+ /**
+ * @return always <code>null</code>
+ */
+ public TokenStream tokenStreamValue() {
+ return null;
+ }
+
+ /**
+ * Checks whether the text extraction task has finished.
+ *
+ * @return <code>true</code> if the extracted text is available
+ */
+ public boolean isExtractorFinished() {
+ return extract != null;
+ }
+
+ private synchronized void setExtractedText(String value) {
+ extract = value;
+ notify();
+ }
+
+ /**
+ * Releases all resources associated with this field.
+ */
+ public void dispose() {
+ // TODO: Cause the ContentHandler below to throw an exception
+ }
+
+ /**
+ * The background task for extracting text from a binary value.
+ */
+ private class ParsingTask extends DefaultHandler implements Runnable {
+ private final Parser parser;
+
+ private final InternalValue value;
+
+ private final Metadata metadata;
+
+ private final int maxFieldLength;
+
+ private final StringBuilder builder = new StringBuilder();
+
+ private final ParseContext context = new ParseContext();
+
+ // NOTE: not a part of Jackrabbit code, made
+ private final ContentHandler handler = new DefaultHandler();
+
+ public ParsingTask(Parser parser, InternalValue value,
+ Metadata metadata, int maxFieldLength) {
+ this.parser = parser;
+ this.value = value;
+ this.metadata = metadata;
+ this.maxFieldLength = maxFieldLength;
+ }
+
+ public void run() {
+ try {
+ try (InputStream stream = value.getStream()) {
+ parser.parse(stream, handler, metadata, context);
+ }
+ } catch (LinkageError e) {
+ // Capture and ignore
+ } catch (Throwable t) {
+ if (t != STOP) {
+ log.debug("Failed to extract text.", t);
+ setExtractedText("TextExtractionError");
+ return;
+ }
+ } finally {
+ value.discard();
+ }
+ setExtractedText(handler.toString());
+
+ }
+
+ @Override
+ public void characters(char[] ch, int start, int length)
+ throws SAXException {
+ builder.append(ch, start,
+ Math.min(length, maxFieldLength - builder.length()));
+ if (builder.length() >= maxFieldLength) {
+ throw STOP;
+ }
+ }
+
+ @Override
+ public void ignorableWhitespace(char[] ch, int start, int length)
+ throws SAXException {
+ characters(ch, start, length);
+ }
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexer.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -14,8 +17,8 @@
package org.apache.tika.example;
-/* */
import java.io.File;
+
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
@@ -24,23 +27,19 @@ import org.apache.lucene.index.IndexWrit
import org.apache.tika.Tika;
public class LuceneIndexer {
+ private final Tika tika;
- private final Tika tika;
-
- private final IndexWriter writer;
-
- public LuceneIndexer(Tika tika, IndexWriter writer) {
- this.tika = tika;
- this.writer = writer;
- }
-
- public void indexDocument(File file) throws Exception {
- Document document = new Document();
- document.add(new Field("filename", file.getName(), Store.YES,
- Index.ANALYZED));
- document.add(new Field("fulltext", tika.parseToString(file), Store.NO,
- Index.ANALYZED));
- writer.addDocument(document);
- }
+ private final IndexWriter writer;
+ public LuceneIndexer(Tika tika, IndexWriter writer) {
+ this.tika = tika;
+ this.writer = writer;
+ }
+
+ public void indexDocument(File file) throws Exception {
+ Document document = new Document();
+ document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+ document.add(new Field("fulltext", tika.parseToString(file), Store.NO, Index.ANALYZED));
+ writer.addDocument(document);
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/LuceneIndexerExtended.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -30,35 +33,33 @@ import org.apache.tika.Tika;
@SuppressWarnings("deprecation")
public class LuceneIndexerExtended {
+ private final Tika tika;
- private final IndexWriter writer;
-
- private final Tika tika;
-
- public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
- this.writer = writer;
- this.tika = tika;
- }
-
- public static void main(String[] args) throws Exception {
- try (IndexWriter writer = new IndexWriter(new SimpleFSDirectory(new File(
- args[0])), new StandardAnalyzer(Version.LUCENE_30),
- MaxFieldLength.UNLIMITED)) {
- LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
- for (int i = 1; i < args.length; i++) {
- indexer.indexDocument(new File(args[i]));
- }
- }
- }
-
- public void indexDocument(File file) throws Exception {
- try (Reader fulltext = tika.parse(file)) {
- Document document = new Document();
- document.add(new Field("filename", file.getName(), Store.YES,
- Index.ANALYZED));
- document.add(new Field("fulltext", fulltext));
- writer.addDocument(document);
- }
- }
+ private final IndexWriter writer;
+ public LuceneIndexerExtended(IndexWriter writer, Tika tika) {
+ this.writer = writer;
+ this.tika = tika;
+ }
+
+ public static void main(String[] args) throws Exception {
+ try (IndexWriter writer = new IndexWriter(
+ new SimpleFSDirectory(new File(args[0])),
+ new StandardAnalyzer(Version.LUCENE_30),
+ MaxFieldLength.UNLIMITED)) {
+ LuceneIndexer indexer = new LuceneIndexer(new Tika(), writer);
+ for (int i = 1; i < args.length; i++) {
+ indexer.indexDocument(new File(args[i]));
+ }
+ }
+ }
+
+ public void indexDocument(File file) throws Exception {
+ try (Reader fulltext = tika.parse(file)) {
+ Document document = new Document();
+ document.add(new Field("filename", file.getName(), Store.YES, Index.ANALYZED));
+ document.add(new Field("fulltext", fulltext));
+ writer.addDocument(document);
+ }
+ }
}
Modified: tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java?rev=1704368&r1=1704367&r2=1704368&view=diff
==============================================================================
--- tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java (original)
+++ tika/trunk/tika-example/src/main/java/org/apache/tika/example/MediaTypeExample.java Mon Sep 21 17:19:26 2015
@@ -1,9 +1,12 @@
-/**
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
*
- * http://www.apache.org/licenses/LICENSE-2.0
+ * http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
@@ -21,38 +24,35 @@ import org.apache.tika.mime.MediaType;
import org.apache.tika.mime.MediaTypeRegistry;
public class MediaTypeExample {
+ public static void describeMediaType() {
+ MediaType type = MediaType.parse("text/plain; charset=UTF-8");
- public static void describeMediaType() {
-
- MediaType type = MediaType.parse("text/plain; charset=UTF-8");
-
- System.out.println("type: " + type.getType());
- System.out.println("subtype: " + type.getSubtype());
-
- Map<String, String> parameters = type.getParameters();
- System.out.println("parameters:");
- for (String name : parameters.keySet()) {
- System.out.println(" " + name + "=" + parameters.get(name));
- }
- }
-
- public static void listAllTypes() {
- MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
- for (MediaType type : registry.getTypes()) {
- Set<MediaType> aliases = registry.getAliases(type);
- System.out.println(type + ", also known as " + aliases);
- }
- }
-
- public static void main(String[] args) throws Exception {
- MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
-
- MediaType type = MediaType.parse("image/svg+xml");
- while (type != null) {
- System.out.println(type);
- type = registry.getSupertype(type);
- }
- }
+ System.out.println("type: " + type.getType());
+ System.out.println("subtype: " + type.getSubtype());
+ Map<String, String> parameters = type.getParameters();
+ System.out.println("parameters:");
+ for (String name : parameters.keySet()) {
+ System.out.println(" " + name + "=" + parameters.get(name));
+ }
+ }
+
+ public static void listAllTypes() {
+ MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+ for (MediaType type : registry.getTypes()) {
+ Set<MediaType> aliases = registry.getAliases(type);
+ System.out.println(type + ", also known as " + aliases);
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ MediaTypeRegistry registry = MediaTypeRegistry.getDefaultRegistry();
+
+ MediaType type = MediaType.parse("image/svg+xml");
+ while (type != null) {
+ System.out.println(type);
+ type = registry.getSupertype(type);
+ }
+ }
}