You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/18 14:13:26 UTC
[tika] branch main updated: Add close for some InputStream (#359)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new b5c3485 Add close for some InputStream (#359)
b5c3485 is described below
commit b5c3485e9f1c390565ccc0abc5e4609fdab9bd0a
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Fri Sep 18 22:13:15 2020 +0800
Add close for some InputStream (#359)
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 4 +-
.../org/apache/tika/mime/CustomReaderTest.java | 14 ++--
.../org/apache/tika/mime/MimeDetectionTest.java | 10 ++-
.../tika/mime/ProbabilisticMimeDetectionTest.java | 13 +--
.../ProbabilisticMimeDetectionTestWithTika.java | 13 +--
.../java/org/apache/tika/eval/TikaEvalCLI.java | 13 +--
.../tika/eval/io/FatalExceptionReaderTest.java | 7 +-
.../filetypedetector/TikaFileTypeDetectorTest.java | 11 ++-
.../tika/langdetect/LanguageDetectorTest.java | 33 ++++----
.../apache/tika/parser/html/HtmlParserTest.java | 40 +++++----
.../apache/tika/parser/image/ICNSParserTest.java | 12 +--
.../apache/tika/parser/image/ImageParserTest.java | 31 ++++---
.../apache/tika/parser/image/JpegParserTest.java | 58 +++++++------
.../apache/tika/parser/image/PSDParserTest.java | 12 +--
.../tika/config/TikaEncodingDetectorTest.java | 13 +--
.../tika/parser/RecursiveParserWrapperTest.java | 8 +-
.../java/org/apache/tika/parser/TestXXEInXML.java | 5 +-
.../parser/fork/ForkParserIntegrationTest.java | 3 +-
.../apache/tika/parser/html/HtmlParserTest.java | 25 +++---
.../tika/parser/ocr/TesseractOCRParserTest.java | 24 +++---
.../org/apache/tika/parser/pdf/PDFParserTest.java | 14 ++--
.../tika/parser/microsoft/ExcelParserTest.java | 22 ++---
.../tika/parser/microsoft/OutlookParserTest.java | 40 ++++-----
.../tika/parser/microsoft/WordParserTest.java | 10 ++-
.../parser/microsoft/ooxml/OOXMLParserTest.java | 55 ++++++------
.../parser/microsoft/ooxml/SXSLFExtractorTest.java | 13 +--
.../parser/microsoft/ooxml/SXWPFExtractorTest.java | 14 ++--
.../tika/parser/microsoft/rtf/RTFParserTest.java | 36 ++++----
.../apache/tika/parser/epub/EpubParserTest.java | 23 ++---
.../tika/parser/wordperfect/WordPerfectTest.java | 13 +--
.../apache/tika/parser/ocr/TesseractOCRParser.java | 5 +-
.../org/apache/tika/parser/pdf/PDFParserTest.java | 69 +++++++--------
.../tika/parser/txt/CharsetDetectorTest.java | 4 +-
.../tika/parser/xmp/JempboxExtractorTest.java | 98 ++++++++++++----------
.../tika/parser/recognition/AgeRecogniserTest.java | 28 ++++---
.../tika/dl/imagerec/DL4JInceptionV3NetTest.java | 11 ++-
.../apache/tika/dl/imagerec/DL4JVGG16NetTest.java | 10 ++-
.../tika/parser/ner/NamedEntityParserTest.java | 72 ++++++++--------
.../tika/parser/sqlite3/SQLite3ParserTest.java | 12 ++-
.../java/org/apache/tika/server/HTMLHelper.java | 11 +--
40 files changed, 495 insertions(+), 414 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index a6c0009..7a3a30c 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -129,7 +129,9 @@ public class TikaCLI {
TikaCLI cli = new TikaCLI();
if (!isConfigured()) {
- PropertyConfigurator.configure(cli.getClass().getResourceAsStream("/log4j.properties"));
+ try (InputStream is = cli.getClass().getResourceAsStream("/log4j.properties")) {
+ PropertyConfigurator.configure(is);
+ }
}
if (cli.testForHelp(args)) {
diff --git a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
index df51d45..37c6d76 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/CustomReaderTest.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.mime;
+import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@@ -72,8 +73,9 @@ public class CustomReaderTest {
public void testCustomReader() throws Exception {
MimeTypes mimeTypes = new MimeTypes();
CustomMimeTypesReader reader = new CustomMimeTypesReader(mimeTypes);
- reader.read(getClass().getResourceAsStream("custom-mimetypes.xml"));
-
+ try (InputStream is = getClass().getResourceAsStream("custom-mimetypes.xml")) {
+ reader.read(is);
+ }
String key = "hello/world-file";
MimeType hello = mimeTypes.forName(key);
@@ -82,14 +84,14 @@ public class CustomReaderTest {
assertEquals(0, reader.ignorePatterns.size());
// Now add another resource with conflicting regex
- reader.read(getClass().getResourceAsStream("custom-mimetypes2.xml"));
-
+ try (InputStream is = getClass().getResourceAsStream("custom-mimetypes2.xml")) {
+ reader.read(is);
+ }
key = "another/world-file";
MimeType another = mimeTypes.forName(key);
assertEquals("kittens", reader.values.get(key));
assertEquals(1, reader.ignorePatterns.size());
- assertEquals(another.toString()+">>*"+hello.getExtension(),
- reader.ignorePatterns.get(0));
+ assertEquals(another.toString()+">>*"+hello.getExtension(), reader.ignorePatterns.get(0));
assertTrue("Server-side script type not detected", another.isInterpreted());
//System.out.println( mimeTypes.getMediaTypeRegistry().getTypes() );
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index c0f1109..ef3155a 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -157,13 +157,15 @@ public class MimeDetectionTest {
}
private void testUrl(String expected, String url, String file) throws IOException{
- InputStream in = getClass().getResourceAsStream(file);
- testStream(expected, url, in);
+ try (InputStream in = getClass().getResourceAsStream(file)) {
+ testStream(expected, url, in);
+ }
}
private void testFile(String expected, String filename) throws IOException {
- InputStream in = getClass().getResourceAsStream(filename);
- testStream(expected, filename, in);
+ try (InputStream in = getClass().getResourceAsStream(filename)) {
+ testStream(expected, filename, in);
+ }
}
private void testStream(String expected, String urlOrFileName, InputStream in) throws IOException{
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
index e285449..d23a594 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTest.java
@@ -130,15 +130,16 @@ public class ProbabilisticMimeDetectionTest {
testStream(expected, url, in);
}
- private void testUrl(String expected, String url, String file)
- throws IOException {
- InputStream in = getClass().getResourceAsStream(file);
- testStream(expected, url, in);
+ private void testUrl(String expected, String url, String file) throws IOException {
+ try (InputStream in = getClass().getResourceAsStream(file)) {
+ testStream(expected, url, in);
+ }
}
private void testFile(String expected, String filename) throws IOException {
- InputStream in = getClass().getResourceAsStream(filename);
- testStream(expected, filename, in);
+ try (InputStream in = getClass().getResourceAsStream(filename)) {
+ testStream(expected, filename, in);
+ }
}
private void testStream(String expected, String urlOrFileName,
diff --git a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
index 871f975..8a314d8 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/ProbabilisticMimeDetectionTestWithTika.java
@@ -150,15 +150,16 @@ public class ProbabilisticMimeDetectionTestWithTika {
testStream(expected, url, in);
}
- private void testUrl(String expected, String url, String file)
- throws IOException {
- InputStream in = getClass().getResourceAsStream(file);
- testStream(expected, url, in);
+ private void testUrl(String expected, String url, String file) throws IOException {
+ try (InputStream in = getClass().getResourceAsStream(file)) {
+ testStream(expected, url, in);
+ }
}
private void testFile(String expected, String filename) throws IOException {
- InputStream in = getClass().getResourceAsStream(filename);
- testStream(expected, filename, in);
+ try (InputStream in = getClass().getResourceAsStream(filename)) {
+ testStream(expected, filename, in);
+ }
}
private void testStream(String expected, String urlOrFileName,
diff --git a/tika-eval/src/main/java/org/apache/tika/eval/TikaEvalCLI.java b/tika-eval/src/main/java/org/apache/tika/eval/TikaEvalCLI.java
index fc3f22b..4baa3b0 100644
--- a/tika-eval/src/main/java/org/apache/tika/eval/TikaEvalCLI.java
+++ b/tika-eval/src/main/java/org/apache/tika/eval/TikaEvalCLI.java
@@ -16,6 +16,7 @@
*/
package org.apache.tika.eval;
+import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
@@ -139,9 +140,9 @@ public class TikaEvalCLI {
try {
tmpBCConfig = Files.createTempFile("tika-eval-profiler", ".xml");
if (! containsBC) {
- Files.copy(
- this.getClass().getResourceAsStream("/tika-eval-profiler-config.xml"),
- tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
+ try (InputStream is = this.getClass().getResourceAsStream("/tika-eval-profiler-config.xml")) {
+ Files.copy(is, tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
+ }
argList.add("-bc");
argList.add(tmpBCConfig.toAbsolutePath().toString());
}
@@ -230,9 +231,9 @@ public class TikaEvalCLI {
try {
tmpBCConfig = Files.createTempFile("tika-eval", ".xml");
if (! containsBC) {
- Files.copy(
- this.getClass().getResourceAsStream("/tika-eval-comparison-config.xml"),
- tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
+ try (InputStream is = this.getClass().getResourceAsStream("/tika-eval-comparison-config.xml")) {
+ Files.copy(is, tmpBCConfig, StandardCopyOption.REPLACE_EXISTING);
+ }
argList.add("-bc");
argList.add(tmpBCConfig.toAbsolutePath().toString());
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/io/FatalExceptionReaderTest.java b/tika-eval/src/test/java/org/apache/tika/eval/io/FatalExceptionReaderTest.java
index 3b99a76..179b875 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/io/FatalExceptionReaderTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/io/FatalExceptionReaderTest.java
@@ -25,8 +25,9 @@ import org.junit.Test;
public class FatalExceptionReaderTest {
@Test
public void testSimpleRead() throws Exception {
- InputStream is = this.getClass().getResourceAsStream("/test-dirs/batch-logs/batch-process-fatal.xml");
- XMLLogReader reader = new XMLLogReader();
- //reader.read(is);
+ try (InputStream is = this.getClass().getResourceAsStream("/test-dirs/batch-logs/batch-process-fatal.xml")) {
+ XMLLogReader reader = new XMLLogReader();
+ //reader.read(is);
+ }
}
}
diff --git a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java
index fad7e1a..6133dd6 100644
--- a/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java
+++ b/tika-java7/src/test/java/org/apache/tika/filetypedetector/TikaFileTypeDetectorTest.java
@@ -19,6 +19,7 @@ package org.apache.tika.filetypedetector;
import static org.junit.Assert.*;
import java.io.IOException;
+import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.spi.FileTypeDetector;
@@ -45,10 +46,12 @@ public class TikaFileTypeDetectorTest {
@Before
public void setUp() throws Exception {
testDirectory = tempDir.newFolder().toPath();
- Files.copy(this.getClass().getResourceAsStream(TEST_CLASSPATH),
- testDirectory.resolve(TEST_HTML));
- Files.copy(this.getClass().getResourceAsStream(TEST_CLASSPATH),
- testDirectory.resolve(TEST_UNRECOGNISED_EXTENSION));
+ try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) {
+ Files.copy(is, testDirectory.resolve(TEST_HTML));
+ }
+ try (InputStream is = this.getClass().getResourceAsStream(TEST_CLASSPATH)) {
+ Files.copy(is, testDirectory.resolve(TEST_UNRECOGNISED_EXTENSION));
+ }
}
@After
diff --git a/tika-langdetect/tika-langdetect-commons/src/test/java/org/apache/tika/langdetect/LanguageDetectorTest.java b/tika-langdetect/tika-langdetect-commons/src/test/java/org/apache/tika/langdetect/LanguageDetectorTest.java
index 8120e67..5c2b892 100644
--- a/tika-langdetect/tika-langdetect-commons/src/test/java/org/apache/tika/langdetect/LanguageDetectorTest.java
+++ b/tika-langdetect/tika-langdetect-commons/src/test/java/org/apache/tika/langdetect/LanguageDetectorTest.java
@@ -33,23 +33,22 @@ public abstract class LanguageDetectorTest {
protected String[] getTestLanguages() throws IOException {
List<String> result = new ArrayList<>();
- List<String> lines = IOUtils.readLines(
- this.getClass().getResourceAsStream("language-codes.txt"),
- UTF_8);
- for (String line : lines) {
- line = line.trim();
- if (line.isEmpty() || line.startsWith("#")) {
- continue;
- }
-
- String[] parsed = line.split("\t");
- String language = parsed[0];
- if (hasTestLanguage(language)) {
- result.add(language);
- }
- }
-
- return result.toArray(new String[result.size()]);
+ try (InputStream is = this.getClass().getResourceAsStream("language-codes.txt")) {
+ List<String> lines = IOUtils.readLines(is, UTF_8);
+ for (String line : lines) {
+ line = line.trim();
+ if (line.isEmpty() || line.startsWith("#")) {
+ continue;
+ }
+
+ String[] parsed = line.split("\t");
+ String language = parsed[0];
+ if (hasTestLanguage(language)) {
+ result.add(language);
+ }
+ }
+ return result.toArray(new String[result.size()]);
+ }
}
diff --git a/tika-parser-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java b/tika-parser-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
index 937cf96..9e18a75 100644
--- a/tika-parser-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
+++ b/tika-parser-modules/tika-parser-html-module/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
@@ -1303,17 +1303,16 @@ public class HtmlParserTest extends TikaTest {
@Test
public void testConfigExtractScript() throws Exception {
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- List<Metadata> metadataList = getRecursiveMetadata("testHTMLGoodScript.html", p);
- assertEquals(2, metadataList.size());
- assertEquals("MACRO", metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
- assertContains("cool",
- metadataList.get(1).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
- assertNotContained("cool", metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
-
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ List<Metadata> metadataList = getRecursiveMetadata("testHTMLGoodScript.html", p);
+ assertEquals(2, metadataList.size());
+ assertEquals("MACRO", metadataList.get(1).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+ assertContains("cool", metadataList.get(1).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+ assertNotContained("cool", metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT));
+ }
}
@@ -1426,15 +1425,14 @@ public class HtmlParserTest extends TikaTest {
metadataList.get(0).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
//make sure to include it if a user wants scripts to be extracted
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- metadataList = getRecursiveMetadata("testHTML_embedded_data_uri_js.html", p);
- assertEquals(2, metadataList.size());
- assertContains("alert( 'Hello, world!' );",
- metadataList.get(1).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
-
-
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ metadataList = getRecursiveMetadata("testHTML_embedded_data_uri_js.html", p);
+ assertEquals(2, metadataList.size());
+ assertContains("alert( 'Hello, world!' );",
+ metadataList.get(1).get(RecursiveParserWrapperHandler.TIKA_CONTENT));
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ICNSParserTest.java b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ICNSParserTest.java
index 5404ad7..4a9f806 100644
--- a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ICNSParserTest.java
+++ b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ICNSParserTest.java
@@ -40,9 +40,9 @@ public class ICNSParserTest {
metadata.set("Icons details", "512x512 (JPEG 2000 or PNG format)");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testICNS_basic.icns");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testICNS_basic.icns")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
}
/**
@@ -58,8 +58,8 @@ public class ICNSParserTest {
metadata.set("Masked icon details", "16x16 (8 bpp), 32x32 (8 bpp)");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testICNS.icns");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testICNS.icns")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
index 5f9d81a..4b3f2b2 100644
--- a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
+++ b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
@@ -36,9 +36,9 @@ public class ImageParserTest extends TikaTest {
public void testBMP() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testBMP.bmp");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testBMP.bmp")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
@@ -59,9 +59,9 @@ public class ImageParserTest extends TikaTest {
public void testGIF() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/gif");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testGIF.gif");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testGIF.gif")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
@@ -90,9 +90,9 @@ public class ImageParserTest extends TikaTest {
public void testJPEG() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
@@ -126,9 +126,9 @@ public class ImageParserTest extends TikaTest {
public void testPNG() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/png");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testPNG.png");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testPNG.png")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("75", metadata.get("height"));
assertEquals("100", metadata.get("width"));
@@ -164,10 +164,9 @@ public class ImageParserTest extends TikaTest {
public void testJBIG2() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/x-jbig2");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJBIG2.jb2");
- parser.parse(
- stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJBIG2.jb2")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("78", metadata.get("height"));
assertEquals("328", metadata.get("width"));
assertEquals("image/x-jbig2", metadata.get("Content-Type"));
diff --git a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
index 20842c2..0f51fc4 100644
--- a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
+++ b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/JpegParserTest.java
@@ -61,9 +61,9 @@ public class JpegParserTest {
public void testJPEG() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// Core EXIF/TIFF tags
assertEquals("3888", metadata.get(Metadata.IMAGE_WIDTH));
@@ -107,9 +107,9 @@ public class JpegParserTest {
public void testJPEGGeo() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// Geo tags
assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
@@ -153,9 +153,9 @@ public class JpegParserTest {
public void testJPEGGeo2() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// Geo tags should be there with 5dp, and not rounded
assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
@@ -166,9 +166,9 @@ public class JpegParserTest {
public void testJPEGTitleAndDescription() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// embedded comments with non-ascii characters
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
@@ -209,9 +209,10 @@ public class JpegParserTest {
public void testJPEGTitleAndDescriptionPhotoshop() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// embedded comments with non-ascii characters
assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
@@ -225,9 +226,10 @@ public class JpegParserTest {
public void testJPEGTitleAndDescriptionXnviewmp() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
// XnViewMp's default comment dialog has only comment, not headline.
// Comment is embedded only if "Write comments in XMP" is enabled in settings
@@ -244,9 +246,9 @@ public class JpegParserTest {
public void testJPEGoddTagComponent() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
@@ -258,9 +260,10 @@ public class JpegParserTest {
public void testJPEGEmptyEXIFDateTime() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("300.0", metadata.get(TIFF.RESOLUTION_HORIZONTAL));
assertEquals("300.0", metadata.get(TIFF.RESOLUTION_VERTICAL));
}
@@ -269,9 +272,10 @@ public class JpegParserTest {
public void testJPEGXMPMM() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
//TODO: when jempbox is fixed/xmpbox is used
//add tests for history...currently not extracted
diff --git a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
index 104d96e..abf3874 100644
--- a/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+++ b/tika-parser-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
@@ -39,9 +39,9 @@ public class PSDParserTest extends TikaTest {
public void testPSD() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/x-psd");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testPSD.psd");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testPSD.psd")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("537", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("51", metadata.get(Metadata.IMAGE_LENGTH));
@@ -56,9 +56,9 @@ public class PSDParserTest extends TikaTest {
public void testOddPSD() throws Exception {
Metadata metadata = new Metadata();
metadata.set(Metadata.CONTENT_TYPE, "image/x-psd");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testPSD2.psd");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testPSD2.psd")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
assertEquals("69", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("70", metadata.get(Metadata.IMAGE_LENGTH));
assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java
index 86fd328..6e8ac3f 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/config/TikaEncodingDetectorTest.java
@@ -21,6 +21,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -128,8 +129,10 @@ public class TikaEncodingDetectorTest extends AbstractTikaConfigTest {
@Test
public void testNonDetectingDetectorParams() throws Exception {
- TikaConfig tikaConfig = new TikaConfig(
- getResourceAsStream("/org/apache/tika/config/TIKA-2273-non-detecting-params.xml"));
+ TikaConfig tikaConfig = null;
+ try (InputStream is = getResourceAsStream("/org/apache/tika/config/TIKA-2273-non-detecting-params.xml")) {
+ tikaConfig = new TikaConfig(is);
+ }
AutoDetectParser p = new AutoDetectParser(tikaConfig);
List<Parser> parsers = new ArrayList<>();
findEncodingDetectionParsers(p, parsers);
@@ -147,9 +150,9 @@ public class TikaEncodingDetectorTest extends AbstractTikaConfigTest {
@Test
public void testNonDetectingDetectorParamsBadCharset() throws Exception {
- try {
- TikaConfig tikaConfig = new TikaConfig(
- getResourceAsStream("/org/apache/tika/config/TIKA-2273-non-detecting-params-bad-charset.xml"));
+ try (InputStream is =
+ getResourceAsStream("/org/apache/tika/config/TIKA-2273-non-detecting-params-bad-charset.xml")){
+ TikaConfig tikaConfig = new TikaConfig(is);
fail("should have thrown TikaConfigException");
} catch (TikaConfigException e) {
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
index be9f950..3781552 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/RecursiveParserWrapperTest.java
@@ -372,8 +372,12 @@ public class RecursiveParserWrapperTest extends TikaTest {
//TIKA-3137
ParseContext context = new ParseContext();
Metadata metadata = new Metadata();
- TikaConfig tikaConfig = new TikaConfig(getClass().getResourceAsStream("TIKA-3137-include.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
+ TikaConfig tikaConfig = null;
+ Parser p = null;
+ try (InputStream is = getClass().getResourceAsStream("TIKA-3137-include.xml")) {
+ tikaConfig = new TikaConfig(is);
+ p = new AutoDetectParser(tikaConfig);
+ }
RecursiveParserWrapper wrapper = new RecursiveParserWrapper(p, true);
String path = "/test-documents/test_recursive_embedded.docx";
ContentHandlerFactory contentHandlerFactory =
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/TestXXEInXML.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/TestXXEInXML.java
index 5a08cb1..5687a33 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/TestXXEInXML.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/TestXXEInXML.java
@@ -67,8 +67,9 @@ public class TestXXEInXML extends XMLTestBase {
@Test
public void testXML() throws Exception {
- parse("testXXE.xml", getResourceAsStream("/test-documents/testXXE.xml"),
- AUTO_DETECT_PARSER, new ParseContext());
+ try (InputStream is = getResourceAsStream("/test-documents/testXXE.xml")) {
+ parse("testXXE.xml", is, AUTO_DETECT_PARSER, new ParseContext());
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
index 2dfac4f..0801798 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/fork/ForkParserIntegrationTest.java
@@ -154,7 +154,7 @@ public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
BrokenParser brokenParser = new BrokenParser();
ForkParser parser = new ForkParser(ForkParser.class.getClassLoader(), brokenParser);
InputStream stream = getClass().getResourceAsStream("/test-documents/testTXT.txt");
-
+
// With a serializable error, we'll get that back
try {
ContentHandler output = new BodyContentHandler();
@@ -165,6 +165,7 @@ public class ForkParserIntegrationTest extends MultiThreadedTikaTest {
assertEquals(brokenParser.err, e.getCause());
} finally {
parser.close();
+ stream.close();
}
// With a non serializable one, we'll get something else
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
index de59ff4..8732578 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/html/HtmlParserTest.java
@@ -49,17 +49,18 @@ public class HtmlParserTest extends TikaTest {
@Test
public void testDataURIInJS() throws Exception {
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- List<Metadata> metadataList = getRecursiveMetadata("testHTML_embedded_img_in_js.html", p);
- assertEquals(3, metadataList.size());
- String content = metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);
- assertContains("some content", content);
- Metadata imgMetadata = metadataList.get(1);
- assertEquals("image/jpeg", imgMetadata.get(Metadata.CONTENT_TYPE));
- assertContains("moscow-birds",
- Arrays.asList(imgMetadata.getValues(TikaCoreProperties.SUBJECT)));
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/html/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ List<Metadata> metadataList = getRecursiveMetadata("testHTML_embedded_img_in_js.html", p);
+ assertEquals(3, metadataList.size());
+ String content = metadataList.get(0).get(AbstractRecursiveParserWrapperHandler.TIKA_CONTENT);
+ assertContains("some content", content);
+ Metadata imgMetadata = metadataList.get(1);
+ assertEquals("image/jpeg", imgMetadata.get(Metadata.CONTENT_TYPE));
+ assertContains("moscow-birds",
+ Arrays.asList(imgMetadata.getValues(TikaCoreProperties.SUBJECT)));
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
index 2ff3dcd..ae8e924 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/ocr/TesseractOCRParserTest.java
@@ -304,17 +304,19 @@ public class TesseractOCRParserTest extends TikaTest {
@Test
public void testConfig() throws Exception {
- TikaConfig config = new TikaConfig(getResourceAsStream("/org/apache/tika/config/TIKA-2705-tesseract.xml"));
- Parser p = config.getParser();
- Parser tesseractOCRParser = findParser(p, org.apache.tika.parser.ocr.TesseractOCRParser.class);
- assertNotNull(tesseractOCRParser);
-
- TesseractOCRConfig tesseractOCRConfig = ((TesseractOCRParser)tesseractOCRParser).getDefaultConfig();
- Assert.assertEquals(241, tesseractOCRConfig.getTimeout());
- Assert.assertEquals(TesseractOCRConfig.OUTPUT_TYPE.HOCR, tesseractOCRConfig.getOutputType());
- Assert.assertEquals("ceb", tesseractOCRConfig.getLanguage());
- Assert.assertEquals(false, tesseractOCRConfig.getApplyRotation());
- assertContains("myspecial", tesseractOCRConfig.getTesseractPath());
+ try (InputStream is = getResourceAsStream("/org/apache/tika/config/TIKA-2705-tesseract.xml")) {
+ TikaConfig config = new TikaConfig(is);
+ Parser p = config.getParser();
+ Parser tesseractOCRParser = findParser(p, org.apache.tika.parser.ocr.TesseractOCRParser.class);
+ assertNotNull(tesseractOCRParser);
+
+ TesseractOCRConfig tesseractOCRConfig = ((TesseractOCRParser)tesseractOCRParser).getDefaultConfig();
+ Assert.assertEquals(241, tesseractOCRConfig.getTimeout());
+ Assert.assertEquals(TesseractOCRConfig.OUTPUT_TYPE.HOCR, tesseractOCRConfig.getOutputType());
+ Assert.assertEquals("ceb", tesseractOCRConfig.getLanguage());
+ Assert.assertEquals(false, tesseractOCRConfig.getApplyRotation());
+ assertContains("myspecial", tesseractOCRConfig.getTesseractPath());
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 5d41adb..54d4915 100644
--- a/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parser-modules/tika-parser-integration-tests/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -91,14 +91,16 @@ public class PDFParserTest extends TikaTest {
//test that it is triggered when added to the default parser
//via the config, tesseract should skip this file because it is too large
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-xml-profiler-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
+ try (InputStream is =
+ getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-xml-profiler-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
- metadataList = getRecursiveMetadata("testPDF_XFA_govdocs1_258578.pdf", p);
- assertEquals(3, metadataList.size());
+ metadataList = getRecursiveMetadata("testPDF_XFA_govdocs1_258578.pdf", p);
+ assertEquals(3, metadataList.size());
+ }
int xmlProfilers = 0;
for (Metadata metadata : metadataList) {
String[] parsedBy = metadata.getValues("X-Parsed-By");
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
index 4e1528a..03d3bed 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ExcelParserTest.java
@@ -514,10 +514,11 @@ public class ExcelParserTest extends TikaTest {
assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xls", context));
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xls", parser));
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xls", parser));
+ }
}
@Test
@@ -575,11 +576,12 @@ public class ExcelParserTest extends TikaTest {
@Test
public void testDateFormat() throws Exception {
- TikaConfig tikaConfig = new TikaConfig(
- this.getClass().getResourceAsStream("tika-config-custom-date-override.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
- String xml = getXML("testEXCEL_dateFormats.xls", p).xml;
- assertContains("2018-09-20", xml);
- assertContains("1996-08-10", xml);
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-custom-date-override.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ String xml = getXML("testEXCEL_dateFormats.xls", p).xml;
+ assertContains("2018-09-20", xml);
+ assertContains("1996-08-10", xml);
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
index 164270d..3933127 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/OutlookParserTest.java
@@ -309,25 +309,27 @@ public class OutlookParserTest extends TikaTest {
//now try extracting all bodies
//they should each appear as standalone attachments
//with no content in the body of the msg level
- TikaConfig tikaConfig = new TikaConfig(getResourceAsStream("tika-config-extract-all-alternatives-msg.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
-
- metadataList = getRecursiveMetadata("testMSG.msg", p);
- assertEquals(3, metadataList.size());
-
- assertNotContained("breaking your application",
- metadataList.get(0).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("application/vnd.ms-outlook",
- metadataList.get(0).get(Metadata.CONTENT_TYPE));
-
- assertContains("breaking your application",
- metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertEquals("application/rtf",
- metadataList.get(1).get(Metadata.CONTENT_TYPE));
-
- assertContains("breaking your application",
- metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
- assertTrue(metadataList.get(2).get(Metadata.CONTENT_TYPE).startsWith("text/plain"));
+ try (InputStream is = getResourceAsStream("tika-config-extract-all-alternatives-msg.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+
+ metadataList = getRecursiveMetadata("testMSG.msg", p);
+ assertEquals(3, metadataList.size());
+
+ assertNotContained("breaking your application",
+ metadataList.get(0).get(RecursiveParserWrapper.TIKA_CONTENT));
+ assertEquals("application/vnd.ms-outlook",
+ metadataList.get(0).get(Metadata.CONTENT_TYPE));
+
+ assertContains("breaking your application",
+ metadataList.get(1).get(RecursiveParserWrapper.TIKA_CONTENT));
+ assertEquals("application/rtf",
+ metadataList.get(1).get(Metadata.CONTENT_TYPE));
+
+ assertContains("breaking your application",
+ metadataList.get(2).get(RecursiveParserWrapper.TIKA_CONTENT));
+ assertTrue(metadataList.get(2).get(Metadata.CONTENT_TYPE).startsWith("text/plain"));
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
index 1c0f3b5..a3f2bed 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/WordParserTest.java
@@ -597,11 +597,13 @@ public class WordParserTest extends TikaTest {
assertContainsAtLeast(minExpected, metadataList);
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- metadataList = getRecursiveMetadata("testWORD_macros.doc", parser);
- assertContainsAtLeast(minExpected, metadataList);
+ metadataList = getRecursiveMetadata("testWORD_macros.doc", parser);
+ assertContainsAtLeast(minExpected, metadataList);
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index fdefbc4..26d2ad5 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1348,10 +1348,11 @@ public class OOXMLParserTest extends TikaTest {
assertContainsAtLeast(minExpected, getRecursiveMetadata("testWORD_macros.docm", context));
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-dom-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- assertContainsAtLeast(minExpected, getRecursiveMetadata("testWORD_macros.docm", parser));
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-dom-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ assertContainsAtLeast(minExpected, getRecursiveMetadata("testWORD_macros.docm", parser));
+ }
}
@Test
@@ -1380,10 +1381,11 @@ public class OOXMLParserTest extends TikaTest {
assertContainsAtLeast(minExpected, getRecursiveMetadata("testPPT_macros.pptm", context));
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-dom-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- assertContainsAtLeast(minExpected, getRecursiveMetadata("testPPT_macros.pptm", parser));
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-dom-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ assertContainsAtLeast(minExpected, getRecursiveMetadata("testPPT_macros.pptm", parser));
+ }
}
@Test
@@ -1413,10 +1415,11 @@ public class OOXMLParserTest extends TikaTest {
getRecursiveMetadata("testEXCEL_macro.xlsm", context));
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-dom-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xlsm", parser));
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-dom-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ assertContainsAtLeast(minExpected, getRecursiveMetadata("testEXCEL_macro.xlsm", parser));
+ }
}
//@Test //use this for lightweight benchmarking to compare xwpf options
@@ -1450,13 +1453,14 @@ public class OOXMLParserTest extends TikaTest {
//NOTE: this test relies on a bug in the DOM extractor that
//is passing over the title information.
//once we fix that, this test will no longer be meaningful!
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- AutoDetectParser p = new AutoDetectParser(tikaConfig);
- XMLResult xml = getXML("testWORD_2006ml.docx", p, new Metadata());
- assertContains("engaging title", xml.xml);
-
+ try (InputStream is =
+ getClass().getResourceAsStream("/org/apache/tika/parser/microsoft/tika-config-sax-docx.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser p = new AutoDetectParser(tikaConfig);
+ XMLResult xml = getXML("testWORD_2006ml.docx", p, new Metadata());
+ assertContains("engaging title", xml.xml);
+ }
}
@Test
@@ -1750,12 +1754,13 @@ public class OOXMLParserTest extends TikaTest {
@Test
public void testDateFormat() throws Exception {
- TikaConfig tikaConfig = new TikaConfig(
- this.getClass().getResourceAsStream("tika-config-custom-date-override.xml"));
- Parser p = new AutoDetectParser(tikaConfig);
- String xml = getXML("testEXCEL_dateFormats.xlsx", p).xml;
- assertContains("2018-09-20", xml);
- assertContains("1996-08-10", xml);
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-custom-date-override.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ String xml = getXML("testEXCEL_dateFormats.xlsx", p).xml;
+ assertContains("2018-09-20", xml);
+ assertContains("1996-08-10", xml);
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
index d87b9ae..c368e8a 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXSLFExtractorTest.java
@@ -569,12 +569,13 @@ public class SXSLFExtractorTest extends TikaTest {
assertContainsAtLeast(parsedBy, metadataList);
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-sax-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- metadataList = getRecursiveMetadata("testPPT_macros.pptm", parser);
- assertContainsAtLeast(minExpected, metadataList);
- assertContainsAtLeast(parsedBy, metadataList);
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-sax-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ metadataList = getRecursiveMetadata("testPPT_macros.pptm", parser);
+ assertContainsAtLeast(minExpected, metadataList);
+ assertContainsAtLeast(parsedBy, metadataList);
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
index 0c02056..bcff5ad 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/SXWPFExtractorTest.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.ByteArrayOutputStream;
+import java.io.InputStream;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.HashMap;
@@ -736,12 +737,13 @@ public class SXWPFExtractorTest extends TikaTest {
assertContainsAtLeast(parsedBy, metadataList);
//test configuring via config file
- TikaConfig tikaConfig = new TikaConfig(this.getClass().getResourceAsStream("tika-config-sax-macros.xml"));
- AutoDetectParser parser = new AutoDetectParser(tikaConfig);
- metadataList = getRecursiveMetadata("testWORD_macros.docm", parser);
- assertContainsAtLeast(minExpected, metadataList);
- assertContainsAtLeast(parsedBy, metadataList);
-
+ try (InputStream is = this.getClass().getResourceAsStream("tika-config-sax-macros.xml")) {
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser parser = new AutoDetectParser(tikaConfig);
+ metadataList = getRecursiveMetadata("testWORD_macros.docm", parser);
+ assertContainsAtLeast(minExpected, metadataList);
+ assertContainsAtLeast(parsedBy, metadataList);
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
index 011895c..fde70b1 100644
--- a/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
+++ b/tika-parser-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/rtf/RTFParserTest.java
@@ -334,15 +334,16 @@ public class RTFParserTest extends TikaTest {
@Test
public void testTurningOffList() throws Exception {
- InputStream is = getClass().getResourceAsStream(
- "/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- String content = getXML("testRTFListMicrosoftWord.rtf", p).xml;
- assertNotContained("<ol>", content);
- assertNotContained("<ul>", content);
- assertNotContained("<li>", content);
+ try (InputStream is = getClass().getResourceAsStream(
+ "/org/apache/tika/parser/microsoft/rtf/ignoreListMarkup-tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ String content = getXML("testRTFListMicrosoftWord.rtf", p).xml;
+ assertNotContained("<ol>", content);
+ assertNotContained("<ul>", content);
+ assertNotContained("<li>", content);
+ }
}
@Test
@@ -445,13 +446,16 @@ public class RTFParserTest extends TikaTest {
//test that memory allocation of the bin element is limited
//via the config file. Unfortunately, this test file's bin embedding contains 10 bytes
//so we had to set the config to 0.
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/microsoft/rtf/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- List<Metadata> metadataList = getRecursiveMetadata("testBinControlWord.rtf", p);
- assertEquals(1, metadataList.size());
- assertContains("TikaMemoryLimitException", metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
+ try (InputStream is =
+ getClass().getResourceAsStream("/org/apache/tika/parser/microsoft/rtf/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ List<Metadata> metadataList = getRecursiveMetadata("testBinControlWord.rtf", p);
+ assertEquals(1, metadataList.size());
+ assertContains("TikaMemoryLimitException",
+ metadataList.get(0).get(TikaCoreProperties.TIKA_META_EXCEPTION_EMBEDDED_STREAM));
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/epub/EpubParserTest.java b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/epub/EpubParserTest.java
index a16732c..a317b53 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/epub/EpubParserTest.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/epub/EpubParserTest.java
@@ -81,17 +81,18 @@ public class EpubParserTest extends TikaTest {
assert(tocIndex < ch2);
assert(ch1 < ch2);
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/epub/tika-config.xml");
- assertNotNull(is);
- Parser p = new AutoDetectParser(new TikaConfig(is));
- xml = getXML("testEPUB.epub", p).xml;
- tocIndex = xml.indexOf("h3 class=\"toc_heading\">Table of Contents<");
- ch1 = xml.indexOf("<h1>Chapter 1");
- ch2 = xml.indexOf("<h1>Chapter 2");
- assert(tocIndex > -1 && ch1 > -1 && ch2 > -1);
- assert(tocIndex > ch1);
- assert(tocIndex > ch2);
- assert(ch1 < ch2);
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/epub/tika-config.xml")) {
+ assertNotNull(is);
+ Parser p = new AutoDetectParser(new TikaConfig(is));
+ xml = getXML("testEPUB.epub", p).xml;
+ tocIndex = xml.indexOf("h3 class=\"toc_heading\">Table of Contents<");
+ ch1 = xml.indexOf("<h1>Chapter 1");
+ ch2 = xml.indexOf("<h1>Chapter 2");
+ assert(tocIndex > -1 && ch1 > -1 && ch2 > -1);
+ assert(tocIndex > ch1);
+ assert(tocIndex > ch2);
+ assert(ch1 < ch2);
+ }
}
diff --git a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/wordperfect/WordPerfectTest.java b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/wordperfect/WordPerfectTest.java
index 2572d8a..d9d3ab2 100644
--- a/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/wordperfect/WordPerfectTest.java
+++ b/tika-parser-modules/tika-parser-miscoffice-module/src/test/java/org/apache/tika/parser/wordperfect/WordPerfectTest.java
@@ -70,13 +70,14 @@ public class WordPerfectTest extends TikaTest {
assertContains("this was deleted.", xml);
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/wordperfect/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/wordperfect/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = tikaConfig.getParser();
+ Parser p = tikaConfig.getParser();
- xml = getXML("testWordPerfect.wpd", p).xml;
- assertNotContained("this was deleted", xml);
+ xml = getXML("testWordPerfect.wpd", p).xml;
+ assertNotContained("this was deleted", xml);
+ }
}
}
diff --git a/tika-parser-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java b/tika-parser-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
index 6d6a357..ee4b81a 100644
--- a/tika-parser-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
+++ b/tika-parser-modules/tika-parser-ocr-module/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
@@ -350,10 +350,11 @@ public class TesseractOCRParser extends AbstractParser implements Initializable
private void processImage(File scratchFile, TesseractOCRConfig config) throws IOException, TikaException {
// fetch rotation script from resources
- InputStream in = getClass().getResourceAsStream("rotation.py");
TemporaryResources tmp = new TemporaryResources();
File rotationScript = tmp.createTemporaryFile();
- Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ try (InputStream in = getClass().getResourceAsStream("rotation.py")) {
+ Files.copy(in, rotationScript.toPath(), StandardCopyOption.REPLACE_EXISTING);
+ }
CommandLine commandLine = new CommandLine("python");
String[] args = {"-W",
diff --git a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index c5f5c39..6bb4bdf 100644
--- a/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parser-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -1180,47 +1180,50 @@ public class PDFParserTest extends TikaTest {
assumeTrue("can run OCR", canRunOCR());
//via the config, tesseract should skip this file because it is too large
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-ocr-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- String text = getText(getResourceAsStream("/test-documents/testOCR.pdf"), p);
- assertTrue(StringUtils.isAllBlank(text));
-
- //now override the max file size to ocr, and you should get text
- ParseContext pc = new ParseContext();
- TesseractOCRConfig tesseractOCRConfig = new TesseractOCRConfig();
- pc.set(TesseractOCRConfig.class, tesseractOCRConfig);
- text = getText(getResourceAsStream("/test-documents/testOCR.pdf"), p, pc);
- assertContains("Happy", text);
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-ocr-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ String text = getText(getResourceAsStream("/test-documents/testOCR.pdf"), p);
+ assertTrue(StringUtils.isAllBlank(text));
+
+ //now override the max file size to ocr, and you should get text
+ ParseContext pc = new ParseContext();
+ TesseractOCRConfig tesseractOCRConfig = new TesseractOCRConfig();
+ pc.set(TesseractOCRConfig.class, tesseractOCRConfig);
+ text = getText(getResourceAsStream("/test-documents/testOCR.pdf"), p, pc);
+ assertContains("Happy", text);
+ }
}
@Test
public void testInitializationViaConfig() throws Exception {
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-config.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- Parser p = new AutoDetectParser(tikaConfig);
- String text = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), p);
- text = text.replaceAll("\\s+", " ");
-
- // Column text is now interleaved:
- assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", text);
+ try (InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-config.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ Parser p = new AutoDetectParser(tikaConfig);
+ String text = getText(getResourceAsStream("/test-documents/testPDFTwoTextBoxes.pdf"), p);
+ text = text.replaceAll("\\s+", " ");
+
+ // Column text is now interleaved:
+ assertContains("Left column line 1 Right column line 1 Left colu mn line 2 Right column line 2", text);
+ }
}
@Test
public void testInitializationOfNonPrimitivesViaConfig() throws Exception {
- InputStream is = getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-config-non-primitives.xml");
- assertNotNull(is);
- TikaConfig tikaConfig = new TikaConfig(is);
- AutoDetectParser p = new AutoDetectParser(tikaConfig);
- Map<MediaType, Parser> parsers = p.getParsers();
- Parser composite = parsers.get(MediaType.application("pdf"));
- Parser pdfParser = ((CompositeParser)composite).getParsers().get(MediaType.application("pdf"));
- assertEquals("org.apache.tika.parser.pdf.PDFParser", pdfParser.getClass().getName());
- assertEquals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY, ((PDFParser)pdfParser).getPDFParserConfig().getOcrStrategy());
- assertEquals(ImageType.RGB, ((PDFParser)pdfParser).getPDFParserConfig().getOcrImageType());
-
+ try (InputStream is =
+ getClass().getResourceAsStream("/org/apache/tika/parser/pdf/tika-config-non-primitives.xml")) {
+ assertNotNull(is);
+ TikaConfig tikaConfig = new TikaConfig(is);
+ AutoDetectParser p = new AutoDetectParser(tikaConfig);
+ Map<MediaType, Parser> parsers = p.getParsers();
+ Parser composite = parsers.get(MediaType.application("pdf"));
+ Parser pdfParser = ((CompositeParser)composite).getParsers().get(MediaType.application("pdf"));
+ assertEquals("org.apache.tika.parser.pdf.PDFParser", pdfParser.getClass().getName());
+ assertEquals(PDFParserConfig.OCR_STRATEGY.OCR_ONLY, ((PDFParser)pdfParser).getPDFParserConfig().getOcrStrategy());
+ assertEquals(ImageType.RGB, ((PDFParser)pdfParser).getPDFParserConfig().getOcrImageType());
+ }
}
@Test
diff --git a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
index cbdf251..7477a49 100644
--- a/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
+++ b/tika-parser-modules/tika-parser-text-module/src/test/java/org/apache/tika/parser/txt/CharsetDetectorTest.java
@@ -66,7 +66,9 @@ public class CharsetDetectorTest extends TikaTest {
public void testWin125XHeuristics() throws Exception {
//TIKA-2219
CharsetDetector detector = new CharsetDetector();
- detector.setText(getResourceAsStream("/test-documents/testTXT_win-1252.txt"));
+ try (InputStream is = getResourceAsStream("/test-documents/testTXT_win-1252.txt")) {
+ detector.setText(is);
+ }
CharsetMatch charset = detector.detect();
assertEquals("windows-1252", charset.getName());
}
diff --git a/tika-parser-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java b/tika-parser-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
index 2aa5532..37b7b25 100644
--- a/tika-parser-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
+++ b/tika-parser-modules/tika-parser-xmp-commons/src/test/java/org/apache/tika/parser/xmp/JempboxExtractorTest.java
@@ -36,59 +36,63 @@ public class JempboxExtractorTest extends TikaTest {
@Test
public void testParseJpeg() throws IOException, TikaException {
Metadata metadata = new Metadata();
- InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
- // set some values before extraction to see that they are overridden
- metadata.set(TikaCoreProperties.TITLE, "old title");
- metadata.set(TikaCoreProperties.DESCRIPTION, "old description");
- metadata.set(TikaCoreProperties.CREATOR, "previous author");
- // ... or kept in case the field is multi-value
- metadata.add(TikaCoreProperties.SUBJECT, "oldkeyword");
-
- JempboxExtractor extractor = new JempboxExtractor(metadata);
- extractor.parse(stream);
-
- // DublinCore fields
- assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
- assertTrue(keywords.contains("oldkeyword"));
- assertTrue(keywords.contains("grazelands"));
- assertTrue(keywords.contains("nature reserve"));
- assertTrue(keywords.contains("bird watching"));
- assertTrue(keywords.contains("coast"));
+ try (InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg")) {
+ // set some values before extraction to see that they are overridden
+ metadata.set(TikaCoreProperties.TITLE, "old title");
+ metadata.set(TikaCoreProperties.DESCRIPTION, "old description");
+ metadata.set(TikaCoreProperties.CREATOR, "previous author");
+ // ... or kept in case the field is multi-value
+ metadata.add(TikaCoreProperties.SUBJECT, "oldkeyword");
+
+ JempboxExtractor extractor = new JempboxExtractor(metadata);
+ extractor.parse(stream);
+
+ // DublinCore fields
+ assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
+ assertTrue(keywords.contains("oldkeyword"));
+ assertTrue(keywords.contains("grazelands"));
+ assertTrue(keywords.contains("nature reserve"));
+ assertTrue(keywords.contains("bird watching"));
+ assertTrue(keywords.contains("coast"));
+ }
}
@Test
public void testParseJpegPhotoshop() throws IOException, TikaException {
Metadata metadata = new Metadata();
- InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
-
- JempboxExtractor extractor = new JempboxExtractor(metadata);
- extractor.parse(stream);
-
- // DublinCore fields
- assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
- assertTrue(keywords.contains("bird watching"));
- assertTrue(keywords.contains("coast"));
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg")) {
+ JempboxExtractor extractor = new JempboxExtractor(metadata);
+ extractor.parse(stream);
+
+ // DublinCore fields
+ assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
+ assertTrue(keywords.contains("bird watching"));
+ assertTrue(keywords.contains("coast"));
+ }
}
@Test
public void testParseJpegXnviewmp() throws IOException, TikaException {
Metadata metadata = new Metadata();
- InputStream stream = getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
-
- JempboxExtractor extractor = new JempboxExtractor(metadata);
- extractor.parse(stream);
-
- // XnViewMp fields not understood by Jempbox
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
- assertTrue(keywords.contains("coast"));
- assertTrue(keywords.contains("nature reserve"));
+ try (InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg")) {
+ JempboxExtractor extractor = new JempboxExtractor(metadata);
+ extractor.parse(stream);
+
+ // XnViewMp fields not understood by Jempbox
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)",
+ metadata.get(TikaCoreProperties.DESCRIPTION));
+ Collection<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.SUBJECT));
+ assertTrue(keywords.contains("coast"));
+ assertTrue(keywords.contains("nature reserve"));
+ }
}
@Test
@@ -106,13 +110,17 @@ public class JempboxExtractorTest extends TikaTest {
try {
Metadata m = new Metadata();
JempboxExtractor ex = new JempboxExtractor(m);
- ex.parse(getResourceAsStream("/test-documents/testXMP.xmp"));
+ try (InputStream is = getResourceAsStream("/test-documents/testXMP.xmp")) {
+ ex.parse(is);
+ }
assertEquals(7, m.getValues(XMPMM.HISTORY_EVENT_INSTANCEID).length);
JempboxExtractor.setMaxXMPMMHistory(5);
m = new Metadata();
ex = new JempboxExtractor(m);
- ex.parse(getResourceAsStream("/test-documents/testXMP.xmp"));
+ try (InputStream is = getResourceAsStream("/test-documents/testXMP.xmp")) {
+ ex.parse(is);
+ }
assertEquals(5, m.getValues(XMPMM.HISTORY_EVENT_INSTANCEID).length);
} finally {
//if something goes wrong, make sure to set this back to what it was
diff --git a/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java b/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
index 10e5914..9f878ac 100644
--- a/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
+++ b/tika-parsers-advanced/tika-age-recogniser/src/test/java/org/apache/tika/parser/recognition/AgeRecogniserTest.java
@@ -20,6 +20,7 @@ import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import java.io.ByteArrayInputStream;
+import java.io.InputStream;
import java.nio.charset.Charset;
import org.apache.tika.Tika;
@@ -58,17 +59,22 @@ public class AgeRecogniserTest extends TikaTest {
public void testAgeRecogniser() throws Exception {
//test config is added to resources directory
- TikaConfig config = new TikaConfig(getClass().getResourceAsStream(CONFIG_FILE));
- Tika tika = new Tika(config);
-
- Metadata md = new Metadata();
- tika.parse(new ByteArrayInputStream(TEST_TEXT.getBytes(Charset.defaultCharset())), md);
-
- Assert.assertArrayEquals("Age Parser not invoked.",new String[] {CompositeParser.class.getCanonicalName(),
- AgeRecogniser.class.getCanonicalName()} , md.getValues("X-Parsed-By"));
- Assert.assertArrayEquals("Wrong age predicted.", new String[] {Double.toString(TEST_AGE)} , md.getValues(AgeRecogniser.MD_KEY_ESTIMATED_AGE));
-
-
+ try (InputStream is = getClass().getResourceAsStream(CONFIG_FILE);
+ InputStream bis = new ByteArrayInputStream(TEST_TEXT.getBytes(Charset.defaultCharset()));
+ ) {
+ TikaConfig config = new TikaConfig(is);
+ Tika tika = new Tika(config);
+
+ Metadata md = new Metadata();
+ tika.parse(bis, md);
+
+ Assert.assertArrayEquals("Age Parser not invoked.",
+ new String[] {CompositeParser.class.getCanonicalName(), AgeRecogniser.class.getCanonicalName()},
+ md.getValues("X-Parsed-By"));
+ Assert.assertArrayEquals("Wrong age predicted.",
+ new String[] {Double.toString(TEST_AGE)},
+ md.getValues(AgeRecogniser.MD_KEY_ESTIMATED_AGE));
+ }
}
}
\ No newline at end of file
diff --git a/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java b/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java
index 4d5b340..50f9867 100644
--- a/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java
+++ b/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JInceptionV3NetTest.java
@@ -24,13 +24,15 @@ import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
+import java.io.InputStream;
+
public class DL4JInceptionV3NetTest {
@Test
public void recognise() throws Exception {
TikaConfig config = null;
- try {
- config = new TikaConfig(getClass().getResourceAsStream("dl4j-inception3-config.xml"));
+ try (InputStream is = getClass().getResourceAsStream("dl4j-inception3-config.xml")) {
+ config = new TikaConfig(is);
} catch (Exception e) {
if (e.getMessage() != null
&& (e.getMessage().contains("Connection refused")
@@ -42,12 +44,15 @@ public class DL4JInceptionV3NetTest {
assumeTrue("something went wrong loading tika config", config != null);
Tika tika = new Tika(config);
Metadata md = new Metadata();
- tika.parse(getClass().getResourceAsStream("cat.jpg"), md);
+ try (InputStream is = getClass().getResourceAsStream("cat.jpg")) {
+ tika.parse(is, md);
+ }
String[] objects = md.getValues("OBJECT");
boolean found = false;
for (String object : objects) {
if (object.contains("_cat")) {
found = true;
+ break;
}
}
assertTrue(found);
diff --git a/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java b/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java
index a0915ec..e88c3e6 100644
--- a/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java
+++ b/tika-parsers-advanced/tika-dl/src/test/java/org/apache/tika/dl/imagerec/DL4JVGG16NetTest.java
@@ -31,9 +31,8 @@ public class DL4JVGG16NetTest {
@Test
public void recognise() throws Exception {
TikaConfig config = null;
- InputStream is = getClass().getResourceAsStream("dl4j-vgg16-config.xml");
- try {
- config = new TikaConfig(getClass().getResourceAsStream("dl4j-vgg16-config.xml"));
+ try (InputStream is = getClass().getResourceAsStream("dl4j-vgg16-config.xml")) {
+ config = new TikaConfig(is);
} catch (Exception e) {
if (e.getMessage() != null
&& (e.getMessage().contains("Connection refused")
@@ -46,12 +45,15 @@ public class DL4JVGG16NetTest {
assumeTrue("something went wrong loading tika config", config != null);
Tika tika = new Tika(config);
Metadata md = new Metadata();
- tika.parse(getClass().getResourceAsStream("lion.jpg"), md);
+ try (InputStream is = getClass().getResourceAsStream("lion.jpg")) {
+ tika.parse(is, md);
+ }
String[] objects = md.getValues("OBJECT");
boolean found = false;
for (String object : objects) {
if (object.contains("lion")) {
found = true;
+ break;
}
}
assertTrue(found);
diff --git a/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
index 6e17415..d5adde0 100644
--- a/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
+++ b/tika-parsers-advanced/tika-parser-nlp-module/src/test/java/org/apache/tika/parser/ner/NamedEntityParserTest.java
@@ -25,6 +25,7 @@ import org.apache.tika.parser.ner.regex.RegexNERecogniser;
import org.junit.Test;
import java.io.ByteArrayInputStream;
+import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.HashSet;
@@ -42,50 +43,51 @@ public class NamedEntityParserTest extends TikaTest {
public void testParse() throws Exception {
//test config is added to resources directory
- TikaConfig config = new TikaConfig(getClass().getResourceAsStream(CONFIG_FILE));
- Tika tika = new Tika(config);
- String text = "I am student at University of Southern California (USC)," +
- " located in Los Angeles . USC's football team is called by name Trojans." +
- " Mr. John McKay was a head coach of the team from 1960 - 1975";
- Metadata md = new Metadata();
- tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
+ try (InputStream is = getClass().getResourceAsStream(CONFIG_FILE)) {
+ TikaConfig config = new TikaConfig(is);
+ Tika tika = new Tika(config);
+ String text = "I am student at University of Southern California (USC)," +
+ " located in Los Angeles . USC's football team is called by name Trojans." +
+ " Mr. John McKay was a head coach of the team from 1960 - 1975";
+ Metadata md = new Metadata();
+ tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
- HashSet<String> set = new HashSet<String>();
- set.addAll(Arrays.asList(md.getValues("X-Parsed-By")));
- assumeTrue(set.contains(NamedEntityParser.class.getName()));
+ HashSet<String> set = new HashSet<String>();
+ set.addAll(Arrays.asList(md.getValues("X-Parsed-By")));
+ assumeTrue(set.contains(NamedEntityParser.class.getName()));
- set.clear();
- set.addAll(Arrays.asList(md.getValues("NER_PERSON")));
- assumeTrue(set.contains("John McKay"));
+ set.clear();
+ set.addAll(Arrays.asList(md.getValues("NER_PERSON")));
+ assumeTrue(set.contains("John McKay"));
- set.clear();
- set.addAll(Arrays.asList(md.getValues("NER_LOCATION")));
- assumeTrue(set.contains("Los Angeles"));
+ set.clear();
+ set.addAll(Arrays.asList(md.getValues("NER_LOCATION")));
+ assumeTrue(set.contains("Los Angeles"));
- set.clear();
- set.addAll(Arrays.asList(md.getValues("NER_ORGANIZATION")));
- assumeTrue(set.contains("University of Southern California"));
-
- set.clear();
- set.addAll(Arrays.asList(md.getValues("NER_DATE")));
- assumeTrue(set.contains("1960 - 1975"));
+ set.clear();
+ set.addAll(Arrays.asList(md.getValues("NER_ORGANIZATION")));
+ assumeTrue(set.contains("University of Southern California"));
+ set.clear();
+ set.addAll(Arrays.asList(md.getValues("NER_DATE")));
+ assumeTrue(set.contains("1960 - 1975"));
+ }
}
@Test
public void testNerChain() throws Exception {
- String classNames = OpenNLPNERecogniser.class.getName()
- + "," + RegexNERecogniser.class.getName();
+ String classNames = OpenNLPNERecogniser.class.getName() + "," + RegexNERecogniser.class.getName();
System.setProperty(NamedEntityParser.SYS_PROP_NER_IMPL, classNames);
- TikaConfig config = new TikaConfig(getClass().getResourceAsStream(CONFIG_FILE));
- Tika tika = new Tika(config);
- String text = "University of Southern California (USC), is located in Los Angeles ." +
- " Campus is busy from monday to saturday";
- Metadata md = new Metadata();
- tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
- HashSet<String> keys = new HashSet<>(Arrays.asList(md.names()));
- assumeTrue(keys.contains("NER_WEEK_DAY"));
- assumeTrue(keys.contains("NER_LOCATION"));
-
+ try (InputStream is = getClass().getResourceAsStream(CONFIG_FILE)) {
+ TikaConfig config = new TikaConfig(is);
+ Tika tika = new Tika(config);
+ String text = "University of Southern California (USC), is located in Los Angeles ." +
+ " Campus is busy from monday to saturday";
+ Metadata md = new Metadata();
+ tika.parse(new ByteArrayInputStream(text.getBytes(Charset.defaultCharset())), md);
+ HashSet<String> keys = new HashSet<>(Arrays.asList(md.names()));
+ assumeTrue(keys.contains("NER_WEEK_DAY"));
+ assumeTrue(keys.contains("NER_LOCATION"));
+ }
}
}
\ No newline at end of file
diff --git a/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/sqlite3/SQLite3ParserTest.java b/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/sqlite3/SQLite3ParserTest.java
index 9d2f36a..093f0f1 100644
--- a/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/sqlite3/SQLite3ParserTest.java
+++ b/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/sqlite3/SQLite3ParserTest.java
@@ -58,10 +58,14 @@ public class SQLite3ParserTest extends TikaTest {
try (InputStream stream = getResourceAsStream(TEST_FILE1)) {
_testBasic(stream);
}
- ByteArrayOutputStream bos = new ByteArrayOutputStream();
- IOUtils.copy(getResourceAsStream(TEST_FILE1), bos);
- try (InputStream stream = new ByteArrayInputStream(bos.toByteArray())) {
- _testBasic(stream);
+
+ try (InputStream is = getResourceAsStream(TEST_FILE1);
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ ) {
+ IOUtils.copy(is, bos);
+ try (InputStream stream = new ByteArrayInputStream(bos.toByteArray())) {
+ _testBasic(stream);
+ }
}
try (TikaInputStream outer = TikaInputStream.get(getResourceAsStream(TEST_FILE1))) {
try (TikaInputStream inner = TikaInputStream.get(outer.getFile())) {
diff --git a/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java b/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
index 66f3e13..a7e8907 100644
--- a/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
+++ b/tika-server/src/main/java/org/apache/tika/server/HTMLHelper.java
@@ -37,15 +37,16 @@ public class HTMLHelper {
private String POST_BODY;
public HTMLHelper() {
- InputStream htmlStr = getClass().getResourceAsStream(PATH);
- if (htmlStr == null) {
- throw new IllegalArgumentException("Template Not Found - " + PATH);
- }
- try {
+ try (InputStream htmlStr = getClass().getResourceAsStream(PATH)) {
+ if (htmlStr == null) {
+ throw new IllegalArgumentException("Template Not Found - " + PATH);
+ }
+
String html = IOUtils.toString(htmlStr, UTF_8);
int bodyAt = html.indexOf(BODY_VAR);
PRE_BODY = html.substring(0, bodyAt);
POST_BODY = html.substring(bodyAt + BODY_VAR.length());
+
} catch (IOException e) {
throw new IllegalStateException("Unable to read template");
}