You are viewing a plain text version of this content. The hyperlink to the canonical version was not preserved in this plain-text copy.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/04/09 15:04:37 UTC

[tika] branch main updated (4bd278f -> 149e991)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 4bd278f  don't close readers/writers in json serialization
     new a5d8592  fix jdbcfetchiterator unit test to block on adding to queue
     new 7b34dfe  close json writer in tikacli
     new 52a8367  close json writer in tika-server
     new 149e991  TIKA-3350 use the file if supplied via TikaInputStream

The 4 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java       |  5 +++--
 .../src/main/java/org/apache/tika/parser/pdf/PDFParser.java   |  3 ++-
 .../tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java  | 11 ++++++++---
 .../apache/tika/server/core/writer/JSONMessageBodyWriter.java | 10 +++++-----
 .../org/apache/tika/server/core/writer/JSONObjWriter.java     |  8 ++++----
 .../server/core/writer/MetadataListMessageBodyWriter.java     |  6 +++---
 6 files changed, 25 insertions(+), 18 deletions(-)

[tika] 02/04: close json writer in tikacli

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 7b34dfe54dce36a9990fc56e95f2e4c1a3306de7
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 9 10:23:36 2021 -0400

    close json writer in tikacli
---
 tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 47ba493..afdc332 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -530,8 +530,9 @@ public class TikaCLI {
             wrapper.parse(input, handler, metadata, context);
         }
         JsonMetadataList.setPrettyPrinting(prettyPrint);
-        Writer writer = getOutputWriter(output, encoding);
-        JsonMetadataList.toJson(handler.getMetadataList(), writer);
+        try(Writer writer = getOutputWriter(output, encoding)) {
+            JsonMetadataList.toJson(handler.getMetadataList(), writer);
+        }
     }
 
     private ContentHandlerFactory getContentHandlerFactory(OutputType type) {

[tika] 04/04: TIKA-3350 use the file if supplied via TikaInputStream

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 149e991442ef534cadea778184e496f1c2aa610c
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 9 11:03:59 2021 -0400

    TIKA-3350 use the file if supplied via TikaInputStream
---
 .../src/main/java/org/apache/tika/parser/pdf/PDFParser.java            | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
index 9f817c3..9f4ff52 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/PDFParser.java
@@ -144,7 +144,8 @@ public class PDFParser extends AbstractParser implements Initializable {
             if (tstream != null && tstream.hasFile()) {
                 // File based -- send file directly to PDFBox
                 pdfDocument =
-                        getPDDocument(tstream, password, memoryUsageSetting, metadata, context);
+                        getPDDocument(tstream.getPath(), password,
+                                memoryUsageSetting, metadata, context);
             } else {
                 pdfDocument = getPDDocument(new CloseShieldInputStream(stream), password,
                         memoryUsageSetting, metadata, context);

[tika] 01/04: fix jdbcfetchiterator unit test to block on adding to queue

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit a5d8592e293c73c0807e66fb29ad6b8fee529a5b
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 9 09:24:14 2021 -0400

    fix jdbcfetchiterator unit test to block on adding to queue
---
 .../tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java  | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/java/org/apache/tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/java/org/apache/tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java
index 7eebe31..e7773fe 100644
--- a/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/java/org/apache/tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java
+++ b/tika-pipes/tika-fetch-iterators/tika-fetch-iterator-jdbc/src/test/java/org/apache/tika/pipes/fetchiterator/jdbc/TestJDBCFetchIterator.java
@@ -91,8 +91,9 @@ public class TestJDBCFetchIterator {
     public void testSimple() throws Exception {
         TikaConfig tk = getConfig();
         int numConsumers = 5;
+
         FetchIterator fetchIterator = tk.getFetchIterator();
-        ExecutorService es = Executors.newFixedThreadPool(numConsumers + 1);
+        ExecutorService es = Executors.newFixedThreadPool(numConsumers);
         ExecutorCompletionService<Integer> completionService =
                 new ExecutorCompletionService<>(es);
         ArrayBlockingQueue<FetchEmitTuple> queue = new ArrayBlockingQueue<>(100);
@@ -102,9 +103,12 @@ public class TestJDBCFetchIterator {
             fetchers.add(mockFetcher);
             completionService.submit(mockFetcher);
         }
+        int offered = 0;
         for (FetchEmitTuple t : fetchIterator) {
-            queue.offer(t);
+            queue.put(t);
+            offered++;
         }
+        assertEquals(NUM_ROWS, offered);
         for (int i = 0; i < numConsumers; i++) {
             queue.offer(FetchIterator.COMPLETED_SEMAPHORE);
         }
@@ -112,7 +116,8 @@ public class TestJDBCFetchIterator {
         int completed = 0;
         while (completed < numConsumers) {
             Future<Integer> f = completionService.take();
-            processed += f.get();
+            int fetched = f.get();
+            processed += fetched;
             completed++;
         }
         assertEquals(NUM_ROWS, processed);

[tika] 03/04: close json writer in tika-server

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 52a8367f2f67bf58934d34dfdd77d2058b3536b0
Author: tallison <ta...@apache.org>
AuthorDate: Fri Apr 9 10:47:00 2021 -0400

    close json writer in tika-server
---
 .../apache/tika/server/core/writer/JSONMessageBodyWriter.java  | 10 +++++-----
 .../java/org/apache/tika/server/core/writer/JSONObjWriter.java |  8 ++++----
 .../tika/server/core/writer/MetadataListMessageBodyWriter.java |  6 +++---
 3 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
index e975b82..965970f 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONMessageBodyWriter.java
@@ -52,11 +52,11 @@ public class JSONMessageBodyWriter implements MessageBodyWriter<Metadata> {
     @Override
     public void writeTo(Metadata metadata, Class<?> type, Type genericType,
                         Annotation[] annotations, MediaType mediaType,
-                        MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream)
+                        MultivaluedMap<String, Object> httpHeaders,
+                        OutputStream entityStream)
             throws IOException, WebApplicationException {
-        Writer writer = new OutputStreamWriter(entityStream, UTF_8);
-        JsonMetadata.toJson(metadata, writer);
-        writer.flush();
-        entityStream.flush();
+        try (Writer writer = new OutputStreamWriter(entityStream, UTF_8)) {
+            JsonMetadata.toJson(metadata, writer);
+        }
     }
 }
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
index 30354a0..2cc7d17 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/JSONObjWriter.java
@@ -57,9 +57,9 @@ public class JSONObjWriter implements MessageBodyWriter<Map<String, Object>> {
                         Annotation[] annotations, MediaType mediaType,
                         MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream)
             throws IOException, WebApplicationException {
-        Writer writer = new OutputStreamWriter(entityStream, UTF_8);
-        ObjectMapper objectMapper = new ObjectMapper();
-        objectMapper.writerWithDefaultPrettyPrinter().writeValue(writer, map);
-        entityStream.flush();
+        try (Writer writer = new OutputStreamWriter(entityStream, UTF_8)) {
+            ObjectMapper objectMapper = new ObjectMapper();
+            objectMapper.writerWithDefaultPrettyPrinter().writeValue(writer, map);
+        }
     }
 }
diff --git a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
index cf59299..a558cd1 100644
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/writer/MetadataListMessageBodyWriter.java
@@ -57,8 +57,8 @@ public class MetadataListMessageBodyWriter implements MessageBodyWriter<Metadata
                         Annotation[] annotations, MediaType mediaType,
                         MultivaluedMap<String, Object> httpHeaders, OutputStream entityStream)
             throws IOException, WebApplicationException {
-        Writer writer = new OutputStreamWriter(entityStream, UTF_8);
-        JsonMetadataList.toJson(list.getMetadata(), writer);
-        entityStream.flush();
+        try (Writer writer = new OutputStreamWriter(entityStream, UTF_8)) {
+            JsonMetadataList.toJson(list.getMetadata(), writer);
+        }
     }
 }