You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2018/11/13 19:08:46 UTC

[tika] branch master updated (57562aa -> 30c3d81)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 57562aa  TIKA-2775 - bulk upgrade dependencies -- backoff minimum maven dependency to 3.1; clean up whitespace in tika-eval's pom
     new 64302aa  prefer System.currentTimeMillis to creating a new Date object, throughout...
     new 30c3d81  TIKA-2778 -- the shutdown method for tika-batch mode should not be typing anything on stdin of the parent process.  Rather, require an interrupt and/or kill signal and then make sure the children are stopped as well.

The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CHANGES.txt                                        |  4 ++
 .../java/org/apache/tika/batch/BatchProcess.java   | 31 +++++-----
 .../apache/tika/batch/BatchProcessDriverCLI.java   |  2 +-
 .../apache/tika/batch/FileResourceConsumer.java    |  4 +-
 .../org/apache/tika/batch/FileResourceCrawler.java |  4 +-
 .../java/org/apache/tika/batch/FileStarted.java    |  4 +-
 .../java/org/apache/tika/batch/Interrupter.java    | 72 ++++++++++++++--------
 .../apache/tika/batch/InterrupterFutureResult.java |  3 +-
 .../java/org/apache/tika/batch/StatusReporter.java |  6 +-
 .../tika/batch/builders/BatchProcessBuilder.java   |  6 +-
 .../tika/batch/builders/InterrupterBuilder.java    |  4 +-
 .../batch/fs/strawman/StrawManTikaAppDriver.java   |  8 +--
 .../org/apache/tika/metadata/TestMetadata.java     |  2 +-
 .../org/apache/tika/parser/mock/MockParser.java    |  4 +-
 .../java/org/apache/tika/eval/TikaEvalCLITest.java |  6 +-
 .../apache/tika/eval/tokens/TokenCounterTest.java  |  8 +--
 .../java/org/apache/tika/TestCorruptedFiles.java   |  1 -
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |  4 +-
 .../apache/tika/parser/mock/MockParserTest.java    | 16 ++---
 19 files changed, 109 insertions(+), 80 deletions(-)


[tika] 01/02: prefer System.currentTimeMillis to creating a new Date object, throughout...

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 64302aa88bd8de81771712ccafa778a34cefd7c2
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue Nov 13 11:15:44 2018 -0500

    prefer System.currentTimeMillis to creating a new Date object, throughout...
---
 .../main/java/org/apache/tika/batch/BatchProcess.java    | 10 +++++-----
 .../java/org/apache/tika/batch/FileResourceConsumer.java |  4 ++--
 .../java/org/apache/tika/batch/FileResourceCrawler.java  |  4 ++--
 .../src/main/java/org/apache/tika/batch/FileStarted.java |  4 ++--
 .../main/java/org/apache/tika/batch/StatusReporter.java  |  6 +++---
 .../tika/batch/fs/strawman/StrawManTikaAppDriver.java    |  8 ++++----
 .../test/java/org/apache/tika/metadata/TestMetadata.java |  2 +-
 .../java/org/apache/tika/parser/mock/MockParser.java     |  4 ++--
 .../test/java/org/apache/tika/eval/TikaEvalCLITest.java  |  6 +++---
 .../org/apache/tika/eval/tokens/TokenCounterTest.java    |  8 ++++----
 .../test/java/org/apache/tika/TestCorruptedFiles.java    |  1 -
 .../tika/parser/microsoft/ooxml/OOXMLParserTest.java     |  4 ++--
 .../java/org/apache/tika/parser/mock/MockParserTest.java | 16 ++++++++--------
 13 files changed, 38 insertions(+), 39 deletions(-)

diff --git a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
index 23887af..82a9c52 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
@@ -173,7 +173,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         State state = new State();
         LOG.info("BatchProcess starting up");
 
-        state.start = new Date().getTime();
+        state.start = System.currentTimeMillis();
         completionService.submit(interrupter);
         completionService.submit(fileResourceCrawler);
         completionService.submit(reporter);
@@ -335,7 +335,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
             LOG.warn("A parser was still working on >{}< for {} milliseconds after it started. This exceeds the maxTimeoutMillis parameter",
                     fs.getResourceId(), fs.getElapsedMillis());
         }
-        double elapsed = ((double) new Date().getTime() - (double) state.start) / 1000.0;
+        double elapsed = ((double)System.currentTimeMillis() - (double) state.start) / 1000.0;
         int processed = 0;
         int numExceptions = 0;
         for (FileResourceConsumer c : consumersManager.getConsumers()) {
@@ -419,7 +419,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         if (causeForTermination == CAUSE_FOR_TERMINATION.COMPLETED_NORMALLY) {
             return;
         }
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         while (countActiveConsumers() > 0) {
             try {
                 Thread.sleep(500);
@@ -427,7 +427,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
                 LOG.warn("Thread interrupted while trying to politelyAwaitTermination");
                 return;
             }
-            long elapsed = new Date().getTime()-start;
+            long elapsed = System.currentTimeMillis()-start;
             if (pauseOnEarlyTerminationMillis > -1 &&
                     elapsed > pauseOnEarlyTerminationMillis) {
                 LOG.warn("Waited after an early termination for {}ms, but there was at least one active consumer", elapsed);
@@ -482,7 +482,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         if (maxAliveTimeSeconds < 0) {
             return false;
         }
-        double elapsedSeconds = (double) (new Date().getTime() - started) / (double) 1000;
+        double elapsedSeconds = (double) (System.currentTimeMillis() - started) / (double) 1000;
         return elapsedSeconds > (double) maxAliveTimeSeconds;
     }
 
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java
index 6e9d314..f619397 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceConsumer.java
@@ -312,7 +312,7 @@ public abstract class FileResourceConsumer implements Callable<IFileProcessorFut
 
     private FileResource getNextFileResource() throws InterruptedException {
         FileResource fileResource = null;
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         while (fileResource == null) {
             //check to see if thread is interrupted before polling
             if (Thread.currentThread().isInterrupted()) {
@@ -338,7 +338,7 @@ public abstract class FileResourceConsumer implements Callable<IFileProcessorFut
             }
             LOG.debug("{} is waiting for file and the queue size is: {}", consumerId, fileQueue.size());
 
-            long elapsed = new Date().getTime() - start;
+            long elapsed = System.currentTimeMillis() - start;
             if (maxConsecWaitInMillis > 0 && elapsed > maxConsecWaitInMillis) {
                 setEndedState(STATE.EXCEEDED_MAX_CONSEC_WAIT_MILLIS);
                 break;
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
index fe1fd1a..7f6057b 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/FileResourceCrawler.java
@@ -146,7 +146,7 @@ public abstract class FileResourceCrawler implements Callable<IFileProcessorFutu
             return;
         }
         int i = 0;
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         while (queue.offer(new PoisonFileResource(), 1L, TimeUnit.SECONDS)) {
             if (shutDownNoPoison) {
                 LOG.debug("quitting the poison loop because shutDownNoPoison is now true");
@@ -156,7 +156,7 @@ public abstract class FileResourceCrawler implements Callable<IFileProcessorFutu
                 LOG.debug("thread interrupted while trying to add poison");
                 return;
             }
-            long elapsed = new Date().getTime() - start;
+            long elapsed = System.currentTimeMillis() - start;
             if (maxConsecWaitInMillis > -1 && elapsed > maxConsecWaitInMillis) {
                 LOG.error("Crawler timed out while trying to add poison");
                 return;
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/FileStarted.java b/tika-batch/src/main/java/org/apache/tika/batch/FileStarted.java
index 3a8d4f4..53ea087 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/FileStarted.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/FileStarted.java
@@ -34,7 +34,7 @@ class FileStarted {
      * @param resourceId string for unique resource id
      */
     public FileStarted(String resourceId) {
-        this(resourceId, new Date().getTime());
+        this(resourceId, System.currentTimeMillis());
     }
 
     public FileStarted(String resourceId, long started) {
@@ -62,7 +62,7 @@ class FileStarted {
      * file resource
      */
     public long getElapsedMillis() {
-        long now = new Date().getTime();
+        long now = System.currentTimeMillis();
         return now - started;
     }
 
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java b/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
index 7772d6c..0147ac5 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/StatusReporter.java
@@ -58,7 +58,7 @@ public class StatusReporter implements Callable<IFileProcessorFutureResult> {
     public StatusReporter(FileResourceCrawler crawler, ConsumersManager consumersManager) {
         this.consumersManager = consumersManager;
         this.crawler = crawler;
-        start = new Date().getTime();
+        start = System.currentTimeMillis();
     }
 
     /**
@@ -82,11 +82,11 @@ public class StatusReporter implements Callable<IFileProcessorFutureResult> {
                 Thread.sleep(sleepMillis);
                 int cnt = getRoughCountConsumed();
                 int exceptions = getRoughCountExceptions();
-                long elapsed = new Date().getTime() - start;
+                long elapsed = System.currentTimeMillis() - start;
                 double elapsedSecs = (double) elapsed / (double) 1000;
                 int avg = (elapsedSecs > 5 || cnt > 100) ? (int) ((double) cnt / elapsedSecs) : -1;
 
-                String elapsedString = DurationFormatUtils.formatMillis(new Date().getTime() - start);
+                String elapsedString = DurationFormatUtils.formatMillis(System.currentTimeMillis() - start);
                 String docsPerSec = avg > -1 ? String.format(Locale.ROOT,
                         " (%s docs per sec)",
                         numberFormat.format(avg)) : "";
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java b/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
index 4fef6c8..1814e87 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/fs/strawman/StrawManTikaAppDriver.java
@@ -171,7 +171,7 @@ public class StrawManTikaAppDriver implements Callable<Integer> {
 
     @Override
     public Integer call() throws Exception {
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         TikaVisitor v = new TikaVisitor();
         if (fileList != null) {
             TikaVisitor tikaVisitor = new TikaVisitor();
@@ -195,7 +195,7 @@ public class StrawManTikaAppDriver implements Callable<Integer> {
             Files.walkFileTree(inputRoot, v);
         }
         int processed = v.getProcessed();
-        double elapsedSecs = ((double)new Date().getTime()-(double)start)/(double)1000;
+        double elapsedSecs = ((double)System.currentTimeMillis()-(double)start)/(double)1000;
         LOG.info("Finished processing {} files in {} seconds.", processed, elapsedSecs);
         return processed;
     }
@@ -211,7 +211,7 @@ public class StrawManTikaAppDriver implements Callable<Integer> {
     }
 
     public static void main(String[] args) {
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         if (args.length < 6) {
             System.err.println(StrawManTikaAppDriver.usage());
         }
@@ -253,7 +253,7 @@ public class StrawManTikaAppDriver implements Callable<Integer> {
                 LOG.error(e.getMessage(), e);
             }
         }
-        double elapsedSeconds = (double)(new Date().getTime() - start) / (double)1000;
+        double elapsedSeconds = (double)(System.currentTimeMillis() - start) / (double)1000;
         LOG.info("Processed {} in {} seconds", totalFilesProcessed, elapsedSeconds);
         ex.shutdownNow();
     }
diff --git a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
index 78db151..df81c42 100644
--- a/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
+++ b/tika-core/src/test/java/org/apache/tika/metadata/TestMetadata.java
@@ -409,7 +409,7 @@ public class TestMetadata {
         public Integer call() throws Exception {
             for (int i = 0; i < 1000; i++) {
                 Metadata m = new Metadata();
-                long start = new Date().getTime();
+                long start = System.currentTimeMillis();
                 start += random.nextInt(1000000);
                 Date now = new Date(start);
                 DateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US);
diff --git a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
index 87f8d89..479a38e 100644
--- a/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
+++ b/tika-core/src/test/java/org/apache/tika/parser/mock/MockParser.java
@@ -350,7 +350,7 @@ public class MockParser extends AbstractParser {
     }
 
     private void sleep(long maxMillis, boolean isInterruptible) {
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         long millisRemaining = maxMillis;
         while (true) {
             try {
@@ -360,7 +360,7 @@ public class MockParser extends AbstractParser {
                     return;
                 }
             }
-            long elapsed = new Date().getTime()-start;
+            long elapsed = System.currentTimeMillis()-start;
             millisRemaining = maxMillis - elapsed;
             if (millisRemaining <= 0) {
                 break;
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
index 8151508..ea06f70 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/TikaEvalCLITest.java
@@ -211,8 +211,8 @@ public class TikaEvalCLITest extends TikaTest {
         pb.redirectOutput(ProcessBuilder.Redirect.INHERIT);
         pb.redirectError(ProcessBuilder.Redirect.INHERIT);
         Process process = pb.start();
-        long started = new Date().getTime();
-        long elapsed = new Date().getTime()-started;
+        long started = System.currentTimeMillis();
+        long elapsed = System.currentTimeMillis()-started;
         int exitValue = Integer.MIN_VALUE;
         while (elapsed < maxMillis && exitValue == Integer.MIN_VALUE) {
             try {
@@ -220,7 +220,7 @@ public class TikaEvalCLITest extends TikaTest {
             } catch (IllegalThreadStateException e) {
 
             }
-            elapsed = new Date().getTime()-started;
+            elapsed = System.currentTimeMillis()-started;
         }
         if (exitValue == Integer.MIN_VALUE) {
             process.destroy();
diff --git a/tika-eval/src/test/java/org/apache/tika/eval/tokens/TokenCounterTest.java b/tika-eval/src/test/java/org/apache/tika/eval/tokens/TokenCounterTest.java
index 9c6325d..e32f7af 100644
--- a/tika-eval/src/test/java/org/apache/tika/eval/tokens/TokenCounterTest.java
+++ b/tika-eval/src/test/java/org/apache/tika/eval/tokens/TokenCounterTest.java
@@ -64,16 +64,16 @@ public class TokenCounterTest {
         int numberOfTests = 100;
         for (int i = 0; i < numberOfTests; i++) {
             String s = generateString();
-            long start = new Date().getTime();
+            long start = System.currentTimeMillis();
             TokenCounter counter = new TokenCounter(analyzerManager.getGeneralAnalyzer());
             counter.add(FIELD, s);
-            simple += new Date().getTime()-start;
+            simple += System.currentTimeMillis()-start;
             TokenStatistics simpleTokenStatistics = counter.getTokenStatistics(FIELD);
 
-            start = new Date().getTime();
+            start = System.currentTimeMillis();
             LuceneTokenCounter tokenCounter = new LuceneTokenCounter(analyzerManager.getGeneralAnalyzer());
             tokenCounter.add(FIELD, s);
-            lucene += new Date().getTime()-start;
+            lucene += System.currentTimeMillis()-start;
             assertEquals(s, simpleTokenStatistics, tokenCounter.getTokenStatistics(FIELD));
         }
     }
diff --git a/tika-parsers/src/test/java/org/apache/tika/TestCorruptedFiles.java b/tika-parsers/src/test/java/org/apache/tika/TestCorruptedFiles.java
index 6b06f2f..92c94bd 100644
--- a/tika-parsers/src/test/java/org/apache/tika/TestCorruptedFiles.java
+++ b/tika-parsers/src/test/java/org/apache/tika/TestCorruptedFiles.java
@@ -147,7 +147,6 @@ public class TestCorruptedFiles extends TikaTest {
             for (int i = 0; i < NUM_ITERATIONS; i++) {
                 seed = randomSeedGenerator.nextLong();
                 FAILED = true;
-                long start = new Date().getTime();
                 testSingleFile(getBytes(fileName), new Random(seed));
                 FAILED = false;
             }
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 7018635..38e3581 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -1550,7 +1550,7 @@ public class OOXMLParserTest extends TikaTest {
     public void testBatch() throws Exception {
         OfficeParserConfig officeParserConfig = new OfficeParserConfig();
         officeParserConfig.setUseSAXDocxExtractor(true);
-        long started = new Date().getTime();
+        long started = System.currentTimeMillis();
         int ex = 0;
         for (int i = 0; i < 100; i++) {
             for (File f : getResourceAsFile("/test-documents").listFiles()) {
@@ -1570,7 +1570,7 @@ public class OOXMLParserTest extends TikaTest {
                 }
             }
         }
-        System.out.println("elapsed: "+(new Date().getTime()-started) + " with " + ex + " exceptions");
+        System.out.println("elapsed: "+(System.currentTimeMillis()-started) + " with " + ex + " exceptions");
     }
 
     @Test
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
index 4adf9e7..10b7372 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mock/MockParserTest.java
@@ -103,11 +103,11 @@ public class MockParserTest extends TikaTest {
 
     @Test
     public void testSleep() throws Exception {
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         Metadata m = new Metadata();
         String content = getXML("sleep.xml", m).xml;
         assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
+        long elapsed = System.currentTimeMillis()-start;
         //should sleep for at least 3000
         boolean enoughTimeHasElapsed = elapsed > 2000;
         assertTrue("not enough time has not elapsed: "+elapsed, enoughTimeHasElapsed);
@@ -116,12 +116,12 @@ public class MockParserTest extends TikaTest {
 
     @Test
     public void testHeavyHang() throws Exception {
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         Metadata m = new Metadata();
 
         String content = getXML("heavy_hang.xml", m).xml;
         assertMockParser(m);
-        long elapsed = new Date().getTime()-start;
+        long elapsed = System.currentTimeMillis()-start;
         //should sleep for at least 3000
         boolean enoughTimeHasElapsed = elapsed > 2000;
         assertTrue("not enough time has elapsed: "+elapsed, enoughTimeHasElapsed);
@@ -154,7 +154,7 @@ public class MockParserTest extends TikaTest {
         ParserRunnable r = new ParserRunnable("sleep_interruptible.xml");
         Thread t = new Thread(r);
         t.start();
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         try {
             Thread.sleep(1000);
         } catch (InterruptedException e) {
@@ -168,7 +168,7 @@ public class MockParserTest extends TikaTest {
         } catch (InterruptedException e) {
             //swallow
         }
-        long elapsed = new Date().getTime()-start;
+        long elapsed = System.currentTimeMillis()-start;
         boolean shortEnough = elapsed < 2000;//the xml file specifies 3000
         assertTrue("elapsed (" + elapsed + " millis) was not short enough", shortEnough);
     }
@@ -178,7 +178,7 @@ public class MockParserTest extends TikaTest {
         ParserRunnable r = new ParserRunnable("sleep_not_interruptible.xml");
         Thread t = new Thread(r);
         t.start();
-        long start = new Date().getTime();
+        long start = System.currentTimeMillis();
         try {
             //make sure that the thread has actually started
             Thread.sleep(1000);
@@ -191,7 +191,7 @@ public class MockParserTest extends TikaTest {
         } catch (InterruptedException e) {
             //swallow
         }
-        long elapsed = new Date().getTime()-start;
+        long elapsed = System.currentTimeMillis()-start;
         boolean longEnough = elapsed >= 3000;//the xml file specifies 3000, this sleeps 1000
         assertTrue("elapsed ("+elapsed+" millis) was not long enough", longEnough);
     }


[tika] 02/02: TIKA-2778 -- the shutdown method for tika-batch mode should not be typing anything on stdin of the parent process. Rather, require an interrupt and/or kill signal and then make sure the children are stopped as well.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 30c3d8104a51f015416382995435a4785059f07c
Author: TALLISON <ta...@apache.org>
AuthorDate: Tue Nov 13 14:08:26 2018 -0500

    TIKA-2778 -- the shutdown method for tika-batch mode should not be
    typing anything on stdin of the parent process.  Rather, require
    an interrupt and/or kill signal and then make sure the children are
    stopped as well.
---
 CHANGES.txt                                        |  4 ++
 .../java/org/apache/tika/batch/BatchProcess.java   | 21 ++++---
 .../apache/tika/batch/BatchProcessDriverCLI.java   |  2 +-
 .../java/org/apache/tika/batch/Interrupter.java    | 72 ++++++++++++++--------
 .../apache/tika/batch/InterrupterFutureResult.java |  3 +-
 .../tika/batch/builders/BatchProcessBuilder.java   |  6 +-
 .../tika/batch/builders/InterrupterBuilder.java    |  4 +-
 7 files changed, 71 insertions(+), 41 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index 68f1c6c..7ea3ab8 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -7,6 +7,10 @@ Release 2.0.0 - ???
 
 Release 1.20 - ???
 
+   * tika-app in batch mode now requires an interrupt or
+     kill signal to the parent process to stop the parent
+     and the child processes (TIKA-2778).
+
    * Bulk upgrade of dependencies (TIKA-2775).
 
    * Improve language id efficiency in tika-eval (TIKA-2777).
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
index 82a9c52..2cd40ff 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcess.java
@@ -21,7 +21,6 @@ import static java.nio.charset.StandardCharsets.UTF_8;
 
 import java.io.IOException;
 import java.io.PrintStream;
-import java.util.Date;
 import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.Callable;
@@ -63,7 +62,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         MAIN_LOOP_EXCEPTION,
         CRAWLER_TIMED_OUT,
         TIMED_OUT_CONSUMER,
-        USER_INTERRUPTION,
+        PARENT_SHUTDOWN,
         BATCH_PROCESS_ALIVE_TOO_LONG,
     }
 
@@ -163,6 +162,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         } finally {
             shutdownConsumersManager();
         }
+        LOG.trace("finishing up");
         return result;
     }
 
@@ -194,6 +194,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
                 if (futureResult != null) {
                     state.removed++;
                     IFileProcessorFutureResult result = futureResult.get();
+                    LOG.trace("result: "+result);
                     if (result instanceof FileConsumerFutureResult) {
                         state.consumersRemoved++;
                     } else if (result instanceof FileResourceCrawlerFutureResult) {
@@ -203,14 +204,13 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
                             break;
                         }
                     } else if (result instanceof InterrupterFutureResult) {
-                        causeForTermination = CAUSE_FOR_TERMINATION.USER_INTERRUPTION;
+                        causeForTermination = CAUSE_FOR_TERMINATION.PARENT_SHUTDOWN;
                         break;
                     } else if (result instanceof TimeoutFutureResult) {
                         causeForTermination = CAUSE_FOR_TERMINATION.TIMED_OUT_CONSUMER;
                         break;
                     } //only thing left should be StatusReporterResult
                 }
-
                 if (state.consumersRemoved >= state.numConsumers) {
                     causeForTermination = CAUSE_FOR_TERMINATION.COMPLETED_NORMALLY;
                     break;
@@ -244,7 +244,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         //TODO: figure out safe way to shutdown resource crawler
         //if it isn't.  Does it need to add poison at this point?
         //fileResourceCrawler.pleaseShutdown();
-
+        LOG.trace("about to shutdown");
         //Step 1: prevent uncalled threads from being started
         ex.shutdown();
 
@@ -255,8 +255,9 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         }
         //The resourceCrawler should shutdown now.  No need for poison.
         fileResourceCrawler.shutDownNoPoison();
-        //if there are any active/asked to shutdown consumers, await termination
-        //this can happen if a user interrupts the process
+        //if there are any active/asked-to-shutdown consumers, wait
+        //a bit for those parsers to finish.
+        //This can happen if the parent process dies
         //of if the crawler stops early, or ...
         politelyAwaitTermination(state.causeForTermination);
 
@@ -287,6 +288,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
                 if (result instanceof FileConsumerFutureResult) {
                     FileConsumerFutureResult consumerResult = (FileConsumerFutureResult) result;
                     FileStarted fileStarted = consumerResult.getFileStarted();
+                    LOG.trace("file started "+fileStarted);
                     if (fileStarted != null
                             && fileStarted.getElapsedMillis() > timeoutThresholdMillis) {
                         LOG.warn("{} caused a file processor to hang or crash. You may need to remove "
@@ -305,7 +307,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         }
         //do we need to restart?
         String restartMsg = null;
-        if (state.causeForTermination == CAUSE_FOR_TERMINATION.USER_INTERRUPTION
+        if (state.causeForTermination == CAUSE_FOR_TERMINATION.PARENT_SHUTDOWN
                 || state.causeForTermination == CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION_NO_RESTART) {
             //do not restart!!!
         } else if (state.causeForTermination == CAUSE_FOR_TERMINATION.MAIN_LOOP_EXCEPTION) {
@@ -325,7 +327,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
         } else if (! fileResourceCrawler.isQueueEmpty()) {
             restartMsg = "Resources still exist for processing";
         }
-
+        LOG.trace("restart msg: "+restartMsg);
         int exitStatus = getExitStatus(state.causeForTermination, restartMsg);
 
         //need to re-check, report, mark timed out consumers
@@ -342,6 +344,7 @@ public class BatchProcess implements Callable<ParallelFileProcessingResult> {
             processed += c.getNumResourcesConsumed();
             numExceptions += c.getNumHandledExceptions();
         }
+        LOG.trace("returning "+state.causeForTermination);
         return new
             ParallelFileProcessingResult(considered, added, processed, numExceptions,
                 elapsed, exitStatus, state.causeForTermination.toString());
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
index aa8ebf8..a1b19f6 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/BatchProcessDriverCLI.java
@@ -230,7 +230,7 @@ public class BatchProcessDriverCLI {
     private void stop() {
         if (process != null) {
             LOG.trace("destroying a non-null process");
-            process.destroy();
+            process.destroyForcibly();
         }
 
         receivedRestartMsg = false;
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java b/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
index c1e31ba..eea1308 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/Interrupter.java
@@ -1,5 +1,3 @@
-package org.apache.tika.batch;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,44 +14,70 @@ package org.apache.tika.batch;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.tika.batch;
 
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStreamReader;
+
+import java.io.InputStream;
 import java.util.concurrent.Callable;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
-import static java.nio.charset.StandardCharsets.UTF_8;
-
 
 /**
- * Class that waits for input on System.in.  If the user enters a keystroke on 
- * System.in, this will send a signal to the FileResourceRunner to shutdown gracefully.
- *
+ * Class that waits for input on System.in.  If this reads
+ * EOF or if there is an exception from the parent's IO,
+ * this will send a signal to shut down the child process.
  * <p>
- * In the future, this may implement a common IInterrupter interface for more flexibility.
+ *     This will call System.exit(-1) if the process doesn't stop
+ *     within {@link #pauseOnEarlyTermination} milliseconds plus a
+ *     one-second grace period; a negative value disables this.
+ * </p>
+ *
+ *
  */
 public class Interrupter implements Callable<IFileProcessorFutureResult> {
     private static final Logger LOG = LoggerFactory.getLogger(Interrupter.class);
 
-	public IFileProcessorFutureResult call(){
+    private static final long EXTRA_GRACE_PERIOD_MILLIS = 1000;
+    private final long pauseOnEarlyTermination;
+
+    public Interrupter(long pauseOnEarlyTermination) {
+        this.pauseOnEarlyTermination = pauseOnEarlyTermination;
+    }
+
+	public IFileProcessorFutureResult call() {
 		try{
-			BufferedReader reader = new BufferedReader(new InputStreamReader(System.in, UTF_8));
-			while (true){
-				if (reader.ready()){
-					reader.readLine();
-					break;
-				} else {
-					Thread.sleep(1000);
-				}
+			InputStream is = System.in;
+			int byt = is.read();
+			while (byt > -1){
+				byt = is.read();
 			}
-		} catch (InterruptedException e){
-		    //canceller was interrupted
-		} catch (IOException e){
-            LOG.error("IOException from STDIN in CommandlineInterrupter.");
+		} catch (Throwable e) {
+            LOG.warn("Exception from STDIN in CommandlineInterrupter.", e);
 		}
+		new Thread(new Doomsday()).start();
 		return new InterrupterFutureResult();
 	}
+
+    private class Doomsday implements Runnable {
+        @Override
+        public void run() {
+            if (pauseOnEarlyTermination < 0) {
+                return;
+            }
+            long start = System.currentTimeMillis();
+            long elapsed = System.currentTimeMillis()-start;
+            while (elapsed < (pauseOnEarlyTermination+EXTRA_GRACE_PERIOD_MILLIS)) {
+                try {
+                    Thread.sleep(500);
+                } catch (InterruptedException e) {
+                    return;
+                }
+                elapsed = System.currentTimeMillis()-start;
+            }
+            LOG.error("Interrupter timed out; now calling System.exit.");
+            System.exit(-1);
+        }
+    }
 }
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/InterrupterFutureResult.java b/tika-batch/src/main/java/org/apache/tika/batch/InterrupterFutureResult.java
index c4d3704..ec49507 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/InterrupterFutureResult.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/InterrupterFutureResult.java
@@ -1,5 +1,3 @@
-package org.apache.tika.batch;
-
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -16,6 +14,7 @@ package org.apache.tika.batch;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+package org.apache.tika.batch;
 
 public class InterrupterFutureResult implements IFileProcessorFutureResult {
 
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
index 0ebfd15..26b944e 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/BatchProcessBuilder.java
@@ -133,7 +133,7 @@ public class BatchProcessBuilder {
 
         reporter = buildReporter(crawler, consumersManager, keyNodes.get("reporter"), runtimeAttributes);
 
-        interrupter = buildInterrupter(keyNodes.get("interrupter"), runtimeAttributes);
+        interrupter = buildInterrupter(keyNodes.get("interrupter"), pauseOnEarlyTerminationMillis, runtimeAttributes);
 
         BatchProcess proc = new BatchProcess(
                 crawler, consumersManager, reporter, interrupter);
@@ -153,7 +153,7 @@ public class BatchProcessBuilder {
         return proc;
     }
 
-    private Interrupter buildInterrupter(Node node, Map<String, String> runtimeAttributes) {
+    private Interrupter buildInterrupter(Node node, long pauseOnEarlyTermination, Map<String, String> runtimeAttributes) {
         Map<String, String> attrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttributes);
         String className = attrs.get("builderClass");
         if (className == null) {
@@ -161,7 +161,7 @@ public class BatchProcessBuilder {
         }
         InterrupterBuilder builder = ClassLoaderUtil.buildClass(InterrupterBuilder.class, className);
 
-        return builder.build(node, runtimeAttributes);
+        return builder.build(node, pauseOnEarlyTermination, runtimeAttributes);
 
     }
 
diff --git a/tika-batch/src/main/java/org/apache/tika/batch/builders/InterrupterBuilder.java b/tika-batch/src/main/java/org/apache/tika/batch/builders/InterrupterBuilder.java
index d7223cd..d02b48c 100644
--- a/tika-batch/src/main/java/org/apache/tika/batch/builders/InterrupterBuilder.java
+++ b/tika-batch/src/main/java/org/apache/tika/batch/builders/InterrupterBuilder.java
@@ -26,7 +26,7 @@ import org.w3c.dom.Node;
  */
 public class InterrupterBuilder {
 
-    public Interrupter build(Node n, Map<String, String> commandlineArguments) {
-        return new Interrupter();
+    public Interrupter build(Node n, long pauseOnEarlyTermination, Map<String, String> commandlineArguments) {
+        return new Interrupter(pauseOnEarlyTermination);
     }
 }