You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/09/09 14:01:19 UTC

[tika] branch branch_1x updated (6ef132c -> 78d0398)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git.


    from 6ef132c  TIKA-3119 -- general upgrades for 1.25
     new fd1926d  Tika-3141 -- add empty environment variable handle (#334)
     new c00af36  Fix can't del tmp file in windows (#332)
     new fecbbf8  Pull request #332 resolve conflicts.  In 1.x, we use JUL in core.
     new ba72e03  Modify some bin file's Url (#330)
     new 9c2e5e6  Modify some arg parse in TikaCLI (#340)
     new 78d0398  Fix test fail caused by default language is not english (#353)

The 6 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java     |  4 ++--
 .../src/main/java/org/apache/tika/config/TikaConfig.java    |  4 ++--
 .../main/java/org/apache/tika/io/TemporaryResources.java    | 13 ++++++++++++-
 .../tika/parser/sentiment/SentimentAnalysisParser.java      |  6 +++---
 .../java/org/apache/tika/parser/sas/SAS7BDATParserTest.java |  7 +++++--
 .../parser/sentiment/tika-config-sentiment-opennlp-cat.xml  |  2 +-
 .../tika/parser/sentiment/tika-config-sentiment-opennlp.xml |  2 +-
 7 files changed, 26 insertions(+), 12 deletions(-)


[tika] 04/06: Modify some bin file's Url (#330)

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit ba72e0373193ced0dcff34f5127e3bca3c6f2d12
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Wed Sep 9 21:16:11 2020 +0800

    Modify some bin file's Url (#330)
    
    The URLs of some model .bin files have changed; update the references to match.
    # Conflicts:
    #	tika-parsers/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
---
 .../org/apache/tika/parser/sentiment/SentimentAnalysisParser.java   | 6 +++---
 .../tika/parser/sentiment/tika-config-sentiment-opennlp-cat.xml     | 2 +-
 .../apache/tika/parser/sentiment/tika-config-sentiment-opennlp.xml  | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java b/tika-parsers/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
index 276d792..3e731cc 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/sentiment/SentimentAnalysisParser.java
@@ -51,9 +51,9 @@ public class SentimentAnalysisParser extends AbstractParser implements Initializ
 
     private static final Set<MediaType> SUPPORTED_TYPES = Collections
             .singleton(MediaType.application("sentiment"));
-    private static final Logger LOG = LoggerFactory
-            .getLogger(SentimentAnalysisParser.class);
-    public static final String DEF_MODEL = "https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/en-netflix-sentiment.bin";
+    private static final Logger LOG = LoggerFactory.getLogger(SentimentAnalysisParser.class);
+
+    public static final String DEF_MODEL = "https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/en-netflix-sentiment.bin";
 
     private SentimentME classifier;
 
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp-cat.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp-cat.xml
index aaa6a5a..d008053 100644
--- a/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp-cat.xml
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp-cat.xml
@@ -20,7 +20,7 @@
             <mime>text/plain</mime>
             <mime>application/sentiment</mime>
             <params>
-                <param name="modelPath" type="string">https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/ht-sentiment-categ.bin</param>
+                <param name="modelPath" type="string">https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/ht-sentiment-categ.bin</param>
                 <!--This can also be relative path sentiment-models/en-stanford-sentiment.bin-->
             </params>
         </parser>
diff --git a/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp.xml b/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp.xml
index d5657d7..cc63aee 100644
--- a/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp.xml
+++ b/tika-parsers/src/test/resources/org/apache/tika/parser/sentiment/tika-config-sentiment-opennlp.xml
@@ -21,7 +21,7 @@
             <mime>text/plain</mime>
             <mime>application/sentiment</mime>
             <params>
-                <param name="modelPath" type="string">https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/en-netflix-sentiment.bin</param>
+                <param name="modelPath" type="string">https://raw.githubusercontent.com/USCDataScience/SentimentAnalysisParser/master/sentiment-models/src/main/resources/edu/usc/irds/sentiment/en-netflix-sentiment.bin</param>
                 <!--This can also be relative path sentiment-models/en-stanford-sentiment.bin-->
             </params>
         </parser>


[tika] 02/06: Fix can't del tmp file in windows (#332)

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit c00af36e70c29a597b07d8c285d2adc0b7c4fce3
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Wed Sep 9 21:20:15 2020 +0800

    Fix can't del tmp file in windows (#332)
    
    The test case org.apache.tika.image.HeifParserTest.testSimple fails on Windows
    because TemporaryResources.close() fails to delete the temporary file.
    
    This fixes it by marking the temporary file for deletion on exit if the immediate delete fails.
---
 .../src/main/java/org/apache/tika/io/TemporaryResources.java | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java b/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
index 2dad5bd..6a1e1b6 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
@@ -24,6 +24,8 @@ import java.nio.file.Path;
 import java.util.LinkedList;
 
 import org.apache.tika.exception.TikaException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Utility class for tracking and ultimately closing or otherwise disposing
@@ -35,6 +37,8 @@ import org.apache.tika.exception.TikaException;
  */
 public class TemporaryResources implements Closeable {
 
+    private static final Logger LOG = LoggerFactory.getLogger(TemporaryResources.class);
+
     /**
      * Tracked resources in LIFO order.
      */
@@ -81,7 +85,13 @@ public class TemporaryResources implements Closeable {
                 : Files.createTempFile(tempFileDir, "apache-tika-", ".tmp");
         addResource(new Closeable() {
             public void close() throws IOException {
-                Files.delete(path);
+                try {
+                    Files.delete(path);
+                } catch (IOException e) {
+                    // delete when exit if current delete fail
+                    LOG.warn("delete tmp file fail, will delete it on exit");
+                    path.toFile().deleteOnExit();
+                }
             }
         });
         return path;


[tika] 05/06: Modify some arg parse in TikaCLI (#340)

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 9c2e5e6d25d1a1f2c47b2135be9900f7b0a35ea6
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Wed Sep 9 21:15:36 2020 +0800

    Modify some arg parse in TikaCLI (#340)
    
    1. Fix parsing of the "--client=" argument.
    2. Parse the "--compare-file-magic" argument the same way as the other arguments.
---
 tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 46f82ee..b57822c 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -395,7 +395,7 @@ public class TikaCLI {
             displaySupportedTypes();
         } else if (arg.startsWith("--compare-file-magic=")) {
             pipeMode = false;
-            compareFileMagic(arg.substring(arg.indexOf('=')+1));
+            compareFileMagic(arg.substring("--compare-file-magic=".length()));
         } else if (arg.equals("--dump-minimal-config")) {
             pipeMode = false;
             dumpConfig(TikaConfigSerializer.Mode.MINIMAL);
@@ -467,7 +467,7 @@ public class TikaCLI {
         } else if (arg.startsWith("-c")) {
             networkURI = new URI(arg.substring("-c".length()));
         } else if (arg.startsWith("--client=")) {
-            networkURI = new URI(arg.substring("-c".length()));
+            networkURI = new URI(arg.substring("--client=".length()));
         } else {
             pipeMode = false;
             configure();


[tika] 03/06: Pull request #332 resolve conflicts. In 1.x, we use JUL in core.

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit fecbbf87e5d468209c545fa32b5d6024382fcca1
Author: tallison <ta...@apache.org>
AuthorDate: Wed Sep 9 09:32:16 2020 -0400

    Pull request #332 resolve conflicts.  In 1.x, we use JUL in core.
---
 .../src/main/java/org/apache/tika/io/TemporaryResources.java     | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java b/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
index 6a1e1b6..42e5c77 100644
--- a/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
+++ b/tika-core/src/main/java/org/apache/tika/io/TemporaryResources.java
@@ -24,8 +24,9 @@ import java.nio.file.Path;
 import java.util.LinkedList;
 
 import org.apache.tika.exception.TikaException;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
 
 /**
  * Utility class for tracking and ultimately closing or otherwise disposing
@@ -37,7 +38,7 @@ import org.slf4j.LoggerFactory;
  */
 public class TemporaryResources implements Closeable {
 
-    private static final Logger LOG = LoggerFactory.getLogger(TemporaryResources.class);
+    private static final Logger LOG = Logger.getLogger(TemporaryResources.class.getName());
 
     /**
      * Tracked resources in LIFO order.
@@ -89,7 +90,7 @@ public class TemporaryResources implements Closeable {
                     Files.delete(path);
                 } catch (IOException e) {
                     // delete when exit if current delete fail
-                    LOG.warn("delete tmp file fail, will delete it on exit");
+                    LOG.log(Level.WARNING, "delete tmp file failed; will delete it on exit");
                     path.toFile().deleteOnExit();
                 }
             }


[tika] 01/06: Tika-3141 -- add empty environment variable handle (#334)

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit fd1926dd95d592afb6dce1b0da885f94482ce137
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Wed Sep 9 21:22:06 2020 +0800

    Tika-3141 -- add empty environment variable handle (#334)
---
 tika-core/src/main/java/org/apache/tika/config/TikaConfig.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
index 18b3add..3499cc2 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfig.java
@@ -248,11 +248,11 @@ public class TikaConfig {
     public TikaConfig() throws TikaException, IOException {
 
         String config = System.getProperty("tika.config");
-        if (config == null) {
+        if (config == null || config.trim().equals("")) {
             config = System.getenv("TIKA_CONFIG");
         }
 
-        if (config == null) {
+        if (config == null || config.trim().equals("")) {
             this.serviceLoader = new ServiceLoader();
             this.mimeTypes = getDefaultMimeTypes(getContextClassLoader());
             this.encodingDetector = getDefaultEncodingDetector(serviceLoader);


[tika] 06/06: Fix test fail caused by default language is not english (#353)

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch branch_1x
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 78d0398bea4545ff9c2aebc28e86c32cbbcfe8b6
Author: Lee <55...@users.noreply.github.com>
AuthorDate: Wed Sep 9 21:13:53 2020 +0800

    Fix test fail caused by default language is not english (#353)
    
    Use the default locale's short month names in the test's expected values.
---
 .../test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java   | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
index 2ee5e12..5ae709c 100644
--- a/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/sas/SAS7BDATParserTest.java
@@ -19,7 +19,9 @@ package org.apache.tika.parser.sas;
 import static org.junit.Assert.assertEquals;
 
 import java.io.InputStream;
+import java.text.DateFormatSymbols;
 import java.util.Arrays;
+import java.util.Locale;
 
 import org.apache.tika.TikaTest;
 import org.apache.tika.metadata.Database;
@@ -36,6 +38,7 @@ import org.junit.Test;
 import org.xml.sax.ContentHandler;
 
 public class SAS7BDATParserTest extends TikaTest {
+    private static final String[] SHORT_MONTHS = new DateFormatSymbols(Locale.getDefault()).getShortMonths();
     private Parser parser = new SAS7BDATParser();
     
     @Test
@@ -109,7 +112,7 @@ public class SAS7BDATParserTest extends TikaTest {
         assertContains("2\t4\tThis", content);
         assertContains("4\t16\tThis", content);
         assertContains("\t01-01-1960\t", content);
-        assertContains("\t01Jan1960:00:00", content);
+        assertContains("\t01"+SHORT_MONTHS[0]+"1960:00:00", content);
     }
 
     @Test
@@ -141,6 +144,6 @@ public class SAS7BDATParserTest extends TikaTest {
         assertContains("<th title=\"date\">date</th>", xml);
         // Check formatting of dates
         assertContains("<td>01-01-1960</td>", xml);
-        assertContains("<td>01Jan1960:00:00:10.00</td>", xml);
+        assertContains("<td>01"+SHORT_MONTHS[0]+"1960:00:00:10.00</td>", xml);
     }
 }