You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/12/03 12:26:19 UTC

[tika] branch main updated: TIKA-3943 (#835)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 29b38dab2 TIKA-3943 (#835)
29b38dab2 is described below

commit 29b38dab2b698335846088fc2c4e1bf2cad82cf3
Author: Tim Allison <ta...@apache.org>
AuthorDate: Sat Dec 3 07:26:12 2022 -0500

    TIKA-3943 (#835)
    
    * TIKA-3943 -- include inherited getters and setters in serialization of params
---
 .../java/org/apache/tika/config/TikaConfigSerializer.java     |  2 +-
 .../apache/tika/parser/microsoft/AbstractOfficeParser.java    | 10 +++++++---
 .../org/apache/tika/parser/microsoft/OfficeParserConfig.java  |  2 +-
 .../java/org/apache/tika/config/TikaConfigSerializerTest.java | 11 +++++++++++
 4 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 598651a2c..a2313f408 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -348,7 +348,7 @@ public class TikaConfigSerializer {
         MethodTuples primitiveSetters = new MethodTuples();
         MethodTuples nonPrimitiveGetters = new MethodTuples();
         MethodTuples primitiveGetters = new MethodTuples();
-        for (Method method : object.getClass().getDeclaredMethods()) {
+        for (Method method : object.getClass().getMethods()) {
             Class[] parameterTypes = method.getParameterTypes();
 
             if (setterMatcher.reset(method.getName()).find()) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
index 254a1a051..2ff728074 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
@@ -146,13 +146,17 @@ public abstract class AbstractOfficeParser extends AbstractParser {
      * of overly large byte arrays.  Use carefully; and please open up issues on
      * POI's bugzilla to bump values for specific records.
      *
+     * If the value is &lt;= 0, it is ignored.
+     *
      * @param maxOverride
      */
     @Field
     public void setByteArrayMaxOverride(int maxOverride) {
-        IOUtils.setByteArrayMaxOverride(maxOverride);
-        //required for serialization
-        defaultOfficeParserConfig.setMaxOverride(maxOverride);
+        if (maxOverride > 0) {
+            IOUtils.setByteArrayMaxOverride(maxOverride);
+            //required for serialization
+            defaultOfficeParserConfig.setMaxOverride(maxOverride);
+        }
     }
 
     public int getByteArrayMaxOverride() {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
index fccebba46..680b63c9e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
@@ -37,7 +37,7 @@ public class OfficeParserConfig implements Serializable {
     private boolean extractAllAlternativesFromMSG;
 
     private String dateOverrideFormat = null;
-    private int maxOverride;
+    private int maxOverride = 0;//ignore
 
     /**
      * @return whether or not to extract macros
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
index 0c354e474..d7313db6f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
@@ -17,6 +17,7 @@
 package org.apache.tika.config;
 
 import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertContainsCount;
 import static org.apache.tika.TikaTest.assertNotContained;
 
 import java.io.ByteArrayInputStream;
@@ -81,6 +82,16 @@ public class TikaConfigSerializerTest {
         }
     }
 
+    @Test
+    public void testOfficeParserParams() throws Exception {
+        TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+        StringWriter writer = new StringWriter();
+        TikaConfigSerializer.serialize(tikaConfig, TikaConfigSerializer.Mode.STATIC_FULL,
+                writer, StandardCharsets.UTF_8);
+        assertContainsCount("<param name=\"concatenatePhoneticRuns\" type=\"bool\">true</param>",
+                writer.toString(), 3);
+    }
+
     private Path getPath(String config) {
         try {
             return Paths.get(TikaConfigSerializerTest.class.getResource("/configs/" + config)