You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/12/03 12:26:19 UTC
[tika] branch main updated: TIKA-3943 (#835)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 29b38dab2 TIKA-3943 (#835)
29b38dab2 is described below
commit 29b38dab2b698335846088fc2c4e1bf2cad82cf3
Author: Tim Allison <ta...@apache.org>
AuthorDate: Sat Dec 3 07:26:12 2022 -0500
TIKA-3943 (#835)
* TIKA-3943 -- include inherited getters and setters in serialization of params
---
.../java/org/apache/tika/config/TikaConfigSerializer.java | 2 +-
.../apache/tika/parser/microsoft/AbstractOfficeParser.java | 10 +++++++---
.../org/apache/tika/parser/microsoft/OfficeParserConfig.java | 2 +-
.../java/org/apache/tika/config/TikaConfigSerializerTest.java | 11 +++++++++++
4 files changed, 20 insertions(+), 5 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
index 598651a2c..a2313f408 100644
--- a/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
+++ b/tika-core/src/main/java/org/apache/tika/config/TikaConfigSerializer.java
@@ -348,7 +348,7 @@ public class TikaConfigSerializer {
MethodTuples primitiveSetters = new MethodTuples();
MethodTuples nonPrimitiveGetters = new MethodTuples();
MethodTuples primitiveGetters = new MethodTuples();
- for (Method method : object.getClass().getDeclaredMethods()) {
+ for (Method method : object.getClass().getMethods()) {
Class[] parameterTypes = method.getParameterTypes();
if (setterMatcher.reset(method.getName()).find()) {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
index 254a1a051..2ff728074 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/AbstractOfficeParser.java
@@ -146,13 +146,17 @@ public abstract class AbstractOfficeParser extends AbstractParser {
* of overly large byte arrays. Use carefully; and please open up issues on
* POI's bugzilla to bump values for specific records.
*
+ * If the value is less than or equal to 0, it is ignored.
+ *
* @param maxOverride
*/
@Field
public void setByteArrayMaxOverride(int maxOverride) {
- IOUtils.setByteArrayMaxOverride(maxOverride);
- //required for serialization
- defaultOfficeParserConfig.setMaxOverride(maxOverride);
+ if (maxOverride > 0) {
+ IOUtils.setByteArrayMaxOverride(maxOverride);
+ //required for serialization
+ defaultOfficeParserConfig.setMaxOverride(maxOverride);
+ }
}
public int getByteArrayMaxOverride() {
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
index fccebba46..680b63c9e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/OfficeParserConfig.java
@@ -37,7 +37,7 @@ public class OfficeParserConfig implements Serializable {
private boolean extractAllAlternativesFromMSG;
private String dateOverrideFormat = null;
- private int maxOverride;
+ private int maxOverride = 0;//ignore
/**
* @return whether or not to extract macros
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
index 0c354e474..d7313db6f 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/config/TikaConfigSerializerTest.java
@@ -17,6 +17,7 @@
package org.apache.tika.config;
import static org.apache.tika.TikaTest.assertContains;
+import static org.apache.tika.TikaTest.assertContainsCount;
import static org.apache.tika.TikaTest.assertNotContained;
import java.io.ByteArrayInputStream;
@@ -81,6 +82,16 @@ public class TikaConfigSerializerTest {
}
}
+ @Test
+ public void testOfficeParserParams() throws Exception {
+ TikaConfig tikaConfig = TikaConfig.getDefaultConfig();
+ StringWriter writer = new StringWriter();
+ TikaConfigSerializer.serialize(tikaConfig, TikaConfigSerializer.Mode.STATIC_FULL,
+ writer, StandardCharsets.UTF_8);
+ assertContainsCount("<param name=\"concatenatePhoneticRuns\" type=\"bool\">true</param>",
+ writer.toString(), 3);
+ }
+
private Path getPath(String config) {
try {
return Paths.get(TikaConfigSerializerTest.class.getResource("/configs/" + config)