You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/25 16:24:26 UTC
(tika) branch branch_2x updated: TIKA-4171 -- fix regression when field names are missing in the XFAExtractor (#1679)
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch branch_2x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_2x by this push:
new a323ee5a4 TIKA-4171 -- fix regression when field names are missing in the XFAExtractor (#1679)
a323ee5a4 is described below
commit a323ee5a43517bed4478378d0e586f66adc32133
Author: Tim Allison <ta...@apache.org>
AuthorDate: Mon Mar 25 12:24:20 2024 -0400
TIKA-4171 -- fix regression when field names are missing in the XFAExtractor (#1679)
---
.../src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java | 3 +++
.../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java | 2 +-
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
index 14bb07b1d..a79e942e8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
@@ -123,6 +123,9 @@ class XFAExtractor {
                     (field.toolTip == null || field.toolTip.trim().length() == 0) ? fieldName :
                             field.toolTip;
             String[] fieldValues = pdfObjRToValues.getValues(fieldName);
+            if (fieldValues.length == 0) {
+                fieldValues = new String[]{""};
+            }
             for (String fieldValue : fieldValues) {
                 AttributesImpl attrs = new AttributesImpl();
                 attrs.addAttribute("", "fieldName", "fieldName", "CDATA", fieldName);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 6e9167f37..6eb0b4a0a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -986,7 +986,7 @@ public class PDFParserTest extends TikaTest {
         while (matcher.find()) {
             listItems++;
         }
-        assertEquals(24, listItems);
+        assertEquals(27, listItems);
     }
 
     @Test