You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2024/03/23 19:19:43 UTC

(tika) branch TIKA-4171b created (now 63cbe674e)

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4171b
in repository https://gitbox.apache.org/repos/asf/tika.git


      at 63cbe674e TIKA-4171 -- fix regression when field names are missing in the XFAExtractor

This branch includes the following new commits:

     new 63cbe674e TIKA-4171 -- fix regression when field names are missing in the XFAExtractor

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.



(tika) 01/01: TIKA-4171 -- fix regression when field names are missing in the XFAExtractor

Posted by ta...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4171b
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 63cbe674e409ed05bb810488084beddb3da43a35
Author: tallison <ta...@apache.org>
AuthorDate: Sat Mar 23 15:19:28 2024 -0400

    TIKA-4171 -- fix regression when field names are missing in the XFAExtractor
---
 .../src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java         | 3 +++
 .../src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java        | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
index 14bb07b1d..a79e942e8 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/XFAExtractor.java
@@ -123,6 +123,9 @@ class XFAExtractor {
                     (field.toolTip == null || field.toolTip.trim().length() == 0) ? fieldName :
                             field.toolTip;
             String[] fieldValues = pdfObjRToValues.getValues(fieldName);
+            if (fieldValues.length == 0) {
+                fieldValues = new String[]{""};
+            }
             for (String fieldValue : fieldValues) {
                 AttributesImpl attrs = new AttributesImpl();
                 attrs.addAttribute("", "fieldName", "fieldName", "CDATA", fieldName);
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 6e9167f37..6eb0b4a0a 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -986,7 +986,7 @@ public class PDFParserTest extends TikaTest {
         while (matcher.find()) {
             listItems++;
         }
-        assertEquals(24, listItems);
+        assertEquals(27, listItems);
     }
 
     @Test