You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/04/20 17:25:16 UTC

[tika] 01/02: general code improvements

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 83b0de4d60161ebd4bc224141a959ac8c18d95f4
Author: tallison <ta...@apache.org>
AuthorDate: Mon Apr 18 15:47:44 2022 -0400

    general code improvements
---
 tika-core/src/main/java/org/apache/tika/sax/StandardsText.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tika-core/src/main/java/org/apache/tika/sax/StandardsText.java b/tika-core/src/main/java/org/apache/tika/sax/StandardsText.java
index 4bb863e3f..686832dcc 100644
--- a/tika-core/src/main/java/org/apache/tika/sax/StandardsText.java
+++ b/tika-core/src/main/java/org/apache/tika/sax/StandardsText.java
@@ -53,7 +53,7 @@ import org.apache.tika.sax.StandardReference.StandardReferenceBuilder;
 public class StandardsText {
     // Regular expression to match uppercase headers
     private static final String REGEX_HEADER =
-            "(\\d+\\.(\\d+\\.?)*)\\p{Blank}+([A-Z]+(\\s[A-Z]+)*){5,}";
+            "(\\d{1,10}\\.(\\d{1,10}\\.?){0,10})\\p{Blank}+([A-Z]{1,256}(\\s[A-Z]+){0,256}){5,}";
 
     // Regular expression to match the "APPLICABLE DOCUMENTS" and equivalent
     // sections