You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2022/08/05 14:15:21 UTC
[tika] branch main updated: add bounds to storage of types and subtypes
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 19fb7bf5d add bounds to storage of types and subtypes
19fb7bf5d is described below
commit 19fb7bf5d42641eff6575fa805ffb906a3e95401
Author: tallison <ta...@apache.org>
AuthorDate: Fri Aug 5 10:15:13 2022 -0400
add bounds to storage of types and subtypes
---
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 24 ++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index 7e7ef16ea..95dd6565e 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -139,6 +139,10 @@ class AbstractPDF2XHTML extends PDFTextStripper {
*/
private final static int MAX_RECURSION_DEPTH = 100;
private final static int MAX_BOOKMARK_ITEMS = 10000;
+
+ //This cap applies separately to annotation types and to annotation subtypes.
+ //The number of these can be unbounded, so we limit how many we store.
+ private final static int MAX_ANNOTATION_TYPES = 100;
private static final String THREE_D = "3D";
private static final COSName THREE_DD = COSName.getPDFName("3DD");
private static final String NULL_STRING = "null";
@@ -644,16 +648,20 @@ class AbstractPDF2XHTML extends PDFTextStripper {
try {
for (PDAnnotation annotation : page.getAnnotations()) {
String annotationName = annotation.getAnnotationName();
- if (annotationName != null) {
- annotationTypes.add(annotationName);
- } else {
- annotationTypes.add(NULL_STRING);
+ if (annotationTypes.size() < MAX_ANNOTATION_TYPES) {
+ if (annotationName != null) {
+ annotationTypes.add(annotationName);
+ } else {
+ annotationTypes.add(NULL_STRING);
+ }
}
String annotationSubtype = annotation.getSubtype();
- if (annotationSubtype != null) {
- annotationSubtypes.add(annotationSubtype);
- } else {
- annotationSubtypes.add(NULL_STRING);
+ if (annotationSubtypes.size() < MAX_ANNOTATION_TYPES) {
+ if (annotationSubtype != null) {
+ annotationSubtypes.add(annotationSubtype);
+ } else {
+ annotationSubtypes.add(NULL_STRING);
+ }
}
if (annotation instanceof PDAnnotationFileAttachment) {
PDAnnotationFileAttachment fann = (PDAnnotationFileAttachment) annotation;