You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2020/12/08 15:55:58 UTC
[tika] branch main updated: TIKA-3243 -- bump max record length and enable manual configuration of max record length
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new e8fa990 TIKA-3243 -- bump max record length and enable manual configuration of max record length
e8fa990 is described below
commit e8fa990e201f9ffdef2da3b0e2237a0b93ed9353
Author: tallison <ta...@apache.org>
AuthorDate: Tue Dec 8 10:55:31 2020 -0500
TIKA-3243 -- bump max record length and enable manual configuration of max record length
---
.../org/apache/tika/parser/image/PSDParser.java | 23 ++++++++++++-----
.../apache/tika/parser/image/PSDParserTest.java | 10 ++++++++
.../tika/parser/image/tika-config-TIKA-3243.xml | 29 ++++++++++++++++++++++
3 files changed, 56 insertions(+), 6 deletions(-)
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/PSDParser.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
index 8b3e2fd..7dc4253 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/PSDParser.java
@@ -26,6 +26,7 @@ import java.util.HashSet;
import java.util.Set;
import org.apache.commons.io.IOUtils;
+import org.apache.tika.config.Field;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.EndianUtils;
import org.apache.tika.metadata.Metadata;
@@ -62,9 +63,11 @@ public class PSDParser extends AbstractParser {
Collections.unmodifiableSet(new HashSet<MediaType>(Arrays.asList(
MediaType.image("vnd.adobe.photoshop"))));
- private static final int MAX_DATA_LENGTH_BYTES = 1000000;
+ private static final int MAX_DATA_LENGTH_BYTES = 10_000_000;
private static final int MAX_BLOCKS = 10000;
+ private int maxDataLengthBytes = MAX_DATA_LENGTH_BYTES;
+
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
@@ -127,7 +130,7 @@ public class PSDParser extends AbstractParser {
//infinite loop by only reading 10000 blocks
int blocks = 0;
while (read < imageResourcesSectionSize && blocks < MAX_BLOCKS) {
- ResourceBlock rb = new ResourceBlock(stream);
+ ResourceBlock rb = new ResourceBlock(stream, maxDataLengthBytes);
if (rb.totalLength <= 0) {
//break;
}
@@ -159,6 +162,11 @@ public class PSDParser extends AbstractParser {
xhtml.endDocument();
}
+ @Field
+ public void setMaxDataLengthBytes(int maxDataLengthBytes) {
+ this.maxDataLengthBytes = maxDataLengthBytes;
+ }
+
private static class ResourceBlock {
private static final long SIGNATURE = 0x3842494d; // 8BIM
private static final int ID_CAPTION = 0x03F0;
@@ -170,12 +178,15 @@ public class PSDParser extends AbstractParser {
private static final int ID_AUTO_SAVE_FILE_PATH = 0x043E;
private static final int ID_THUMBNAIL_RESOURCE = 0x040C;
+ private final int maxDataLengthBytes;
private int id;
private String name;
private byte[] data;
private int totalLength;
static int counter = 0;
- private ResourceBlock(InputStream stream) throws IOException, TikaException {
+
+ private ResourceBlock(InputStream stream, int maxDataLengthBytes) throws IOException, TikaException {
+ this.maxDataLengthBytes = maxDataLengthBytes;
counter++;
// Verify the signature
long sig = EndianUtils.readIntBE(stream);
@@ -224,9 +235,9 @@ public class PSDParser extends AbstractParser {
totalLength = 4 + 2 + nameLen + 4 + dataLen;
// Do we have use for the data segment?
if (captureData(id)) {
- if (dataLen > MAX_DATA_LENGTH_BYTES) {
- throw new TikaException("data length must be < "+MAX_DATA_LENGTH_BYTES+
- ": "+dataLen);
+ if (dataLen > maxDataLengthBytes) {
+ throw new TikaException("data length must be < " +
+ maxDataLengthBytes + ": " + dataLen);
}
data = new byte[dataLen];
IOUtils.readFully(stream, data);
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
index 657de5d..e2cb091 100644
--- a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/PSDParserTest.java
@@ -21,6 +21,8 @@ import static org.junit.Assert.assertEquals;
import java.io.InputStream;
import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.XMPMM;
import org.apache.tika.parser.ParseContext;
@@ -70,4 +72,12 @@ public class PSDParserTest extends TikaTest {
assertEquals("Adobe Photoshop CC 2014 (Macintosh)", metadata.get(XMPMM.HISTORY_SOFTWARE_AGENT));
assertEquals("xmp.iid:63681182-81a0-4035-b4b2-19bea6201c05", metadata.get(XMPMM.HISTORY_EVENT_INSTANCEID));
}
+
+ @Test (expected = TikaException.class)
+ public void testMaxLength() throws Exception {
+ TikaConfig config = new TikaConfig(getResourceAsStream("tika-config-TIKA-3243.xml"));
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/x-psd");
+ getXML("testPSD_xmp.psd", config.getParser(), metadata);
+ }
}
diff --git a/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/resources/org/apache/tika/parser/image/tika-config-TIKA-3243.xml b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/resources/org/apache/tika/parser/image/tika-config-TIKA-3243.xml
new file mode 100644
index 0000000..a3230ee
--- /dev/null
+++ b/tika-parsers/tika-parsers-classic/tika-parsers-classic-modules/tika-parser-image-module/src/test/resources/org/apache/tika/parser/image/tika-config-TIKA-3243.xml
@@ -0,0 +1,29 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<properties>
+ <parsers>
+ <parser class="org.apache.tika.parser.DefaultParser">
+ <parser-exclude class="org.apache.tika.parser.image.PSDParser"/>
+ </parser>
+ <parser class="org.apache.tika.parser.image.PSDParser">
+ <params>
+ <param name="maxDataLengthBytes" type="int">100</param>
+ </params>
+ </parser>
+ </parsers>
+</properties>
\ No newline at end of file