You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/10/12 14:54:57 UTC
[tika] branch main updated: TIKA-3569 -- fix npe when mediatype is null
This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new dd2799f TIKA-3569 -- fix npe when mediatype is null
dd2799f is described below
commit dd2799f1acc7a2bf61e11afac7a3d4ea065baf33
Author: tballison <ta...@apache.org>
AuthorDate: Tue Oct 12 10:54:48 2021 -0400
TIKA-3569 -- fix npe when mediatype is null
---
.../tika/parser/image/AbstractImageParser.java | 14 ++-
.../apache/tika/parser/image/ImageParserTest.java | 103 ++++++++++-----------
2 files changed, 59 insertions(+), 58 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
index fab2b90..d58fca0 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
@@ -42,7 +42,15 @@ public abstract class AbstractImageParser extends AbstractParser {
public static String OCR_MEDIATYPE_PREFIX = "ocr-";
+ /**
+ *
+ * @param mediaType
+ * @return ocr media type if mediatype is not null; returns null if mediatype is null
+ */
static MediaType convertToOCRMediaType(MediaType mediaType) {
+ if (mediaType == null) {
+ return null;
+ }
return new MediaType(mediaType.getType(), OCR_MEDIATYPE_PREFIX + mediaType.getSubtype());
}
@@ -61,11 +69,13 @@ public abstract class AbstractImageParser extends AbstractParser {
ParseContext context) throws IOException, SAXException, TikaException {
String mediaTypeString = metadata.get(Metadata.CONTENT_TYPE);
- //note: mediaType can be null
+ //note: mediaType can be null if mediaTypeString is null or
+ //not parseable.
MediaType mediaType = normalizeMediaType(MediaType.parse(mediaTypeString));
MediaType ocrMediaType = convertToOCRMediaType(mediaType);
Parser ocrParser = EmbeddedDocumentUtil.getStatelessParser(context);
- if (ocrParser == null || !ocrParser.getSupportedTypes(context).contains(ocrMediaType)) {
+ if (ocrMediaType == null ||
+ ocrParser == null || !ocrParser.getSupportedTypes(context).contains(ocrMediaType)) {
extractMetadata(stream, handler, metadata, context);
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
index 9892553..6bdc54d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
@@ -71,38 +71,30 @@ public class ImageParserTest extends TikaTest {
assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
- assertEquals(
- "imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " +
- "imageHeight=75, interlaceFlag=false",
- metadata.get("ImageDescriptor"));
+ assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " +
+ "imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
assertEquals("Index", metadata.get("Data SampleFormat"));
assertEquals("3", metadata.get("Chroma NumChannels"));
assertEquals("1", metadata.get("Compression NumProgressiveScans"));
assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
- assertEquals(
- "Licensed to the Apache Software Foundation (ASF) under " +
- "one or more contributor license agreements. See the NOTICE file " +
- "distributed with this work for additional information regarding " +
- "copyright ownership.",
- metadata.get("CommentExtensions CommentExtension"));
- assertEquals(
- "value=Licensed to the Apache Software Foundation (ASF) under one " +
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under " +
+ "one or more contributor license agreements. See the NOTICE file " +
+ "distributed with this work for additional information regarding " +
+ "copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
+ assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one " +
"or more contributor license agreements. See the NOTICE file " +
"distributed with this work for additional information regarding " +
"copyright ownership., encoding=ISO-8859-1, compression=none",
metadata.get("Text TextEntry"));
assertEquals("true", metadata.get("Chroma BlackIsZero"));
- assertEquals(
- "disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " +
- "delayTime=0, transparentColorIndex=0",
- metadata.get("GraphicControlExtension"));
+ assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " +
+ "delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
assertEquals("image/gif", metadata.get("Content-Type"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals(
- "Licensed to the Apache Software Foundation (ASF) under one or " +
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
"more contributor license agreements. See the NOTICE file distributed " +
"with this work for additional information regarding copyright ownership.",
metadata.get(TikaCoreProperties.COMMENTS));
@@ -121,51 +113,39 @@ public class ImageParserTest extends TikaTest {
assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize"));
assertEquals("false", metadata.get("Compression Lossless"));
assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable"));
- assertEquals(
- "majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " +
- "Ydensity=72, thumbWidth=0, thumbHeight=0",
- metadata.get("JPEGvariety app0JFIF"));
+ assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " +
+ "Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF"));
assertEquals("225", metadata.get("markerSequence unknown"));
assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0",
metadata.get("markerSequence sos scanComponentSpec"));
assertEquals("normal", metadata.get("Dimension ImageOrientation"));
assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- assertEquals("elementPrecision=0, qtableId=0",
- metadata.get("markerSequence dqt dqtable"));
- assertEquals(
- "numScanComponents=3, startSpectralSelection=0, " +
+ assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable"));
+ assertEquals("numScanComponents=3, startSpectralSelection=0, " +
"endSpectralSelection=63, approxHigh=0, approxLow=0",
metadata.get("markerSequence sos"));
- assertEquals("componentId=1, HsamplingFactor=1, " +
- "VsamplingFactor=1, QtableSelector=0",
+ assertEquals("componentId=1, HsamplingFactor=1, " + "VsamplingFactor=1, QtableSelector=0",
metadata.get("markerSequence sof componentSpec"));
assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize"));
- assertEquals(
- "Licensed to the Apache Software Foundation (ASF) under one or " +
- "more contributor license agreements. See the NOTICE file " +
- "distributed with this work for additional information " +
- "regarding copyright ownership.",
- metadata.get("markerSequence com"));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
+ "more contributor license agreements. See the NOTICE file " +
+ "distributed with this work for additional information " +
+ "regarding copyright ownership.", metadata.get("markerSequence com"));
assertEquals("3", metadata.get("Chroma NumChannels"));
assertEquals("1", metadata.get("Compression NumProgressiveScans"));
assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
- assertEquals(
- "keyword=comment, value=Licensed to the Apache Software Foundation " +
- "(ASF) under one or more contributor license agreements. See the NOTICE" +
- " file distributed with this work for additional information regarding " +
- "copyright ownership.",
- metadata.get("Text TextEntry"));
+ assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation " +
+ "(ASF) under one or more contributor license agreements. See the NOTICE" +
+ " file distributed with this work for additional information regarding " +
+ "copyright ownership.", metadata.get("Text TextEntry"));
assertEquals("image/jpeg", metadata.get("Content-Type"));
- assertEquals(
- "process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " +
- "numFrameComponents=3",
- metadata.get("markerSequence sof"));
+ assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " +
+ "numFrameComponents=3", metadata.get("markerSequence sof"));
assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals(
- "Licensed to the Apache Software Foundation (ASF) under one or " +
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
"more contributor license agreements. See the NOTICE file distributed " +
"with this work for additional information regarding copyright ownership.",
metadata.get(TikaCoreProperties.COMMENTS));
@@ -189,12 +169,10 @@ public class ImageParserTest extends TikaTest {
metadata.get("tIME"));
assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- assertEquals(
- "keyword=Comment, value=Licensed to the Apache Software Foundation " +
- "(ASF) under one or more contributor license agreements. See the " +
- "NOTICE file distributed with this work for additional information " +
- "regarding copyright ownership.",
- metadata.get("tEXt tEXtEntry"));
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
+ "(ASF) under one or more contributor license agreements. See the " +
+ "NOTICE file distributed with this work for additional information " +
+ "regarding copyright ownership.", metadata.get("tEXt tEXtEntry"));
assertEquals("deflate", metadata.get("Compression CompressionTypeName"));
assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize"));
@@ -204,15 +182,13 @@ public class ImageParserTest extends TikaTest {
assertEquals("3", metadata.get("Chroma NumChannels"));
assertEquals("1", metadata.get("Compression NumProgressiveScans"));
assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
- assertEquals(
- "keyword=Comment, value=Licensed to the Apache Software Foundation " +
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
"(ASF) under one or more contributor license agreements. See the " +
"NOTICE file distributed with this work for additional information " +
"regarding copyright ownership., encoding=ISO-8859-1, compression=none",
metadata.get("Text TextEntry"));
assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration"));
- assertEquals(
- "width=100, height=75, bitDepth=8, colorType=RGB, " +
+ assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, " +
"compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none",
metadata.get("IHDR"));
assertEquals("true", metadata.get("Chroma BlackIsZero"));
@@ -242,4 +218,19 @@ public class ImageParserTest extends TikaTest {
assertEquals(new MediaType("image", "OCR-png"),
AbstractImageParser.convertToOCRMediaType(MediaType.image("png")));
}
+
+ @Test
+ public void testNPEOnEmptyContentType() throws Exception {
+ //test no NPE TIKA-3569
+ Metadata metadata = new Metadata();
+ try (InputStream stream = getResourceAsStream("/test-documents/testBMP.bmp")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
+
+ metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "unparseablegarbage");
+ try (InputStream stream = getResourceAsStream("/test-documents/testBMP.bmp")) {
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ }
+ }
}