You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2021/10/12 14:54:57 UTC

[tika] branch main updated: TIKA-3569 -- fix npe when mediatype is null

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new dd2799f  TIKA-3569 -- fix npe when mediatype is null
dd2799f is described below

commit dd2799f1acc7a2bf61e11afac7a3d4ea065baf33
Author: tballison <ta...@apache.org>
AuthorDate: Tue Oct 12 10:54:48 2021 -0400

    TIKA-3569 -- fix npe when mediatype is null
---
 .../tika/parser/image/AbstractImageParser.java     |  14 ++-
 .../apache/tika/parser/image/ImageParserTest.java  | 103 ++++++++++-----------
 2 files changed, 59 insertions(+), 58 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
index fab2b90..d58fca0 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/main/java/org/apache/tika/parser/image/AbstractImageParser.java
@@ -42,7 +42,15 @@ public abstract class AbstractImageParser extends AbstractParser {
 
     public static String OCR_MEDIATYPE_PREFIX = "ocr-";
 
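+    /**
+     * Converts a media type to its OCR variant by prefixing the subtype with "ocr-".
+     * @param mediaType media type to convert; may be null
+     * @return ocr media type if mediatype is not null; returns null if mediatype is null
+     */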
     static MediaType convertToOCRMediaType(MediaType mediaType) {
+        if (mediaType == null) {
+            return null;
+        }
         return new MediaType(mediaType.getType(), OCR_MEDIATYPE_PREFIX + mediaType.getSubtype());
     }
 
@@ -61,11 +69,13 @@ public abstract class AbstractImageParser extends AbstractParser {
                       ParseContext context) throws IOException, SAXException, TikaException {
 
         String mediaTypeString = metadata.get(Metadata.CONTENT_TYPE);
-        //note: mediaType can be null
+        //note: mediaType can be null if mediaTypeString is null or
+        //not parseable.
         MediaType mediaType = normalizeMediaType(MediaType.parse(mediaTypeString));
         MediaType ocrMediaType = convertToOCRMediaType(mediaType);
         Parser ocrParser = EmbeddedDocumentUtil.getStatelessParser(context);
-        if (ocrParser == null || !ocrParser.getSupportedTypes(context).contains(ocrMediaType)) {
+        if (ocrMediaType == null ||
+                ocrParser == null || !ocrParser.getSupportedTypes(context).contains(ocrMediaType)) {
             extractMetadata(stream, handler, metadata, context);
             XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
             xhtml.startDocument();
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
index 9892553..6bdc54d 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-image-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
@@ -71,38 +71,30 @@ public class ImageParserTest extends TikaTest {
         assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
         assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
         assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
-        assertEquals(
-                "imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " +
-                        "imageHeight=75, interlaceFlag=false",
-                metadata.get("ImageDescriptor"));
+        assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, " +
+                "imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
         assertEquals("Index", metadata.get("Data SampleFormat"));
         assertEquals("3", metadata.get("Chroma NumChannels"));
         assertEquals("1", metadata.get("Compression NumProgressiveScans"));
         assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
-        assertEquals(
-                "Licensed to the Apache Software Foundation (ASF) under " +
-                        "one or more contributor license agreements.  See the NOTICE file " +
-                        "distributed with this work for additional information regarding " +
-                        "copyright ownership.",
-                metadata.get("CommentExtensions CommentExtension"));
-        assertEquals(
-                "value=Licensed to the Apache Software Foundation (ASF) under one " +
+        assertEquals("Licensed to the Apache Software Foundation (ASF) under " +
+                "one or more contributor license agreements.  See the NOTICE file " +
+                "distributed with this work for additional information regarding " +
+                "copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
+        assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one " +
                         "or more contributor license agreements.  See the NOTICE file " +
                         "distributed with this work for additional information regarding " +
                         "copyright ownership., encoding=ISO-8859-1, compression=none",
                 metadata.get("Text TextEntry"));
         assertEquals("true", metadata.get("Chroma BlackIsZero"));
-        assertEquals(
-                "disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " +
-                        "delayTime=0, transparentColorIndex=0",
-                metadata.get("GraphicControlExtension"));
+        assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, " +
+                "delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
         assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
         assertEquals("image/gif", metadata.get("Content-Type"));
 
         assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
         assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
-        assertEquals(
-                "Licensed to the Apache Software Foundation (ASF) under one or " +
+        assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
                         "more contributor license agreements.  See the NOTICE file distributed " +
                         "with this work for additional information regarding copyright ownership.",
                 metadata.get(TikaCoreProperties.COMMENTS));
@@ -121,51 +113,39 @@ public class ImageParserTest extends TikaTest {
         assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize"));
         assertEquals("false", metadata.get("Compression Lossless"));
         assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable"));
-        assertEquals(
-                "majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " +
-                        "Ydensity=72, thumbWidth=0, thumbHeight=0",
-                metadata.get("JPEGvariety app0JFIF"));
+        assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, " +
+                "Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF"));
         assertEquals("225", metadata.get("markerSequence unknown"));
         assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0",
                 metadata.get("markerSequence sos scanComponentSpec"));
         assertEquals("normal", metadata.get("Dimension ImageOrientation"));
         assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
-        assertEquals("elementPrecision=0, qtableId=0",
-                metadata.get("markerSequence dqt dqtable"));
-        assertEquals(
-                "numScanComponents=3, startSpectralSelection=0, " +
+        assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable"));
+        assertEquals("numScanComponents=3, startSpectralSelection=0, " +
                         "endSpectralSelection=63, approxHigh=0, approxLow=0",
                 metadata.get("markerSequence sos"));
-        assertEquals("componentId=1, HsamplingFactor=1, " +
-                        "VsamplingFactor=1, QtableSelector=0",
+        assertEquals("componentId=1, HsamplingFactor=1, " + "VsamplingFactor=1, QtableSelector=0",
                 metadata.get("markerSequence sof componentSpec"));
         assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
         assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize"));
-        assertEquals(
-                "Licensed to the Apache Software Foundation (ASF) under one or " +
-                        "more contributor license agreements.  See the NOTICE file " +
-                        "distributed with this work for additional information " +
-                        "regarding copyright ownership.",
-                metadata.get("markerSequence com"));
+        assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
+                "more contributor license agreements.  See the NOTICE file " +
+                "distributed with this work for additional information " +
+                "regarding copyright ownership.", metadata.get("markerSequence com"));
         assertEquals("3", metadata.get("Chroma NumChannels"));
         assertEquals("1", metadata.get("Compression NumProgressiveScans"));
         assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
-        assertEquals(
-                "keyword=comment, value=Licensed to the Apache Software Foundation " +
-                        "(ASF) under one or more contributor license agreements.  See the NOTICE" +
-                        " file distributed with this work for additional information regarding " +
-                        "copyright ownership.",
-                metadata.get("Text TextEntry"));
+        assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation " +
+                "(ASF) under one or more contributor license agreements.  See the NOTICE" +
+                " file distributed with this work for additional information regarding " +
+                "copyright ownership.", metadata.get("Text TextEntry"));
         assertEquals("image/jpeg", metadata.get("Content-Type"));
-        assertEquals(
-                "process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " +
-                        "numFrameComponents=3",
-                metadata.get("markerSequence sof"));
+        assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, " +
+                "numFrameComponents=3", metadata.get("markerSequence sof"));
 
         assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
         assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
-        assertEquals(
-                "Licensed to the Apache Software Foundation (ASF) under one or " +
+        assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
                         "more contributor license agreements.  See the NOTICE file distributed " +
                         "with this work for additional information regarding copyright ownership.",
                 metadata.get(TikaCoreProperties.COMMENTS));
@@ -189,12 +169,10 @@ public class ImageParserTest extends TikaTest {
                 metadata.get("tIME"));
         assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
         assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
-        assertEquals(
-                "keyword=Comment, value=Licensed to the Apache Software Foundation " +
-                        "(ASF) under one or more contributor license agreements.  See the " +
-                        "NOTICE file distributed with this work for additional information " +
-                        "regarding copyright ownership.",
-                metadata.get("tEXt tEXtEntry"));
+        assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
+                "(ASF) under one or more contributor license agreements.  See the " +
+                "NOTICE file distributed with this work for additional information " +
+                "regarding copyright ownership.", metadata.get("tEXt tEXtEntry"));
         assertEquals("deflate", metadata.get("Compression CompressionTypeName"));
         assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
         assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize"));
@@ -204,15 +182,13 @@ public class ImageParserTest extends TikaTest {
         assertEquals("3", metadata.get("Chroma NumChannels"));
         assertEquals("1", metadata.get("Compression NumProgressiveScans"));
         assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
-        assertEquals(
-                "keyword=Comment, value=Licensed to the Apache Software Foundation " +
+        assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation " +
                         "(ASF) under one or more contributor license agreements.  See the " +
                         "NOTICE file distributed with this work for additional information " +
                         "regarding copyright ownership., encoding=ISO-8859-1, compression=none",
                 metadata.get("Text TextEntry"));
         assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration"));
-        assertEquals(
-                "width=100, height=75, bitDepth=8, colorType=RGB, " +
+        assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, " +
                         "compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none",
                 metadata.get("IHDR"));
         assertEquals("true", metadata.get("Chroma BlackIsZero"));
@@ -242,4 +218,19 @@ public class ImageParserTest extends TikaTest {
         assertEquals(new MediaType("image", "OCR-png"),
                 AbstractImageParser.convertToOCRMediaType(MediaType.image("png")));
     }
+
+    @Test
+    public void testNPEOnEmptyContentType() throws Exception {
+        //test no NPE TIKA-3569
+        Metadata metadata = new Metadata();
+        try (InputStream stream = getResourceAsStream("/test-documents/testBMP.bmp")) {
+            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        }
+
+        metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, "unparseablegarbage");
+        try (InputStream stream = getResourceAsStream("/test-documents/testBMP.bmp")) {
+            parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+        }
+    }
 }