You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ta...@apache.org on 2016/06/29 11:11:36 UTC
[30/39] tika git commit: Convert new lines from windows to unix
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
index 98970d9..83d72c9 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
@@ -1,162 +1,162 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.image;
-
-import static org.junit.Assert.assertEquals;
-
-import java.io.InputStream;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class ImageParserTest {
-
- private final Parser parser = new ImageParser();
-
- @Test
- public void testBMP() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testBMP.bmp");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- //TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows
- //assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
- //assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
- assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName"));
- assertEquals("image/bmp", metadata.get("Content-Type"));
-
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
- }
-
- @Test
- public void testGIF() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/gif");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testGIF.gif");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("true", metadata.get("Compression Lossless"));
- assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
- assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
- assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
- assertEquals("Index", metadata.get("Data SampleFormat"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
- assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
- assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
- assertEquals("true", metadata.get("Chroma BlackIsZero"));
- assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
- assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
- assertEquals("image/gif", metadata.get("Content-Type"));
-
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get(TikaCoreProperties.COMMENTS));
- }
-
- @Test
- public void testJPEG() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize"));
- assertEquals("false", metadata.get("Compression Lossless"));
- assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable"));
- assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF"));
- assertEquals("225", metadata.get("markerSequence unknown"));
- assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0", metadata.get("markerSequence sos scanComponentSpec"));
- assertEquals("normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable"));
- assertEquals("numScanComponents=3, startSpectralSelection=0, endSpectralSelection=63, approxHigh=0, approxLow=0", metadata.get("markerSequence sos"));
- assertEquals("componentId=1, HsamplingFactor=1, VsamplingFactor=1, QtableSelector=0", metadata.get("markerSequence sof componentSpec"));
- assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
- assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize"));
- assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("markerSequence com"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
- assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("Text TextEntry"));
- assertEquals("image/jpeg", metadata.get("Content-Type"));
- assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, numFrameComponents=3", metadata.get("markerSequence sof"));
-
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get(TikaCoreProperties.COMMENTS));
- }
-
- @Test
- public void testPNG() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/png");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testPNG.png");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals("75", metadata.get("height"));
- assertEquals("100", metadata.get("width"));
- assertEquals("0.35273367", metadata.get("Dimension VerticalPixelSize"));
- assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
- assertEquals("Perceptual", metadata.get("sRGB"));
- assertEquals("true", metadata.get("Compression Lossless"));
- assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("tIME"));
- assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
- assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
- assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("tEXt tEXtEntry"));
- assertEquals("deflate", metadata.get("Compression CompressionTypeName"));
- assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
- assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize"));
- assertEquals("none", metadata.get("Transparency Alpha"));
- assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835, unitSpecifier=meter", metadata.get("pHYs"));
- assertEquals("3", metadata.get("Chroma NumChannels"));
- assertEquals("1", metadata.get("Compression NumProgressiveScans"));
- assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
- assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
- assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration"));
- assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none", metadata.get("IHDR"));
- assertEquals("true", metadata.get("Chroma BlackIsZero"));
- assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("Document ImageModificationTime"));
- assertEquals("image/png", metadata.get("Content-Type"));
-
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class ImageParserTest {
+
+ private final Parser parser = new ImageParser();
+
+ @Test
+ public void testBMP() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/bmp");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testBMP.bmp");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ //TODO: figure out why we're getting 0.35273367 in Ubuntu, but not Windows
+ //assertEquals("0", metadata.get("Dimension VerticalPhysicalPixelSpacing"));
+ //assertEquals("0", metadata.get("Dimension HorizontalPhysicalPixelSpacing"));
+ assertEquals("BI_RGB", metadata.get("Compression CompressionTypeName"));
+ assertEquals("image/bmp", metadata.get("Content-Type"));
+
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ }
+
+ @Test
+ public void testGIF() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/gif");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testGIF.gif");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ assertEquals("true", metadata.get("Compression Lossless"));
+ assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("lzw", metadata.get("Compression CompressionTypeName"));
+ assertEquals("0", metadata.get("Dimension HorizontalPixelOffset"));
+ assertEquals("imageLeftPosition=0, imageTopPosition=0, imageWidth=100, imageHeight=75, interlaceFlag=false", metadata.get("ImageDescriptor"));
+ assertEquals("Index", metadata.get("Data SampleFormat"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("CommentExtensions CommentExtension"));
+ assertEquals("value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
+ assertEquals("true", metadata.get("Chroma BlackIsZero"));
+ assertEquals("disposalMethod=none, userInputFlag=false, transparentColorFlag=false, delayTime=0, transparentColorIndex=0", metadata.get("GraphicControlExtension"));
+ assertEquals("0", metadata.get("Dimension VerticalPixelOffset"));
+ assertEquals("image/gif", metadata.get("Content-Type"));
+
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get(TikaCoreProperties.COMMENTS));
+ }
+
+ @Test
+ public void testJPEG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ assertEquals("0.35277778", metadata.get("Dimension VerticalPixelSize"));
+ assertEquals("false", metadata.get("Compression Lossless"));
+ assertEquals("class=0, htableId=0", metadata.get("markerSequence dht dhtable"));
+ assertEquals("majorVersion=1, minorVersion=1, resUnits=1, Xdensity=72, Ydensity=72, thumbWidth=0, thumbHeight=0", metadata.get("JPEGvariety app0JFIF"));
+ assertEquals("225", metadata.get("markerSequence unknown"));
+ assertEquals("componentSelector=1, dcHuffTable=0, acHuffTable=0", metadata.get("markerSequence sos scanComponentSpec"));
+ assertEquals("normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("elementPrecision=0, qtableId=0", metadata.get("markerSequence dqt dqtable"));
+ assertEquals("numScanComponents=3, startSpectralSelection=0, endSpectralSelection=63, approxHigh=0, approxLow=0", metadata.get("markerSequence sos"));
+ assertEquals("componentId=1, HsamplingFactor=1, VsamplingFactor=1, QtableSelector=0", metadata.get("markerSequence sof componentSpec"));
+ assertEquals("JPEG", metadata.get("Compression CompressionTypeName"));
+ assertEquals("0.35277778", metadata.get("Dimension HorizontalPixelSize"));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("markerSequence com"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("YCbCr", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("keyword=comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("Text TextEntry"));
+ assertEquals("image/jpeg", metadata.get("Content-Type"));
+ assertEquals("process=0, samplePrecision=8, numLines=75, samplesPerLine=100, numFrameComponents=3", metadata.get("markerSequence sof"));
+
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get(TikaCoreProperties.COMMENTS));
+ }
+
+ @Test
+ public void testPNG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/png");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testPNG.png");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals("75", metadata.get("height"));
+ assertEquals("100", metadata.get("width"));
+ assertEquals("0.35273367", metadata.get("Dimension VerticalPixelSize"));
+ assertEquals("8 8 8", metadata.get("Data BitsPerSample"));
+ assertEquals("Perceptual", metadata.get("sRGB"));
+ assertEquals("true", metadata.get("Compression Lossless"));
+ assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("tIME"));
+ assertEquals("Normal", metadata.get("Dimension ImageOrientation"));
+ assertEquals("1.0", metadata.get("Dimension PixelAspectRatio"));
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership.", metadata.get("tEXt tEXtEntry"));
+ assertEquals("deflate", metadata.get("Compression CompressionTypeName"));
+ assertEquals("UnsignedIntegral", metadata.get("Data SampleFormat"));
+ assertEquals("0.35273367", metadata.get("Dimension HorizontalPixelSize"));
+ assertEquals("none", metadata.get("Transparency Alpha"));
+ assertEquals("pixelsPerUnitXAxis=2835, pixelsPerUnitYAxis=2835, unitSpecifier=meter", metadata.get("pHYs"));
+ assertEquals("3", metadata.get("Chroma NumChannels"));
+ assertEquals("1", metadata.get("Compression NumProgressiveScans"));
+ assertEquals("RGB", metadata.get("Chroma ColorSpaceType"));
+ assertEquals("keyword=Comment, value=Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership., encoding=ISO-8859-1, compression=none", metadata.get("Text TextEntry"));
+ assertEquals("PixelInterleaved", metadata.get("Data PlanarConfiguration"));
+ assertEquals("width=100, height=75, bitDepth=8, colorType=RGB, compressionMethod=deflate, filterMethod=adaptive, interlaceMethod=none", metadata.get("IHDR"));
+ assertEquals("true", metadata.get("Chroma BlackIsZero"));
+ assertEquals("year=2008, month=5, day=6, hour=6, minute=18, second=47", metadata.get("Document ImageModificationTime"));
+ assertEquals("image/png", metadata.get("Content-Type"));
+
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8 8 8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
index b78a831..7e3a123 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/MetadataFieldsTest.java
@@ -1,36 +1,36 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.image;
-
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import org.apache.tika.metadata.TIFF;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.junit.Test;
-
-public class MetadataFieldsTest {
-
- @Test
- public void testIsMetadataField() {
- assertFalse(MetadataFields.isMetadataField("random string that is not a field"));
- assertFalse(MetadataFields.isMetadataField("xyz"));
- assertTrue(MetadataFields.isMetadataField(TikaCoreProperties.KEYWORDS));
- assertTrue(MetadataFields.isMetadataField(TIFF.F_NUMBER.getName()));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.junit.Test;
+
+public class MetadataFieldsTest {
+
+ @Test
+ public void testIsMetadataField() {
+ assertFalse(MetadataFields.isMetadataField("random string that is not a field"));
+ assertFalse(MetadataFields.isMetadataField("xyz"));
+ assertTrue(MetadataFields.isMetadataField(TikaCoreProperties.KEYWORDS));
+ assertTrue(MetadataFields.isMetadataField(TIFF.F_NUMBER.getName()));
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
index 239c160..d506c33 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/image/TiffParserTest.java
@@ -1,66 +1,66 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.image;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.junit.Test;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class TiffParserTest {
- private final Parser parser = new TiffParser();
-
- @Test
- public void testTIFF() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testTIFF.tif");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
- "more contributor license agreements. See the NOTICE file " +
- "distributed with this work for additional information regarding " +
- "copyright ownership.", metadata.get(TikaCoreProperties.DESCRIPTION));
-
- // All EXIF/TIFF tags
- assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
-
- // Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals("3", metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- // Embedded XMP
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
- assertTrue("got " + keywords, keywords.contains("cat"));
- assertTrue("got " + keywords, keywords.contains("garden"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("got " + subject, subject.contains("cat"));
- assertTrue("got " + subject, subject.contains("garden"));
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class TiffParserTest {
+ private final Parser parser = new TiffParser();
+
+ @Test
+ public void testTIFF() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/tiff");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testTIFF.tif");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals("Licensed to the Apache Software Foundation (ASF) under one or " +
+ "more contributor license agreements. See the NOTICE file " +
+ "distributed with this work for additional information regarding " +
+ "copyright ownership.", metadata.get(TikaCoreProperties.DESCRIPTION));
+
+ // All EXIF/TIFF tags
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+
+ // Core EXIF/TIFF tags
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("75", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals("3", metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ // Embedded XMP
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ assertTrue("got " + keywords, keywords.contains("cat"));
+ assertTrue("got " + keywords, keywords.contains("garden"));
+ List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
+ assertTrue("got " + subject, subject.contains("cat"));
+ assertTrue("got " + subject, subject.contains("garden"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/tika/blob/c7a6bcac/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
----------------------------------------------------------------------
diff --git a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
index f65c797..b189fd7 100644
--- a/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
+++ b/tika-parser-modules/tika-parser-multimedia-module/src/test/java/org/apache/tika/parser/jpeg/JpegParserTest.java
@@ -1,284 +1,284 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.tika.parser.jpeg;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertTrue;
-
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.TimeZone;
-
-import org.apache.tika.metadata.Metadata;
-import org.apache.tika.metadata.TIFF;
-import org.apache.tika.metadata.TikaCoreProperties;
-import org.apache.tika.metadata.XMPMM;
-import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.Parser;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-import org.xml.sax.helpers.DefaultHandler;
-
-public class JpegParserTest {
-
- private final Parser parser = new JpegParser();
- static TimeZone CURR_TIME_ZONE = TimeZone.getDefault();
-
- //As of Drew Noakes' metadata-extractor 2.8.1,
- //unspecified timezones appear to be set to
- //TimeZone.getDefault(). We need to normalize this
- //for testing across different time zones.
- //We also appear to have to specify it in the surefire config:
- //<argLine>-Duser.timezone=UTC</argLine>
- @BeforeClass
- public static void setDefaultTimeZone() {
- TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
- }
- @AfterClass
- public static void resetDefaultTimeZone() {
- TimeZone.setDefault(CURR_TIME_ZONE);
- }
- @Test
- public void testJPEG() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
- assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
- assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
- assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
- assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
- assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
- assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
- assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
- assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
- assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
- assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
- assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
-
- // Check that EXIF/TIFF tags come through with their raw values too
- // (This may be removed for Tika 1.0, as we support more of them
- // with explicit Metadata entries)
- assertEquals("Canon EOS 40D", metadata.get("Model"));
-
- // Common tags
- assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
- assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
- "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
- assertTrue("'canon-55-250' expected in " + keywords, keywords.contains("canon-55-250"));
- assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds"));
- assertTrue("'serbor' expected in " + keywords, keywords.contains("serbor"));
- assertFalse(keywords.contains("canon-55-250 moscow-birds serbor"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("'canon-55-250' expected in " + subject, subject.contains("canon-55-250"));
- assertTrue("'moscow-birds' expected in " + subject, subject.contains("moscow-birds"));
- assertTrue("'serbor' expected in " + subject, subject.contains("serbor"));
- assertFalse(subject.contains("canon-55-250 moscow-birds serbor"));
- }
-
- /**
- * Test for a file with Geographic information (lat, long etc) in it
- */
- @Test
- public void testJPEGGeo() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // Geo tags
- assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
- assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
-
- // Core EXIF/TIFF tags
- assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
- assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
- assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
- assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
- assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
- assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
- assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
- assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
- assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
- assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
- assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
- assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
-
- // Common tags
- assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
- "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
- assertEquals("This image has different Date/Time than Date/Time Original, so it is probably modification date",
- "2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
- assertEquals("Date/Time Original should be stored in EXIF field too",
- "2009-08-11T09:09:45", metadata.get(TIFF.ORIGINAL_DATE));
- assertEquals("canon-55-250", metadata.getValues(TikaCoreProperties.KEYWORDS)[0]);
- assertEquals("canon-55-250", metadata.getValues(Metadata.KEYWORDS)[0]);
- }
-
- /**
- * Test for an image with the geographic information stored in a slightly
- * different way, see TIKA-915 for details
- * Disabled for now, pending a fix to the underlying library
- */
- @Test
- public void testJPEGGeo2() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // Geo tags should be there with 5dp, and not rounded
- assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
- assertEquals("-1.567886", metadata.get(Metadata.LONGITUDE));
- }
-
- @Test
- public void testJPEGTitleAndDescription() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // embedded comments with non-ascii characters
- assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR)); // Dublin Core
- // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
- // but we have to replace them with underscore
-
- List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
- assertTrue(keywords.contains("coast"));
- assertTrue(keywords.contains("bird watching"));
- assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
-
- // Core EXIF/TIFF tags
- assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
- assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
- assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
-
- assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
- assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
- assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
- assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
- assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
- assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
- assertEquals(null, metadata.get(Metadata.SOFTWARE));
- assertEquals("1", metadata.get(Metadata.ORIENTATION)); // Not present
- assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
- assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
- assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
- }
-
- @Test
- public void testJPEGTitleAndDescriptionPhotoshop() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // embedded comments with non-ascii characters
- assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
- List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
- assertTrue("got " + keywords, keywords.contains("bird watching"));
- List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
- assertTrue("got " + subject, subject.contains("bird watching"));
- }
-
- @Test
- public void testJPEGTitleAndDescriptionXnviewmp() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- // XnViewMp's default comment dialog has only comment, not headline.
- // Comment is embedded only if "Write comments in XMP" is enabled in settings
- assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
- // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
- // but we have to replace them with underscore
- String[] subject = metadata.getValues(TikaCoreProperties.KEYWORDS);
- List<String> keywords = Arrays.asList(subject);
- assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast"));
- assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));
- }
-
- @Test
- public void testJPEGoddTagComponent() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
- assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
- assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
- assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
- }
-
- @Test
- public void testJPEGEmptyEXIFDateTime() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
- assertEquals("300.0", metadata.get(TIFF.RESOLUTION_HORIZONTAL));
- assertEquals("300.0", metadata.get(TIFF.RESOLUTION_VERTICAL));
- }
-
- @Test
- public void testJPEGXMPMM() throws Exception {
- Metadata metadata = new Metadata();
- metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
- InputStream stream =
- getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
- parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
-
- //TODO: when jempbox is fixed/xmpbox is used
- //add tests for history...currently not extracted
- assertEquals("xmp.did:49E997348D4911E1AB62EBF9B374B234",
- metadata.get(XMPMM.DOCUMENTID));
- }
-
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.jpeg;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.List;
+import java.util.TimeZone;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.metadata.TIFF;
+import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.metadata.XMPMM;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.junit.AfterClass;
+import org.junit.BeforeClass;
+import org.junit.Test;
+import org.xml.sax.helpers.DefaultHandler;
+
+public class JpegParserTest {
+
+ private final Parser parser = new JpegParser();
+ static TimeZone CURR_TIME_ZONE = TimeZone.getDefault();
+
+ //As of Drew Noakes' metadata-extractor 2.8.1,
+ //unspecified timezones appear to be set to
+ //TimeZone.getDefault(). We need to normalize this
+ //for testing across different time zones.
+ //We also appear to have to specify it in the surefire config:
+ //<argLine>-Duser.timezone=UTC</argLine>
+ @BeforeClass
+ public static void setDefaultTimeZone() {
+ TimeZone.setDefault(TimeZone.getTimeZone("UTC"));
+ }
+ @AfterClass
+ public static void resetDefaultTimeZone() {
+ TimeZone.setDefault(CURR_TIME_ZONE);
+ }
+ @Test
+ public void testJPEG() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // Core EXIF/TIFF tags
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+ assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+ assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
+ assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+ assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+
+ // Check that EXIF/TIFF tags come through with their raw values too
+ // (This may be removed for Tika 1.0, as we support more of them
+ // with explicit Metadata entries)
+ assertEquals("Canon EOS 40D", metadata.get("Model"));
+
+ // Common tags
+ assertEquals("2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
+ assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
+ "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ assertTrue("'canon-55-250' expected in " + keywords, keywords.contains("canon-55-250"));
+ assertTrue("'moscow-birds' expected in " + keywords, keywords.contains("moscow-birds"));
+ assertTrue("'serbor' expected in " + keywords, keywords.contains("serbor"));
+ assertFalse(keywords.contains("canon-55-250 moscow-birds serbor"));
+ List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
+ assertTrue("'canon-55-250' expected in " + subject, subject.contains("canon-55-250"));
+ assertTrue("'moscow-birds' expected in " + subject, subject.contains("moscow-birds"));
+ assertTrue("'serbor' expected in " + subject, subject.contains("serbor"));
+ assertFalse(subject.contains("canon-55-250 moscow-birds serbor"));
+ }
+
+ /**
+ * Test for a file with Geographic information (lat, long etc) in it
+ */
+ @Test
+ public void testJPEGGeo() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_GEO.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // Geo tags
+ assertEquals("12.54321", metadata.get(Metadata.LATITUDE));
+ assertEquals("-54.1234", metadata.get(Metadata.LONGITUDE));
+
+ // Core EXIF/TIFF tags
+ assertEquals("100", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("68", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ assertEquals("6.25E-4", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1600
+ assertEquals("5.6", metadata.get(Metadata.F_NUMBER));
+ assertEquals("false", metadata.get(Metadata.FLASH_FIRED));
+ assertEquals("194.0", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("400", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals("Canon", metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals("Canon EOS 40D", metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals("Adobe Photoshop CS3 Macintosh", metadata.get(Metadata.SOFTWARE));
+ assertEquals(null, metadata.get(Metadata.ORIENTATION)); // Not present
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("240.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+
+ // Common tags
+ assertEquals("Date/Time Original for when the photo was taken, unspecified time zone",
+ "2009-08-11T09:09:45", metadata.get(TikaCoreProperties.CREATED));
+ assertEquals("This image has different Date/Time than Date/Time Original, so it is probably modification date",
+ "2009-10-02T23:02:49", metadata.get(Metadata.LAST_MODIFIED));
+ assertEquals("Date/Time Original should be stored in EXIF field too",
+ "2009-08-11T09:09:45", metadata.get(TIFF.ORIGINAL_DATE));
+ assertEquals("canon-55-250", metadata.getValues(TikaCoreProperties.KEYWORDS)[0]);
+ assertEquals("canon-55-250", metadata.getValues(Metadata.KEYWORDS)[0]);
+ }
+
+ /**
+ * Test for an image with the geographic information stored in a slightly
+ * different way, see TIKA-915 for details
+ * NOTE(review): currently enabled (plain @Test, no @Ignore) - confirm the library fix landed
+ */
+ @Test
+ public void testJPEGGeo2() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_GEO_2.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // Geo tags should be there with all 6 decimal places, and not rounded
+ assertEquals("51.575762", metadata.get(Metadata.LATITUDE));
+ assertEquals("-1.567886", metadata.get(Metadata.LONGITUDE));
+ }
+
+ @Test
+ public void testJPEGTitleAndDescription() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // embedded comments with non-ascii characters
+ assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR)); // Dublin Core
+ // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
+ // and the values are asserted as-is below, spaces included (no underscore replacement)
+
+ List<String> keywords = Arrays.asList(metadata.getValues(Metadata.KEYWORDS));
+ assertTrue(keywords.contains("coast"));
+ assertTrue(keywords.contains("bird watching"));
+ assertEquals(keywords, Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS)));
+
+ // Core EXIF/TIFF tags
+ assertEquals("103", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("77", metadata.get(Metadata.IMAGE_LENGTH));
+ assertEquals("8", metadata.get(Metadata.BITS_PER_SAMPLE));
+ assertEquals(null, metadata.get(Metadata.SAMPLES_PER_PIXEL));
+
+ assertEquals("1.0E-6", metadata.get(Metadata.EXPOSURE_TIME)); // 1/1000000
+ assertEquals("2.8", metadata.get(Metadata.F_NUMBER));
+ assertEquals("4.6", metadata.get(Metadata.FOCAL_LENGTH));
+ assertEquals("114", metadata.get(Metadata.ISO_SPEED_RATINGS));
+ assertEquals(null, metadata.get(Metadata.EQUIPMENT_MAKE));
+ assertEquals(null, metadata.get(Metadata.EQUIPMENT_MODEL));
+ assertEquals(null, metadata.get(Metadata.SOFTWARE));
+ assertEquals("1", metadata.get(Metadata.ORIENTATION)); // Present in this file, value "1"
+ assertEquals("300.0", metadata.get(Metadata.RESOLUTION_HORIZONTAL));
+ assertEquals("300.0", metadata.get(Metadata.RESOLUTION_VERTICAL));
+ assertEquals("Inch", metadata.get(Metadata.RESOLUTION_UNIT));
+ }
+
+ @Test
+ public void testJPEGTitleAndDescriptionPhotoshop() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_pspcs2mac.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // embedded comments with non-ascii characters
+ assertEquals("Tosteberga \u00C4ngar", metadata.get(TikaCoreProperties.TITLE));
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("Some Tourist", metadata.get(TikaCoreProperties.CREATOR));
+ List<String> keywords = Arrays.asList(metadata.getValues(TikaCoreProperties.KEYWORDS));
+ assertTrue("got " + keywords, keywords.contains("bird watching"));
+ List<String> subject = Arrays.asList(metadata.getValues(Metadata.SUBJECT));
+ assertTrue("got " + subject, subject.contains("bird watching"));
+ }
+
+ @Test
+ public void testJPEGTitleAndDescriptionXnviewmp() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_commented_xnviewmp026.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ // XnViewMp's default comment dialog has only comment, not headline.
+ // Comment is embedded only if "Write comments in XMP" is enabled in settings
+ assertEquals("Bird site in north eastern Sk\u00E5ne, Sweden.\n(new line)", metadata.get(TikaCoreProperties.DESCRIPTION));
+ // xmp handles spaces in keywords, returns "bird watching, nature reserve, coast, grazelands"
+ // and the values are asserted as-is below, spaces included (no underscore replacement)
+ String[] subject = metadata.getValues(TikaCoreProperties.KEYWORDS);
+ List<String> keywords = Arrays.asList(subject);
+ assertTrue("'coast'" + " not in " + keywords, keywords.contains("coast"));
+ assertTrue("'nature reserve'" + " not in " + keywords, keywords.contains("nature reserve"));
+ }
+
+ @Test
+ public void testJPEGoddTagComponent() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_oddTagComponent.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ assertEquals(null, metadata.get(TikaCoreProperties.TITLE));
+ assertEquals(null, metadata.get(TikaCoreProperties.DESCRIPTION));
+ assertEquals("251", metadata.get(Metadata.IMAGE_WIDTH));
+ assertEquals("384", metadata.get(Metadata.IMAGE_LENGTH));
+ }
+
+ @Test
+ public void testJPEGEmptyEXIFDateTime() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+ assertEquals("300.0", metadata.get(TIFF.RESOLUTION_HORIZONTAL));
+ assertEquals("300.0", metadata.get(TIFF.RESOLUTION_VERTICAL));
+ }
+
+ @Test
+ public void testJPEGXMPMM() throws Exception {
+ Metadata metadata = new Metadata();
+ metadata.set(Metadata.CONTENT_TYPE, "image/jpeg");
+ InputStream stream =
+ getClass().getResourceAsStream("/test-documents/testJPEG_EXIF_emptyDateTime.jpg");
+ parser.parse(stream, new DefaultHandler(), metadata, new ParseContext());
+
+ //TODO: when jempbox is fixed/xmpbox is used
+ //add tests for history...currently not extracted
+ assertEquals("xmp.did:49E997348D4911E1AB62EBF9B374B234",
+ metadata.get(XMPMM.DOCUMENTID));
+ }
+
+}