You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by rg...@apache.org on 2012/11/10 00:03:11 UTC
svn commit: r1407683 - in /tika/trunk/tika-parsers/src:
main/java/org/apache/tika/parser/dwg/DWGParser.java
test/java/org/apache/tika/parser/dwg/DWGParserTest.java
test/resources/test-documents/testDWG2010_custom_props.dwg
Author: rgauss
Date: Fri Nov 9 23:03:10 2012
New Revision: 1407683
URL: http://svn.apache.org/viewvc?rev=1407683&view=rev
Log:
TIKA-1022: DWG Custom properties not extracted
- Added testDWG2010_custom_props.dwg
- Added CUSTOM_PROPERTIES_ALT_PADDING_VALUES constant for values found in test file
- Added check for alternate padding values in skipToCustomProperties
- Added testDWG2010CustomPropertiesParser unit test
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg (with props)
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java?rev=1407683&r1=1407682&r2=1407683&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java Fri Nov 9 23:03:10 2012
@@ -93,7 +93,12 @@ public class DWGParser extends AbstractP
* How far to skip after the last standard property, before
* we find any custom properties that might be there.
*/
- private static final int CUSTOM_PROPERTIES_SKIP = 20;
+ private static final int CUSTOM_PROPERTIES_SKIP = 20;
+
+ /**
+ * The value of padding bytes other than 0 in some DWG files.
+ */
+ private static final int[] CUSTOM_PROPERTIES_ALT_PADDING_VALUES = new int[] {0x2, 0, 0, 0};
public void parse(
InputStream stream, ContentHandler handler,
@@ -317,11 +322,16 @@ public class DWGParser extends AbstractP
private int skipToCustomProperties(InputStream stream)
throws IOException, TikaException {
- // There should be 4 zero bytes next
+ // There should be 4 zero bytes or CUSTOM_PROPERTIES_ALT_PADDING_VALUES next
byte[] padding = new byte[4];
IOUtils.readFully(stream, padding);
- if(padding[0] == 0 && padding[1] == 0 &&
- padding[2] == 0 && padding[3] == 0) {
+ if((padding[0] == 0 && padding[1] == 0 &&
+ padding[2] == 0 && padding[3] == 0) ||
+ (padding[0] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[0] &&
+ padding[1] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[1] &&
+ padding[2] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[2] &&
+ padding[3] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[3])) {
+
// Looks hopeful, skip on
padding = new byte[CUSTOM_PROPERTIES_SKIP];
IOUtils.readFully(stream, padding);
Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java?rev=1407683&r1=1407682&r2=1407683&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java Fri Nov 9 23:03:10 2012
@@ -55,6 +55,29 @@ public class DWGParserTest extends TestC
"/test-documents/testDWG2010.dwg");
testParser(input);
}
+
+ public void testDWG2010CustomPropertiesParser() throws Exception {
+ // Check that standard parsing works
+ InputStream input = DWGParserTest.class.getResourceAsStream(
+ "/test-documents/testDWG2010_custom_props.dwg");
+ testParser(input);
+
+ // Check that custom properties with alternate padding work
+ input = DWGParserTest.class.getResourceAsStream(
+ "/test-documents/testDWG2010_custom_props.dwg");
+ try {
+ Metadata metadata = new Metadata();
+ ContentHandler handler = new BodyContentHandler();
+ new DWGParser().parse(input, handler, metadata, null);
+
+ assertEquals("valueforcustomprop1",
+ metadata.get("customprop1"));
+ assertEquals("valueforcustomprop2",
+ metadata.get("customprop2"));
+ } finally {
+ input.close();
+ }
+ }
public void testDWGMechParser() throws Exception {
String[] types = new String[] {
Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg?rev=1407683&view=auto
==============================================================================
Binary file - no diff available.
Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream