You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by rg...@apache.org on 2012/11/10 00:03:11 UTC

svn commit: r1407683 - in /tika/trunk/tika-parsers/src: main/java/org/apache/tika/parser/dwg/DWGParser.java test/java/org/apache/tika/parser/dwg/DWGParserTest.java test/resources/test-documents/testDWG2010_custom_props.dwg

Author: rgauss
Date: Fri Nov  9 23:03:10 2012
New Revision: 1407683

URL: http://svn.apache.org/viewvc?rev=1407683&view=rev
Log:
TIKA-1022: DWG Custom properties not extracted
   - Added testDWG2010_custom_props.dwg 
   - Added CUSTOM_PROPERTIES_ALT_PADDING_VALUES constant for values found in test file 
   - Added check for alternate padding values in skipToCustomProperties
   - Added testDWG2010CustomPropertiesParser unit test

Added:
    tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg   (with props)
Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java?rev=1407683&r1=1407682&r2=1407683&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/dwg/DWGParser.java Fri Nov  9 23:03:10 2012
@@ -93,7 +93,12 @@ public class DWGParser extends AbstractP
      * How far to skip after the last standard property, before
      *  we find any custom properties that might be there.
      */
-    private static final int CUSTOM_PROPERTIES_SKIP = 20; 
+    private static final int CUSTOM_PROPERTIES_SKIP = 20;
+    
+    /** 
+     * Alternate padding bytes (0x2, 0, 0, 0) found in place of four zero bytes in some DWG files.
+     */
+    private static final int[] CUSTOM_PROPERTIES_ALT_PADDING_VALUES = new int[] {0x2, 0, 0, 0};
 
     public void parse(
             InputStream stream, ContentHandler handler,
@@ -317,11 +322,16 @@ public class DWGParser extends AbstractP
 
     private int skipToCustomProperties(InputStream stream) 
             throws IOException, TikaException {
-       // There should be 4 zero bytes next
+       // There should be 4 zero bytes or CUSTOM_PROPERTIES_ALT_PADDING_VALUES next
        byte[] padding = new byte[4];
        IOUtils.readFully(stream, padding);
-       if(padding[0] == 0 && padding[1] == 0 &&
-             padding[2] == 0 && padding[3] == 0) {
+       if((padding[0] == 0 && padding[1] == 0 &&
+             padding[2] == 0 && padding[3] == 0) ||
+             (padding[0] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[0] && 
+               padding[1] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[1] &&
+               padding[2] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[2] &&
+               padding[3] == CUSTOM_PROPERTIES_ALT_PADDING_VALUES[3])) {
+           
           // Looks hopeful, skip on
           padding = new byte[CUSTOM_PROPERTIES_SKIP];
           IOUtils.readFully(stream, padding);

Modified: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java?rev=1407683&r1=1407682&r2=1407683&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java (original)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/dwg/DWGParserTest.java Fri Nov  9 23:03:10 2012
@@ -55,6 +55,29 @@ public class DWGParserTest extends TestC
                 "/test-documents/testDWG2010.dwg");
         testParser(input);
     }
+    
+    public void testDWG2010CustomPropertiesParser() throws Exception {
+        // Check that standard parsing works
+        InputStream input = DWGParserTest.class.getResourceAsStream(
+                "/test-documents/testDWG2010_custom_props.dwg");
+        testParser(input);
+        
+        // Check that custom properties with alternate padding work
+        input = DWGParserTest.class.getResourceAsStream(
+                "/test-documents/testDWG2010_custom_props.dwg");
+        try {
+            Metadata metadata = new Metadata();
+            ContentHandler handler = new BodyContentHandler();
+            new DWGParser().parse(input, handler, metadata, null);
+            
+            assertEquals("valueforcustomprop1",
+                    metadata.get("customprop1"));
+            assertEquals("valueforcustomprop2",
+                    metadata.get("customprop2"));
+        } finally {
+            input.close();
+        }
+    }
 
     public void testDWGMechParser() throws Exception {
         String[] types = new String[] {

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg?rev=1407683&view=auto
==============================================================================
Binary file - no diff available.

Propchange: tika/trunk/tika-parsers/src/test/resources/test-documents/testDWG2010_custom_props.dwg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream