You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2015/03/22 22:47:44 UTC

svn commit: r1668483 - in /poi/trunk: src/integrationtest/org/apache/poi/TestAllFiles.java src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java test-data/document/52117.doc

Author: centic
Date: Sun Mar 22 21:47:44 2015
New Revision: 1668483

URL: http://svn.apache.org/r1668483
Log:
Integration tests: Expect an exception for old Word documents, but still run the text extraction for them. Also run HPSFPropertiesExtractor where possible.

Added:
    poi/trunk/test-data/document/52117.doc
Modified:
    poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
    poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
    poi/trunk/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java

Modified: poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java?rev=1668483&r1=1668482&r2=1668483&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/TestAllFiles.java Sun Mar 22 21:47:44 2015
@@ -31,6 +31,7 @@ import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.poi.hwpf.OldWordFileFormatException;
 import org.apache.poi.stress.*;
 import org.apache.tools.ant.DirectoryScanner;
 import org.junit.Test;
@@ -162,6 +163,20 @@ public class TestAllFiles {
         HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler());
     }
 
+    // Old Word Documents where we can at least extract some text
+    private static final Set<String> OLD_FILES = new HashSet<String>();
+    static {
+        OLD_FILES.add("document/Bug49933.doc");
+        OLD_FILES.add("document/Bug51944.doc");
+        OLD_FILES.add("document/Word6.doc");
+        OLD_FILES.add("document/Word6_sections.doc");
+        OLD_FILES.add("document/Word6_sections2.doc");
+        OLD_FILES.add("document/Word95.doc");
+        OLD_FILES.add("document/word95err.doc");
+        OLD_FILES.add("hpsf/TestMickey.doc");
+        OLD_FILES.add("document/52117.doc");
+    }
+
     private static final Set<String> EXPECTED_FAILURES = new HashSet<String>();
     static {
         // password protected files
@@ -202,15 +217,7 @@ public class TestAllFiles {
         EXPECTED_FAILURES.add("spreadsheet/43493.xls");
         EXPECTED_FAILURES.add("spreadsheet/46904.xls");
         EXPECTED_FAILURES.add("document/56880.doc");
-        EXPECTED_FAILURES.add("document/Bug49933.doc");
         EXPECTED_FAILURES.add("document/Bug50955.doc");
-        EXPECTED_FAILURES.add("document/Bug51944.doc");
-        EXPECTED_FAILURES.add("document/Word6.doc");
-        EXPECTED_FAILURES.add("document/Word6_sections.doc");
-        EXPECTED_FAILURES.add("document/Word6_sections2.doc");
-        EXPECTED_FAILURES.add("document/Word95.doc");
-        EXPECTED_FAILURES.add("document/word95err.doc");
-        EXPECTED_FAILURES.add("hpsf/TestMickey.doc");
         EXPECTED_FAILURES.add("slideshow/PPT95.ppt");
         EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx");
         EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx");
@@ -269,17 +276,29 @@ public class TestAllFiles {
         File inputFile = new File(ROOT_DIR, file);
 
         try {
-            InputStream stream = new BufferedInputStream(new FileInputStream(inputFile),100);
+            InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024);
             try {
                 handler.handleFile(stream);
 
                 assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", 
                         EXPECTED_FAILURES.contains(file));
+                assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", 
+                        OLD_FILES.contains(file));
             } finally {
                 stream.close();
             }
 
             handler.handleExtracting(inputFile);
+        } catch (OldWordFileFormatException e) {
+            // for old word files we should still support extracting text
+            if(OLD_FILES.contains(file)) {
+                handler.handleExtracting(inputFile);
+            } else {
+                // check if we expect failure for this file
+                if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
+                    throw new Exception("While handling " + file, e);
+                }
+            }
         } catch (Exception e) {
             // check if we expect failure for this file
             if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {

Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java?rev=1668483&r1=1668482&r2=1668483&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java Sun Mar 22 21:47:44 2015
@@ -28,8 +28,10 @@ import java.io.InputStream;
 import java.util.HashSet;
 import java.util.Set;
 
+import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
 import org.apache.xmlbeans.XmlException;
@@ -89,6 +91,19 @@ public abstract class AbstractFileHandle
             assertEquals("File should not be modified by extractor", modified, file.lastModified());
             
             handleExtractingAsStream(file);
+            
+            if(extractor instanceof POIOLE2TextExtractor) {
+            	HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor);
+            	try {
+                	assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
+                	assertNotNull(hpsfExtractor.getSummaryInformationText());
+                	String text = hpsfExtractor.getText();
+                	//System.out.println(text);
+                	assertNotNull(text);
+            	} finally {
+            		hpsfExtractor.close();
+            	}
+            }
         } catch (IllegalArgumentException e) {
             if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
                 throw new Exception("While handling " + file, e);

Modified: poi/trunk/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java?rev=1668483&r1=1668482&r2=1668483&view=diff
==============================================================================
--- poi/trunk/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java (original)
+++ poi/trunk/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java Sun Mar 22 21:47:44 2015
@@ -63,12 +63,10 @@ public class HWPFFileHandler extends POI
         docTextWriter.close();
 	}
 
-
-
 	// a test-case to test this locally without executing the full TestAllFiles
 	@Test
 	public void test() throws Exception {
-		File file = new File("test-data/document/51921-Word-Crash067.doc");
+		File file = new File("test-data/document/52117.doc");
 
 		InputStream stream = new FileInputStream(file);
 		try {
@@ -91,4 +89,10 @@ public class HWPFFileHandler extends POI
 			stream.close();
 		}
 	}
+
+	@Test
+	public void testExtractingOld() throws Exception {
+		File file = new File("test-data/document/52117.doc");
+		handleExtracting(file);
+	}
 }

Added: poi/trunk/test-data/document/52117.doc
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/52117.doc?rev=1668483&view=auto
==============================================================================
Binary files poi/trunk/test-data/document/52117.doc (added) and poi/trunk/test-data/document/52117.doc Sun Mar 22 21:47:44 2015 differ



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org