You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2008/04/08 14:17:20 UTC
svn commit: r645872 - in /poi/branches/ooxml/src/scratchpad:
src/org/apache/poi/extractor/ExtractorFactory.java
testcases/org/apache/poi/extractor/TestExtractorFactory.java
Author: nick
Date: Tue Apr 8 05:17:18 2008
New Revision: 645872
URL: http://svn.apache.org/viewvc?rev=645872&view=rev
Log:
More ExtractorFactory support and tests
Modified:
poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
Modified: poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java?rev=645872&r1=645871&r2=645872&view=diff
==============================================================================
--- poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java Tue Apr 8 05:17:18 2008
@@ -32,6 +32,7 @@
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
@@ -109,7 +110,6 @@
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
Entry entry = (Entry)entries.next();
- System.err.println(entry.getName());
if(entry.getName().equals("Workbook")) {
return new ExcelExtractor(fs);
}
@@ -119,7 +119,9 @@
if(entry.getName().equals("PowerPoint Document")) {
return new PowerPointExtractor(fs);
}
- // TODO - visio
+ if(entry.getName().equals("VisioDocument")) {
+ return new VisioTextExtractor(fs);
+ }
}
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
}
Modified: poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=645872&r1=645871&r2=645872&view=diff
==============================================================================
--- poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java Tue Apr 8 05:17:18 2008
@@ -17,10 +17,14 @@
package org.apache.poi.extractor;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
@@ -34,6 +38,7 @@
private String excel_dir;
private String word_dir;
private String powerpoint_dir;
+ private String visio_dir;
private File txt;
@@ -45,6 +50,8 @@
private File ppt;
private File pptx;
+
+ private File vsd;
protected void setUp() throws Exception {
super.setUp();
@@ -52,8 +59,9 @@
excel_dir = System.getProperty("HSSF.testdata.path");
word_dir = System.getProperty("HWPF.testdata.path");
powerpoint_dir = System.getProperty("HSLF.testdata.path");
+ visio_dir = System.getProperty("HDGF.testdata.path");
- txt = new File(excel_dir, "SampleSS.txt");
+ txt = new File(powerpoint_dir, "SampleShow.txt");
xls = new File(excel_dir, "SampleSS.xls");
xlsx = new File(excel_dir, "SampleSS.xlsx");
@@ -63,6 +71,8 @@
ppt = new File(powerpoint_dir, "SampleShow.ppt");
pptx = new File(powerpoint_dir, "SampleShow.pptx");
+
+ vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd");
}
public void testFile() throws Exception {
@@ -118,7 +128,13 @@
);
// Visio
- // TODO
+ assertTrue(
+ ExtractorFactory.createExtractor(vsd)
+ instanceof VisioTextExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(vsd).getText().length() > 50
+ );
// Text
try {
@@ -128,12 +144,123 @@
// Good
}
}
+
public void testInputStream() throws Exception {
+ // Excel
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(xls))
+ instanceof ExcelExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(xlsx))
+ instanceof XSSFExcelExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
+ );
+
+ // Word
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(doc))
+ instanceof WordExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(docx))
+ instanceof XWPFWordExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
+ );
+
+ // PowerPoint
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(ppt))
+ instanceof PowerPointExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(pptx))
+ instanceof XSLFPowerPointExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
+ );
+ // Visio
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(vsd))
+ instanceof VisioTextExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
+ );
+
+ // Text
+ try {
+ ExtractorFactory.createExtractor(new FileInputStream(txt));
+ fail();
+ } catch(IllegalArgumentException e) {
+ // Good
+ }
}
+
public void testPOIFS() throws Exception {
+ // Excel
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
+ instanceof ExcelExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
+ );
+ // Word
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
+ instanceof WordExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
+ );
+
+ // PowerPoint
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
+ instanceof PowerPointExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
+ );
+
+ // Visio
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
+ instanceof VisioTextExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
+ );
+
+ // Text
+ try {
+ ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
+ fail();
+ } catch(IOException e) {
+ // Good
+ }
}
+
public void testPackage() throws Exception {
}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org