You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2008/04/08 14:03:05 UTC

svn commit: r645870 - in /poi/branches/ooxml/src/scratchpad: src/org/apache/poi/extractor/ExtractorFactory.java testcases/org/apache/poi/extractor/ testcases/org/apache/poi/extractor/TestExtractorFactory.java

Author: nick
Date: Tue Apr  8 05:03:05 2008
New Revision: 645870

URL: http://svn.apache.org/viewvc?rev=645870&view=rev
Log:
More ExtractorFactory support and tests

Added:
    poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/
    poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java   (with props)
Modified:
    poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java

Modified: poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java?rev=645870&r1=645869&r2=645870&view=diff
==============================================================================
--- poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java (original)
+++ poi/branches/ooxml/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java Tue Apr  8 05:03:05 2008
@@ -32,7 +32,9 @@
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.xslf.XSLFSlideShow;
@@ -51,20 +53,21 @@
 	public static final String CORE_DOCUMENT_REL =
 		"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
 	
-	public POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-		FileInputStream finp = new FileInputStream(f);
+	public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+		InputStream inp = new PushbackInputStream( 
+			new FileInputStream(f), 8);
 		
-		if(POIFSFileSystem.hasPOIFSHeader(finp)) {
-			return createExtractor(new POIFSFileSystem(finp));
+		if(POIFSFileSystem.hasPOIFSHeader(inp)) {
+			return createExtractor(new POIFSFileSystem(inp));
 		}
-		if(POIXMLDocument.hasOOXMLHeader(finp)) {
-			finp.close();
+		if(POIXMLDocument.hasOOXMLHeader(inp)) {
+			inp.close();
 			return createExtractor(Package.open(f.toString()));
 		}
 		throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
 	}
 	
-	public POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+	public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
 		// Figure out the kind of stream
 		// If clearly doesn't do mark/reset, wrap up
 		if(! inp.markSupported()) {
@@ -80,7 +83,7 @@
 		throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
 	}
 	
-	public POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
+	public static POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
 		PackageRelationshipCollection core = 
 			pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
 		if(core.size() != 1) {
@@ -100,14 +103,23 @@
 		throw new IllegalArgumentException("No supported documents found in the OOXML package");
 	}
 	
-	public POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
+	public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
 		// Look for certain entries in the stream, to figure it
 		//  out from
 		for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
 			Entry entry = (Entry)entries.next();
+			
+			System.err.println(entry.getName());
 			if(entry.getName().equals("Workbook")) {
 				return new ExcelExtractor(fs);
 			}
+			if(entry.getName().equals("WordDocument")) {
+				return new WordExtractor(fs);
+			}
+			if(entry.getName().equals("PowerPoint Document")) {
+				return new PowerPointExtractor(fs);
+			}
+			// TODO - visio
 		}
 		throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
 	}

Added: poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=645870&view=auto
==============================================================================
--- poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java (added)
+++ poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java Tue Apr  8 05:03:05 2008
@@ -0,0 +1,140 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.File;
+
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that ExtractorFactory returns the matching extractor type for each sample document.
+ */
+public class TestExtractorFactory extends TestCase {
+	private String excel_dir;
+	private String word_dir;
+	private String powerpoint_dir;
+	
+	private File txt;
+	
+	private File xls;
+	private File xlsx;
+	
+	private File doc;
+	private File docx;
+
+	private File ppt;
+	private File pptx;
+
+	protected void setUp() throws Exception {
+		super.setUp();
+		// Sample file directories are read from system properties
+		excel_dir = System.getProperty("HSSF.testdata.path");
+		word_dir = System.getProperty("HWPF.testdata.path");
+		powerpoint_dir = System.getProperty("HSLF.testdata.path");
+		// The .txt sample is the negative case: testFile expects it to be rejected
+		txt = new File(excel_dir, "SampleSS.txt");
+		
+		xls = new File(excel_dir, "SampleSS.xls");
+		xlsx = new File(excel_dir, "SampleSS.xlsx");
+		
+		doc = new File(word_dir, "SampleDoc.doc");
+		docx = new File(word_dir, "SampleDoc.docx");
+		
+		ppt = new File(powerpoint_dir, "SampleShow.ppt");
+		pptx = new File(powerpoint_dir, "SampleShow.pptx");
+	}
+
+	public void testFile() throws Exception {
+		// Excel - check both the extractor type and that some text comes back
+		assertTrue(
+				ExtractorFactory.createExtractor(xls)
+				instanceof ExcelExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(xls).getText().length() > 200
+		);
+		
+		assertTrue(
+				ExtractorFactory.createExtractor(xlsx)
+				instanceof XSSFExcelExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(xlsx).getText().length() > 200
+		);
+		
+		// Word
+		assertTrue(
+				ExtractorFactory.createExtractor(doc)
+				instanceof WordExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(doc).getText().length() > 120
+		);
+		
+		assertTrue(
+				ExtractorFactory.createExtractor(docx)
+				instanceof XWPFWordExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(docx).getText().length() > 120
+		);
+		
+		// PowerPoint
+		assertTrue(
+				ExtractorFactory.createExtractor(ppt)
+				instanceof PowerPointExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(ppt).getText().length() > 120
+		);
+		
+		assertTrue(
+				ExtractorFactory.createExtractor(pptx)
+				instanceof XSLFPowerPointExtractor
+		);
+		assertTrue(
+				ExtractorFactory.createExtractor(pptx).getText().length() > 120
+		);
+		
+		// Visio
+		// TODO - no Visio case is exercised yet
+		
+		// Text - the factory must refuse it with an IllegalArgumentException
+		try {
+			ExtractorFactory.createExtractor(txt);
+			fail();
+		} catch(IllegalArgumentException e) {
+			// Good - the .txt sample was rejected as expected
+		}
+	}
+	public void testInputStream() throws Exception {
+		// TODO - placeholder, nothing is tested here yet
+	}
+	public void testPOIFS() throws Exception {
+		// TODO - placeholder, nothing is tested here yet
+	}
+	public void testPackage() throws Exception {
+		// TODO - placeholder, nothing is tested here yet
+	}
+}

Propchange: poi/branches/ooxml/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
------------------------------------------------------------------------------
    svn:eol-style = native



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org