You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ni...@apache.org on 2008/01/08 18:28:45 UTC

svn commit: r610074 - in /poi/trunk/src/scratchpad: ooxml-src/org/apache/poi/hssf/ ooxml-src/org/apache/poi/hssf/extractor/ ooxml-src/org/apache/poi/hssf/usermodel/ ooxml-src/org/apache/poi/hxf/ ooxml-testcases/org/apache/poi/hssf/extractor/

Author: nick
Date: Tue Jan  8 09:28:39 2008
New Revision: 610074

URL: http://svn.apache.org/viewvc?rev=610074&view=rev
Log:
Patch from Ugo from bug #44185 - support getting shared strings for ooxml excel files, and further tests for the ooxml excel text extraction

Modified:
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java
    poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java
    poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java Tue Jan  8 09:28:39 2008
@@ -18,6 +18,7 @@
 
 import java.io.IOException;
 
+import org.apache.poi.hssf.model.SharedStringsTable;
 import org.apache.poi.hxf.HXFDocument;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
@@ -45,14 +46,24 @@
 	public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
 	public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
 	public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
+	public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
 	
 	private WorkbookDocument workbookDoc;
 	
+	private SharedStringsTable sharedStrings;
+
 	public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
 		super(container, MAIN_CONTENT_TYPE);
 		
 		workbookDoc =
 			WorkbookDocument.Factory.parse(basePart.getInputStream());
+		
+		PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart);
+		if (ssPart != null) {
+			sharedStrings = new SharedStringsTable(ssPart);
+		} else {
+			
+		}
 	}
 	
 	/**
@@ -80,5 +91,9 @@
 		WorksheetDocument sheetDoc =
 			WorksheetDocument.Factory.parse(sheetPart.getInputStream());
 		return sheetDoc.getWorksheet();
+	}
+	
+	public String getSharedString(int index) {
+		return this.sharedStrings.get(index);
 	}
 }

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java Tue Jan  8 09:28:39 2008
@@ -117,7 +117,7 @@
 							}
 						}
 						if(!done) {
-							HSSFXMLCell uCell = new HSSFXMLCell(cell);
+							HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
 							text.append(uCell.getStringValue());
 						}
 					}

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java Tue Jan  8 09:28:39 2008
@@ -17,32 +17,40 @@
 package org.apache.poi.hssf.usermodel;
 
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
 
 /**
  * User facing wrapper around an underlying cell object
  */
 public class HSSFXMLCell {
-	private CTCell cell;
-	public HSSFXMLCell(CTCell rawCell) {
-		this.cell = rawCell;
-	}
-	
-	/**
-	 * Formats the cell's contents, based on its type,
-	 *  and returns it as a string.
-	 */
-	public String getStringValue() {
-		if(cell.getV() != null) {
-			return cell.getV();
-		}
-		if(cell.getIs() != null) {
-			return cell.getIs().getT();
-		}
-		// TODO: Formatting
-		return Long.toString(cell.getS());
-	}
-	
-	public String toString() {
-		return cell.getR() + " - " + getStringValue(); 
-	}
+    private CTCell cell;
+
+    /** The workbook to which this cell belongs */
+    private final HSSFXMLWorkbook workbook;
+
+    public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) {
+        this.cell = rawCell;
+        this.workbook = workbook;
+    }
+
+    /**
+     * Formats the cell's contents, based on its type,
+     *  and returns it as a string.
+     */
+    public String getStringValue() {
+
+        switch (cell.getT().intValue()) {
+        case STCellType.INT_S:
+            return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
+        case STCellType.INT_N:
+            return cell.getV();
+        // TODO: support other types
+        default:
+            return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'";
+        }
+    }
+
+    public String toString() {
+        return cell.getR() + " - " + getStringValue(); 
+    }
 }

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java Tue Jan  8 09:28:39 2008
@@ -36,4 +36,8 @@
 	public HSSFXML _getHSSFXML() {
 		return hssfXML;
 	}
+	
+	public String getSharedString(int index) {
+		return hssfXML.getSharedString(index);
+	}
 }

Modified: poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java (original)
+++ poi/trunk/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java Tue Jan  8 09:28:39 2008
@@ -104,12 +104,33 @@
 
 	/**
 	 * Fetches the (single) PackagePart which is defined as
+	 *  the supplied relation content type of the specified part, 
+	 *  or null if none found.
+	 * @param relationType The relation content type to search for
+	 * @throws IllegalArgumentException If we find more than one part of that type
+	 * TODO: this sucks! Make Package and PackagePart implement common intf that defines getRelationshipsByType & friends
+	 */
+	protected PackagePart getSinglePartByRelationType(String relationType, PackagePart part) throws IllegalArgumentException, OpenXML4JException {
+		PackageRelationshipCollection rels =
+			part.getRelationshipsByType(relationType);
+		if(rels.size() == 0) {
+			return null;
+		}
+		if(rels.size() > 1) {
+			throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
+		}
+		PackageRelationship rel = rels.getRelationship(0);
+		return getPackagePart(rel);
+	}
+	
+	/**
+	 * Fetches the (single) PackagePart which is defined as
 	 *  the supplied relation content type of the base
 	 *  container, or null if none found.
 	 * @param relationType The relation content type to search for
 	 * @throws IllegalArgumentException If we find more than one part of that type
 	 */
-	private PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
+	protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
 		PackageRelationshipCollection rels =
 			container.getRelationshipsByType(relationType);
 		if(rels.size() == 0) {

Modified: poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java?rev=610074&r1=610073&r2=610074&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java (original)
+++ poi/trunk/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/extractor/TestHXFExcelExtractor.java Tue Jan  8 09:28:39 2008
@@ -18,6 +18,8 @@
 
 import java.io.File;
 import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import junit.framework.TestCase;
 
@@ -170,7 +172,7 @@
 	 *  ExcelExtractor does, when we're both passed
 	 *  the same file, just saved as xls and xlsx
 	 */
-	public void BROKENtestComparedToOLE2() throws Exception {
+	public void testComparedToOLE2() throws Exception {
 		HXFExcelExtractor ooxmlExtractor =
 			new HXFExcelExtractor(simpleXLSX.getPackage());
 		ExcelExtractor ole2Extractor =
@@ -181,14 +183,13 @@
 		for (int i = 0; i < extractors.length; i++) {
 			POITextExtractor extractor = extractors[i];
 			
-			String text = extractor.getText().replace("\r", "");
+			String text = extractor.getText().replaceAll("[\r\t]", "");
 			System.out.println(text.length());
 			System.out.println(text);
-			assertTrue(text.startsWith("First Sheet\nTest spreadsheet\t\n2nd row\t2nd row 2nd column\n"));
-			assertTrue(text.endsWith("13.0\nSheet3\n"));
-			
-			assertTrue(text.length() >= 214);
-			assertTrue(text.length() <= 214);
+			assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+			Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
+			Matcher m = pattern.matcher(text);
+			assertTrue(m.matches());			
 		}
 	}
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org