You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2017/03/16 18:37:14 UTC
svn commit: r1787228 [2/2] - in /poi: site/src/documentation/content/xdocs/
trunk/src/ooxml/java/org/apache/poi/extractor/
trunk/src/ooxml/java/org/apache/poi/xssf/
trunk/src/ooxml/java/org/apache/poi/xssf/binary/
trunk/src/ooxml/java/org/apache/poi/xs...
Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,117 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.util.Internal;
+
+/**
+ * Record holding the pieces of information that describe one hyperlink.
+ * It is read only for callers outside this package (the setters are
+ * package-private). In OOXML land, this information has to be merged
+ * from 1) the sheet's .rels to get the url and 2) from the hyperlink
+ * section that follows the sheet data.
+ *
+ * The {@link #display} is often empty and should be filled from
+ * the contents of the anchor cell.
+ */
+@Internal
+public class XSSFHyperlinkRecord {
+
+    private final CellRangeAddress cellRangeAddress;
+    private final String relId;
+    private String location;
+    private String toolTip;
+    private String display;
+
+    XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
+        this.cellRangeAddress = cellRangeAddress;
+        this.relId = relId;
+        this.location = location;
+        this.toolTip = toolTip;
+        this.display = display;
+    }
+
+    // ---- package-private mutators ----
+
+    void setLocation(String location) {
+        this.location = location;
+    }
+
+    void setToolTip(String toolTip) {
+        this.toolTip = toolTip;
+    }
+
+    void setDisplay(String display) {
+        this.display = display;
+    }
+
+    // ---- accessors ----
+
+    CellRangeAddress getCellRangeAddress() {
+        return cellRangeAddress;
+    }
+
+    public String getRelId() {
+        return relId;
+    }
+
+    public String getLocation() {
+        return location;
+    }
+
+    public String getToolTip() {
+        return toolTip;
+    }
+
+    public String getDisplay() {
+        return display;
+    }
+
+    /**
+     * @return the five fields in the fixed order used by
+     *         {@link #equals(Object)} and {@link #hashCode()}
+     */
+    private Object[] fieldValues() {
+        return new Object[] {cellRangeAddress, relId, location, toolTip, display};
+    }
+
+    /**
+     * Two records are equal when they are of exactly the same class and
+     * all five fields are pairwise equal (null only equals null).
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null || o.getClass() != getClass()) {
+            return false;
+        }
+        Object[] mine = fieldValues();
+        Object[] theirs = ((XSSFHyperlinkRecord) o).fieldValues();
+        for (int i = 0; i < mine.length; i++) {
+            Object a = mine[i];
+            Object b = theirs[i];
+            if (a == null ? b != null : !a.equals(b)) {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * 31-based polynomial hash over the five fields; a null field contributes 0.
+     */
+    @Override
+    public int hashCode() {
+        int hash = 0;
+        for (Object value : fieldValues()) {
+            hash = 31 * hash + (value == null ? 0 : value.hashCode());
+        }
+        return hash;
+    }
+
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("XSSFHyperlinkRecord{");
+        sb.append("cellRangeAddress=").append(cellRangeAddress);
+        sb.append(", relId='").append(relId).append('\'');
+        sb.append(", location='").append(location).append('\'');
+        sb.append(", toolTip='").append(toolTip).append('\'');
+        sb.append(", display='").append(display).append('\'');
+        sb.append('}');
+        return sb.toString();
+    }
+}
Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java
------------------------------------------------------------------------------
svn:eol-style = native
Copied: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html (from r1787212, poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html)
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html?p2=poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html&p1=poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html&r1=1787212&r2=1787228&rev=1787228&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html Thu Mar 16 18:37:13 2017
@@ -22,8 +22,17 @@
</head>
<body bgcolor="white">
-usermodel package maps HSSF low level strutures to familiar workbook/sheet model
-
+<p>The org.apache.poi.xssf.binary package includes necessary underlying components
+for streaming/read-only processing of xlsb files.
+</p>
+<p>
+ POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read them with XSSFBReader
+ in org.apache.poi.xssf.eventusermodel.
+</p>
+<p>
+ This feature was added in poi-3.15-beta3 and should be considered experimental. Most classes
+ have been marked @Internal and the API is subject to change.
+</p>
<h2>Related Documentation</h2>
For overviews, tutorials, examples, guides, and tool documentation, please see:
Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,172 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBParseException;
+import org.apache.poi.xssf.binary.XSSFBParser;
+import org.apache.poi.xssf.binary.XSSFBRecordType;
+import org.apache.poi.xssf.binary.XSSFBRelation;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.binary.XSSFBUtils;
+import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+/**
+ * Reader for xlsb files.
+ */
+public class XSSFBReader extends XSSFReader {
+    /**
+     * Creates a new XSSFBReader, for the given package
+     *
+     * @param pkg opc package
+     */
+    public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
+        super(pkg);
+    }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     * different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     * from the Iterator. It's up to you to close the
+     * InputStreams when done with each one.
+     */
+    @Override
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        return new SheetIterator(workbookPart);
+    }
+
+    /**
+     * Builds the binary styles table from the first package part whose
+     * content type is that of {@link XSSFBRelation#STYLES_BINARY}.
+     *
+     * @return the styles table, or <code>null</code> if the package has no such part
+     * @throws IOException if the part's stream cannot be opened or read
+     */
+    public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
+        ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
+        if (parts.isEmpty()) {
+            return null;
+        }
+        // NOTE(review): the part's stream is handed to XSSFBStylesTable and not closed
+        // here -- confirm that the table consumes and/or closes it.
+        return new XSSFBStylesTable(parts.get(0).getInputStream());
+    }
+
+
+    public static class SheetIterator extends XSSFReader.SheetIterator {
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding the workbook
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+            super(wb);
+        }
+
+        /**
+         * Reads the sheet references from the binary workbook part by
+         * running a {@link SheetRefLoader} over it.
+         *
+         * @param wb package part holding the workbook
+         * @return iterator over the sheet refs in the order the records were read
+         * @throws IOException if the part cannot be opened or read
+         */
+        @Override
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+            InputStream workbookStream = wb.getInputStream();
+            try {
+                SheetRefLoader sheetRefLoader = new SheetRefLoader(workbookStream);
+                sheetRefLoader.parse();
+                return sheetRefLoader.getSheets().iterator();
+            } finally {
+                // parsing is complete (or has failed) at this point, so the
+                // stream is no longer needed and must not be leaked
+                workbookStream.close();
+            }
+        }
+
+        /**
+         * Not supported by XSSFBReader's SheetIterator.
+         * Please use {@link #getXSSFBSheetComments()} instead.
+         * @return nothing, always throws IllegalArgumentException!
+         */
+        @Override
+        public CommentsTable getSheetComments() {
+            throw new IllegalArgumentException("Please use getXSSFBSheetComments");
+        }
+
+        /**
+         * Loads the binary comments table related to the current sheet.
+         *
+         * @return the comments table, or <code>null</code> if the sheet has no
+         *         comments relationship, the relationship has no target, or
+         *         the comments part cannot be resolved or read
+         */
+        public XSSFBCommentsTable getXSSFBSheetComments() {
+            PackagePart sheetPkg = getSheetPart();
+
+            // Do we have a comments relationship? (Only ever one if so)
+            try {
+                PackageRelationshipCollection commentsList =
+                        sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+                if (commentsList.size() == 0) {
+                    return null;
+                }
+                PackageRelationship comments = commentsList.getRelationship(0);
+                if (comments == null || comments.getTargetURI() == null) {
+                    return null;
+                }
+                PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
+                PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
+                if (commentsPart == null) {
+                    // relationship points at a part that is not in the package
+                    return null;
+                }
+                return new XSSFBCommentsTable(commentsPart.getInputStream());
+            } catch (InvalidFormatException e) {
+                // best effort: missing/unreadable comments are reported as "no comments"
+                return null;
+            } catch (IOException e) {
+                return null;
+            }
+        }
+
+    }
+
+    /**
+     * Collects a sheet reference for every BrtBundleSh record
+     * found in the workbook stream.
+     */
+    private static class SheetRefLoader extends XSSFBParser {
+        private final List<XSSFSheetRef> sheets = new LinkedList<XSSFSheetRef>();
+
+        private SheetRefLoader(InputStream is) {
+            super(is);
+        }
+
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
+                addWorksheet(data);
+            }
+        }
+
+        /**
+         * Decodes one BrtBundleSh record and stores the sheet reference
+         * unless its relationship id is blank.
+         *
+         * @param data record payload
+         * @throws XSSFBParseException if the tab id is outside 1..0xFFFF
+         */
+        private void addWorksheet(byte[] data) {
+            //the first int is the sheet state #2.5.142; it is not used here, so skip it
+            int offset = LittleEndian.INT_SIZE;
+
+            long iTabID = LittleEndian.getUInt(data, offset);
+            offset += LittleEndian.INT_SIZE;
+            //according to #2.4.304
+            if (iTabID < 1 || iTabID > 0x0000FFFFL) {
+                throw new XSSFBParseException("table id out of range: "+iTabID);
+            }
+
+            StringBuilder sb = new StringBuilder();
+            offset += XSSFBUtils.readXLWideString(data, offset, sb);
+            String relId = sb.toString();
+
+            sb.setLength(0);
+            XSSFBUtils.readXLWideString(data, offset, sb);
+            String name = sb.toString();
+
+            //a sheet without a relationship id cannot be resolved to a part; skip it
+            if (relId.trim().length() > 0) {
+                sheets.add(new XSSFSheetRef(relId, name));
+            }
+        }
+
+        List<XSSFSheetRef> getSheets() {
+            return sheets;
+        }
+    }
+}
\ No newline at end of file
Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java Thu Mar 16 18:37:13 2017
@@ -16,15 +16,16 @@
==================================================================== */
package org.apache.poi.xssf.eventusermodel;
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
+import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
+import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
+import java.util.Locale;
import java.util.Map;
import org.apache.poi.POIXMLException;
@@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.Pack
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
+import org.apache.poi.util.SAXHelper;
import org.apache.poi.xssf.model.CommentsTable;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.model.StylesTable;
@@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSS
import org.apache.poi.xssf.usermodel.XSSFRelation;
import org.apache.poi.xssf.usermodel.XSSFShape;
import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
/**
* This class makes it easy to get at individual parts
@@ -62,8 +66,8 @@ public class XSSFReader {
private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
- private OPCPackage pkg;
- private PackagePart workbookPart;
+ protected OPCPackage pkg;
+ protected PackagePart workbookPart;
/**
* Creates a new XSSFReader, for the given package
@@ -194,23 +198,23 @@ public class XSSFReader {
private final Map<String, PackagePart> sheetMap;
/**
- * Current CTSheet bean
+ * Current sheet reference
*/
- private CTSheet ctSheet;
-
+ XSSFSheetRef xssfSheetRef;
+
/**
* Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
* We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
* i.e. as they are stored in the underlying package
*/
- private final Iterator<CTSheet> sheetIterator;
+ final Iterator<XSSFSheetRef> sheetIterator;
/**
* Construct a new SheetIterator
*
* @param wb package part holding workbook.xml
*/
- private SheetIterator(PackagePart wb) throws IOException {
+ SheetIterator(PackagePart wb) throws IOException {
/**
* The order of sheets is defined by the order of CTSheet elements in workbook.xml
@@ -228,25 +232,44 @@ public class XSSFReader {
sheetMap.put(rel.getId(), pkg.getPart(relName));
}
}
- //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
- //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
- CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
- List<CTSheet> validSheets = new ArrayList<CTSheet>();
- for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
- //if there's no relationship id, silently skip the sheet
- String sheetId = ctSheet.getId();
- if (sheetId != null && sheetId.length() > 0) {
- validSheets.add(ctSheet);
- }
- }
- sheetIterator = validSheets.iterator();
+ //step 2. Read the sheet references (relationship id and name) from the workbook part
+ //and construct an iterator over them
+ sheetIterator = createSheetIteratorFromWB(wb);
} catch (InvalidFormatException e){
throw new POIXMLException(e);
- } catch (XmlException e){
+ }
+ }
+
+ Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException { // SAX-parses the workbook part and returns its sheet refs in document order
+
+ XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+ XMLReader xmlReader = null;
+ try {
+ xmlReader = SAXHelper.newXMLReader();
+ } catch (ParserConfigurationException e) {
+ throw new POIXMLException(e);
+ } catch (SAXException e) {
throw new POIXMLException(e);
}
+ xmlReader.setContentHandler(xmlSheetRefReader);
+ try {
+ xmlReader.parse(new InputSource(wb.getInputStream())); // NOTE(review): the stream opened here is not closed by this method
+ } catch (SAXException e) {
+ throw new POIXMLException(e);
+ }
+
+ List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>(); // only refs with a non-empty id are kept
+ for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
+ //if there's no relationship id, silently skip the sheet
+ String sheetId = xssfSheetRef.getId();
+ if (sheetId != null && sheetId.length() > 0) {
+ validSheets.add(xssfSheetRef);
+ }
+ }
+ return validSheets.iterator();
}
+
/**
* Returns <tt>true</tt> if the iteration has more elements.
*
@@ -264,9 +287,9 @@ public class XSSFReader {
*/
@Override
public InputStream next() {
- ctSheet = sheetIterator.next();
+ xssfSheetRef = sheetIterator.next();
- String sheetId = ctSheet.getId();
+ String sheetId = xssfSheetRef.getId();
try {
PackagePart sheetPkg = sheetMap.get(sheetId);
return sheetPkg.getInputStream();
@@ -281,7 +304,7 @@ public class XSSFReader {
* @return name of the current sheet
*/
public String getSheetName() {
- return ctSheet.getName();
+ return xssfSheetRef.getName();
}
/**
@@ -344,7 +367,7 @@ public class XSSFReader {
}
public PackagePart getSheetPart() {
- String sheetId = ctSheet.getId();
+ String sheetId = xssfSheetRef.getId();
return sheetMap.get(sheetId);
}
@@ -356,4 +379,52 @@ public class XSSFReader {
throw new IllegalStateException("Not supported");
}
}
+
+    /**
+     * Immutable pairing of a sheet's relationship id with the sheet's name.
+     */
+    protected static final class XSSFSheetRef {
+        //do we need to store sheetId, too?
+        private final String id;
+        private final String name;
+
+        /**
+         * @param id   relationship id of the sheet
+         * @param name name of the sheet
+         */
+        public XSSFSheetRef(String id, String name) {
+            this.name = name;
+            this.id = id;
+        }
+
+        /** @return the relationship id given at construction */
+        public String getId() {
+            return this.id;
+        }
+
+        /** @return the sheet name given at construction */
+        public String getName() {
+            return this.name;
+        }
+    }
+
+    //scrapes sheet reference info and order from workbook.xml
+    private static class XMLSheetRefReader extends DefaultHandler {
+        private final static String SHEET = "sheet";
+        private final static String ID = "id";
+        private final static String NAME = "name";
+
+        private final List<XSSFSheetRef> sheetRefs = new LinkedList<XSSFSheetRef>();
+
+        /**
+         * Records exactly one sheet reference per <code>sheet</code> element,
+         * built from its <code>name</code> and <code>id</code> attributes
+         * (both matched case-insensitively on the local name).
+         * <p>
+         * The reference is added once, after all attributes have been
+         * inspected. Adding it inside the attribute loop would create one
+         * entry per attribute, i.e. partially filled entries and, depending
+         * on attribute order, duplicates of the same sheet.
+         * A reference whose id is missing is still recorded with a
+         * <code>null</code> id; createSheetIteratorFromWB drops such entries.
+         */
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
+            if (!localName.toLowerCase(Locale.US).equals(SHEET)) {
+                return;
+            }
+            String name = null;
+            String id = null;
+            for (int i = 0; i < attrs.getLength(); i++) {
+                String attrName = attrs.getLocalName(i).toLowerCase(Locale.US);
+                if (attrName.equals(NAME)) {
+                    name = attrs.getValue(i);
+                } else if (attrName.equals(ID)) {
+                    id = attrs.getValue(i);
+                }
+            }
+            sheetRefs.add(new XSSFSheetRef(id, name));
+        }
+
+        /**
+         * @return read-only view of the references collected so far, in document order
+         */
+        List<XSSFSheetRef> getSheetRefs() {
+            return Collections.unmodifiableList(sheetRefs);
+        }
+    }
}
Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,160 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+/**
+ * Implementation of a text extractor for xlsb Excel
+ * files that uses SAX-like binary parsing.
+ */
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
+        implements org.apache.poi.ss.extractor.ExcelExtractor {
+
+    public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+            XSSFRelation.XLSB_BINARY_WORKBOOK
+    };
+
+    private boolean handleHyperlinksInCells = false;
+
+    public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+        super(path);
+    }
+
+    public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+        super(container);
+    }
+
+    /**
+     * Command line entry point: prints the extracted text of the given
+     * xlsb file to standard out.
+     *
+     * @param args a single argument, the path of the xlsb file
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Use:");
+            System.err.println(" XSSFBEventBasedExcelExtractor <filename.xlsb>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+                new XSSFBEventBasedExcelExtractor(args[0]);
+        try {
+            System.out.println(extractor.getText());
+        } finally {
+            // release the extractor even if text extraction fails
+            extractor.close();
+        }
+    }
+
+    /**
+     * Sets the flag that makes {@link #getText()} build a hyperlinks
+     * table for every sheet. Default is false.
+     */
+    public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
+        this.handleHyperlinksInCells = handleHyperlinksInCells;
+    }
+
+    /**
+     * Should we return the formula itself, and not
+     * the result it produces? Default is false
+     * This is currently unsupported for xssfb
+     *
+     * @throws IllegalArgumentException always
+     */
+    @Override
+    public void setFormulasNotResults(boolean formulasNotResults) {
+        throw new IllegalArgumentException("Not currently supported");
+    }
+
+    /**
+     * Processes the given sheet
+     *
+     * @param sheetContentsExtractor receiver of the sheet's contents
+     * @param styles styles table handed to the sheet handler
+     * @param comments comments table handed to the sheet handler, may be null
+     * @param strings shared strings table handed to the sheet handler
+     * @param sheetInputStream stream of the sheet part; not closed by this method
+     */
+    public void processSheet(
+            SheetContentsHandler sheetContentsExtractor,
+            XSSFBStylesTable styles,
+            XSSFBCommentsTable comments,
+            XSSFBSharedStringsTable strings,
+            InputStream sheetInputStream)
+            throws IOException, SAXException {
+
+        // use the configured locale for formatting if there is one
+        DataFormatter formatter = (locale == null)
+                ? new DataFormatter()
+                : new DataFormatter(locale);
+
+        XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
+                sheetInputStream,
+                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
+        );
+        xssfbSheetHandler.parse();
+    }
+
+    /**
+     * Processes the file and returns the text
+     *
+     * @return the extracted text, or <code>null</code> if an IOException,
+     *         SAXException or OpenXML4JException occurred (the exception is
+     *         printed to standard error)
+     */
+    public String getText() {
+        try {
+            XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
+            XSSFBReader xssfbReader = new XSSFBReader(container);
+            XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
+            XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
+
+            StringBuffer text = new StringBuffer();
+            SheetTextExtractor sheetExtractor = new SheetTextExtractor();
+            XSSFBHyperlinksTable hyperlinksTable = null;
+            while (iter.hasNext()) {
+                InputStream stream = iter.next();
+                try {
+                    if (includeSheetNames) {
+                        text.append(iter.getSheetName());
+                        text.append('\n');
+                    }
+                    if (handleHyperlinksInCells) {
+                        // NOTE(review): the table is built but not yet passed on to
+                        // the sheet processing below -- confirm the intended wiring
+                        hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
+                    }
+                    XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
+                    processSheet(sheetExtractor, styles, comments, strings, stream);
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendHeaderText(text);
+                    }
+                    sheetExtractor.appendCellText(text);
+                    if (includeTextBoxes) {
+                        processShapes(iter.getShapes(), text);
+                    }
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendFooterText(text);
+                    }
+                    sheetExtractor.reset();
+                } finally {
+                    // always release the sheet's stream, also when processing fails
+                    stream.close();
+                }
+            }
+
+            return text.toString();
+        } catch (IOException e) {
+            System.err.println(e);
+            return null;
+        } catch (SAXException se) {
+            System.err.println(se);
+            return null;
+        } catch (OpenXML4JException o4je) {
+            System.err.println(o4je);
+            return null;
+        }
+    }
+
+}
Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
*/
public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor
implements org.apache.poi.ss.extractor.ExcelExtractor {
- private OPCPackage container;
+ OPCPackage container;
private POIXMLProperties properties;
- private Locale locale;
- private boolean includeTextBoxes = true;
- private boolean includeSheetNames = true;
- private boolean includeCellComments = false;
- private boolean includeHeadersFooters = true;
- private boolean formulasNotResults = false;
+ Locale locale;
+ boolean includeTextBoxes = true;
+ boolean includeSheetNames = true;
+ boolean includeCellComments = false;
+ boolean includeHeadersFooters = true;
+ boolean formulasNotResults = false;
private boolean concatenatePhoneticRuns = true;
public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtracto
}
}
- private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
+ void processShapes(List<XSSFShape> shapes, StringBuffer text) {
if (shapes == null){
return;
}
@@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtracto
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
- private void appendHeaderText(StringBuffer buffer) {
+ void appendHeaderText(StringBuffer buffer) {
appendHeaderFooterText(buffer, "firstHeader");
appendHeaderFooterText(buffer, "oddHeader");
appendHeaderFooterText(buffer, "evenHeader");
@@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtracto
* @see XSSFExcelExtractor#getText()
* @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
*/
- private void appendFooterText(StringBuffer buffer) {
+ void appendFooterText(StringBuffer buffer) {
// append the text for each footer type in the same order
// they are appended in XSSFExcelExtractor
appendHeaderFooterText(buffer, "firstFooter");
@@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtracto
/**
* Append the cell contents we have collected.
*/
- private void appendCellText(StringBuffer buffer) {
+ void appendCellText(StringBuffer buffer) {
checkMaxTextSize(buffer, output.toString());
buffer.append(output);
}
@@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtracto
/**
* Reset this <code>SheetTextExtractor</code> for the next sheet.
*/
- private void reset() {
+ void reset() {
output.setLength(0);
firstCellOfRow = true;
if (headerFooterMap != null) {
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Thu Mar 16 18:37:13 2017
@@ -68,6 +68,7 @@ public class TestExtractorFactory {
private static File xlsxStrict;
private static File xltx;
private static File xlsEmb;
+ private static File xlsb;
private static File doc;
private static File doc6;
@@ -108,6 +109,7 @@ public class TestExtractorFactory {
xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
xltx = getFileAndCheck(ssTests, "test.xltx");
xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+ xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
doc = getFileAndCheck(wpTests, "SampleDoc.doc");
@@ -172,6 +174,13 @@ public class TestExtractorFactory {
);
extractor.close();
+ extractor = ExtractorFactory.createExtractor(xlsb);
+ assertTrue(
+ extractor.getText().contains("test")
+ );
+ extractor.close();
+
+
extractor = ExtractorFactory.createExtractor(xltx);
assertTrue(
extractor.getText().contains("test")
Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,56 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.junit.Test;
+
+ /** Reads the shared strings part of a sample .xlsb file and checks its entries and counts. */
+ public class TestXSSFBSharedStringsTable {
+
+     private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+     @Test
+     public void testBasic() throws Exception {
+         OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
+         try {
+             List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
+             assertEquals(1, parts.size());
+
+             XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
+             List<String> strings = rtbl.getItems();
+             assertEquals(49, strings.size());
+
+             assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
+             assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
+             assertEquals(55, rtbl.getCount());
+             assertEquals(49, rtbl.getUniqueCount());
+             //TODO: add in tests for phonetic runs
+         } finally {
+             // release the package without saving, even when an assertion above fails
+             pkg.revert();
+         }
+     }
+ }
Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,54 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.junit.Test;
+
+ /** Checks the hyperlink record read for the cell at row 0, column 0 of a sample .xlsb sheet. */
+ public class TestXSSFBSheetHyperlinkManager {
+     private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+     @Test
+     public void testBasic() throws Exception {
+         OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
+         try {
+             XSSFBReader reader = new XSSFBReader(pkg);
+             XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
+             it.next(); // move onto the first sheet; the returned stream itself is not used
+             XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
+             List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
+             assertNotNull(records);
+             assertEquals(1, records.size());
+             XSSFHyperlinkRecord record = records.get(0);
+             assertEquals("http://tika.apache.org/", record.getLocation());
+             assertEquals("rId2", record.getRelId());
+         } finally {
+             pkg.revert(); // release the package without saving, even when an assertion fails
+         }
+     }
+ }
Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,224 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.eventusermodel;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.junit.Test;
+
+ /** Dumps sample .xlsb workbooks through {@link XSSFBReader} and checks the dumps against expected snippets. */
+ public class TestXSSFBReader {
+
+     private static final POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+     @Test
+     public void testBasic() throws Exception {
+         List<String> sheetTexts = getSheets("testVarious.xlsb");
+
+         assertEquals(1, sheetTexts.size());
+         String xsxml = sheetTexts.get(0);
+         assertContains("This is a string", xsxml);
+         assertContains("<td ref=\"B2\">13</td>", xsxml);
+         assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
+         assertContains("<td ref=\"B4\">$ 3.03</td>", xsxml);
+         assertContains("<td ref=\"B5\">20%</td>", xsxml);
+         assertContains("<td ref=\"B6\">13.12</td>", xsxml);
+         assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
+         assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);
+
+         assertContains("46/1963", xsxml);//custom format 1
+         assertContains("3/128", xsxml);//custom format 2
+
+         assertContains("<tr num=\"7>\n" +
+                 "\t<td ref=\"A8\">longer int</td>\n" +
+                 "\t<td ref=\"B8\">1.23457E+15</td>\n" +
+                 "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                 "test comment2</span></td>\n" +
+                 "</tr num=\"7>", xsxml);
+
+         assertContains("<tr num=\"34>\n" +
+                 "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                 "comment6 actually in cell</span></td>\n" +
+                 "</tr num=\"34>", xsxml);
+
+         assertContains("<tr num=\"64>\n" +
+                 "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                 "comment7 end of file</span></td>\n" +
+                 "</tr num=\"64>", xsxml);
+
+         assertContains("<tr num=\"65>\n" +
+                 "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                 "comment8 end of file</span></td>\n" +
+                 "</tr num=\"65>", xsxml);
+
+         assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
+         assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
+         assertContains(
+                 "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
+                 xsxml);
+         assertContains(
+                 "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
+                 xsxml);
+         assertContains(
+                 "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
+                 xsxml);
+         assertContains(
+                 "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
+                 xsxml);
+     }
+
+     @Test
+     public void testComments() throws Exception {
+         List<String> sheetTexts = getSheets("comments.xlsb");
+         String xsxml = sheetTexts.get(0);
+         assertContains(
+                 "<tr num=\"0>\n" +
+                 "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
+                 "\t<td ref=\"B1\">row1</td>\n" +
+                 "</tr num=\"0>", xsxml);
+         assertContains(
+                 "<tr num=\"1>\n" +
+                 "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                 "comment row2 (index1)</span></td>\n" +
+                 "</tr num=\"1>",
+                 xsxml);
+         assertContains("<tr num=\"2>\n" +
+                 "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
+                 "\t<td ref=\"B3\">row3</td>\n", xsxml);
+
+         assertContains("<tr num=\"3>\n" +
+                 "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
+                 "\t<td ref=\"B4\">row4</td>\n" +
+                 "</tr num=\"3></sheet>", xsxml);
+     }
+
+     /**
+      * Runs every sheet of the given sample file through a {@link XSSFBSheetHandler} and returns
+      * one {@link TestSheetHandler} dump per sheet, in iteration order; the package is reverted on exit.
+      */
+     private List<String> getSheets(String testFileName) throws Exception {
+         OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
+         try {
+             List<String> sheetTexts = new ArrayList<String>();
+             XSSFBReader r = new XSSFBReader(pkg);
+             XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
+             assertNotNull(xssfbStylesTable);
+             XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
+             XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData();
+
+             while (it.hasNext()) {
+                 InputStream is = it.next();
+                 String name = it.getSheetName();
+                 TestSheetHandler testSheetHandler = new TestSheetHandler();
+                 testSheetHandler.startSheet(name);
+                 XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
+                         xssfbStylesTable,
+                         it.getXSSFBSheetComments(),
+                         sst, testSheetHandler,
+                         new DataFormatter(),
+                         false);
+                 sheetHandler.parse();
+                 testSheetHandler.endSheet();
+                 sheetTexts.add(testSheetHandler.toString());
+             }
+             return sheetTexts;
+         } finally {
+             pkg.revert();
+         }
+     }
+
+     /** Fails unless needle occurs in haystack, after converting all [\r\n\t]+ to " " in both of them. */
+     private void assertContains(String needle, String haystack) {
+         String flatNeedle = needle.replaceAll("[\r\n\t]+", " ");
+         String flatHaystack = haystack.replaceAll("[\r\n\t]+", " ");
+         if (!flatHaystack.contains(flatNeedle)) {
+             fail("couldn't find >"+flatNeedle+"< in: "+flatHaystack );
+         }
+     }
+
+     @Test
+     public void testDate() throws Exception {
+         List<String> sheets = getSheets("date.xlsb");
+         assertEquals(1, sheets.size());
+         assertContains("1/12/13", sheets.get(0));
+     }
+
+     /**
+      * Collects the handler callbacks into one XML-like string. The {@code name} and {@code num}
+      * attribute values are written without a closing quote; the expected snippets rely on that.
+      */
+     private static final class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
+         private final StringBuilder sb = new StringBuilder();
+
+         public void startSheet(String sheetName) {
+             sb.append("<sheet name=\"").append(sheetName).append(">");
+         }
+
+         public void endSheet(){
+             sb.append("</sheet>");
+         }
+
+         @Override
+         public void startRow(int rowNum) {
+             sb.append("\n<tr num=\"").append(rowNum).append(">");
+         }
+
+         @Override
+         public void endRow(int rowNum) {
+             sb.append("\n</tr num=\"").append(rowNum).append(">");
+         }
+
+         @Override
+         public void cell(String cellReference, String formattedValue, XSSFComment comment) {
+             sb.append("\n\t<td ref=\"").append(cellReference).append("\">");
+             sb.append(formattedValue == null ? "" : formattedValue);
+             if (comment != null) {
+                 // the comment follows the cell value, inside the same td element
+                 sb.append("<span type=\"comment\" author=\"")
+                         .append(comment.getAuthor()).append("\">")
+                         .append(comment.getString().toString().trim()).append("</span>");
+             }
+             sb.append("</td>");
+         }
+
+         @Override
+         public void headerFooter(String text, boolean isHeader, String tagName) {
+             String element = isHeader ? "header" : "footer";
+             sb.append("<").append(element).append(" tagName=\"").append(tagName).append("\">")
+                     .append(text).append("</").append(element).append(">");
+         }
+
+         @Override
+         public String toString() {
+             return sb.toString();
+         }
+     }
+ }
Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,102 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.junit.Test;
+
+/**
+ * Tests for {@link XSSFBEventBasedExcelExtractor}
+ */
+public class TestXSSFBEventBasedExcelExtractor {
+
+
+ protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+ return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
+ openSamplePackage(sampleName));
+ }
+
+ /**
+ * Get text out of the simple file
+ */
+ @Test
+ public void testGetSimpleText() throws Exception {
+ // a very simple file
+ XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
+ extractor.setIncludeCellComments(true);
+ extractor.getText();
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check sheet names
+ assertTrue(text.startsWith("Sheet1"));
+ assertTrue(text.endsWith("Sheet3\n"));
+
+ // Now without, will have text
+ extractor.setIncludeSheetNames(false);
+ text = extractor.getText();
+ String CHUNK1 =
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n";
+ String CHUNK2 =
+ "The quick brown fox jumps over the lazy dog\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n" +
+ "hello, xssf hello, xssf\n";
+ assertEquals(
+ CHUNK1 +
+ "at\t4995\n" +
+ CHUNK2
+ , text);
+
+ }
+
+
+ /**
+ * Test text extraction from text box using getShapes()
+ *
+ * @throws Exception
+ */
+ @Test
+ public void testShapes() throws Exception {
+ XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
+
+ try {
+ String text = ooxmlExtractor.getText();
+
+ assertTrue(text.indexOf("Line 1") > -1);
+ assertTrue(text.indexOf("Line 2") > -1);
+ assertTrue(text.indexOf("Line 3") > -1);
+ } finally {
+ ooxmlExtractor.close();
+ }
+ }
+
+}
Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java
------------------------------------------------------------------------------
svn:eol-style = native
Added: poi/trunk/test-data/spreadsheet/51519.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/51519.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/51519.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/WithTextBox.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/WithTextBox.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/WithTextBox.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/comments.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/comments.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/comments.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/date.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/date.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/date.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/hyperlink.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/hyperlink.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/hyperlink.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/sample.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/sample.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/sample.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
Added: poi/trunk/test-data/spreadsheet/testVarious.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/testVarious.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/testVarious.xlsb
------------------------------------------------------------------------------
svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org