You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2017/03/16 18:37:14 UTC

svn commit: r1787228 [2/2] - in /poi: site/src/documentation/content/xdocs/ trunk/src/ooxml/java/org/apache/poi/extractor/ trunk/src/ooxml/java/org/apache/poi/xssf/ trunk/src/ooxml/java/org/apache/poi/xssf/binary/ trunk/src/ooxml/java/org/apache/poi/xs...

Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,117 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.util.Internal;
+
+/**
+ * This is a read only record that maintains information about
+ * a hyperlink.  In OOXML land, this information has to be merged
+ * from 1) the sheet's .rels to get the url and 2) from after the
+ * sheet data in the hyperlink section.
+ *
+ * The {@link #display} is often empty and should be filled from
+ * the contents of the anchor cell.
+ *
+ */
+@Internal
+public class XSSFHyperlinkRecord {
+
+    // set once at construction
+    private final CellRangeAddress cellRangeAddress;
+    private final String relId;
+    // may be replaced later through the package-private setters
+    private String location;
+    private String toolTip;
+    private String display;
+
+    /**
+     * Builds a record from the given values; every argument is stored as-is
+     * and may be <code>null</code>.
+     *
+     * @param cellRangeAddress cell range this hyperlink record belongs to
+     * @param relId relationship id of the hyperlink
+     * @param location location value of the hyperlink
+     * @param toolTip tool tip text
+     * @param display display text
+     */
+    XSSFHyperlinkRecord(CellRangeAddress cellRangeAddress, String relId, String location, String toolTip, String display) {
+        this.cellRangeAddress = cellRangeAddress;
+        this.relId = relId;
+        this.location = location;
+        this.toolTip = toolTip;
+        this.display = display;
+    }
+
+    /** @param location new location value, replaces the current one */
+    void setLocation(String location) {
+        this.location = location;
+    }
+
+    /** @param toolTip new tool tip text, replaces the current one */
+    void setToolTip(String toolTip) {
+        this.toolTip = toolTip;
+    }
+
+    /** @param display new display text, replaces the current one */
+    void setDisplay(String display) {
+        this.display = display;
+    }
+
+    /** @return the cell range passed to the constructor */
+    CellRangeAddress getCellRangeAddress() {
+        return cellRangeAddress;
+    }
+
+    /** @return the relationship id passed to the constructor */
+    public String getRelId() {
+        return relId;
+    }
+
+    /** @return the current location value, may be <code>null</code> */
+    public String getLocation() {
+        return location;
+    }
+
+    /** @return the current tool tip text, may be <code>null</code> */
+    public String getToolTip() {
+        return toolTip;
+    }
+
+    /** @return the current display text, may be <code>null</code> */
+    public String getDisplay() {
+        return display;
+    }
+
+    /**
+     * Two records are equal when they are of exactly the same class and all
+     * five fields are pairwise equal (<code>null</code> only equals <code>null</code>).
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (o == this) {
+            return true;
+        }
+        if (o == null || o.getClass() != getClass()) {
+            return false;
+        }
+        XSSFHyperlinkRecord other = (XSSFHyperlinkRecord) o;
+        return sameValue(cellRangeAddress, other.cellRangeAddress)
+                && sameValue(relId, other.relId)
+                && sameValue(location, other.location)
+                && sameValue(toolTip, other.toolTip)
+                && sameValue(display, other.display);
+    }
+
+    /** Null-safe equality: <code>null</code> matches only <code>null</code>. */
+    private static boolean sameValue(Object mine, Object theirs) {
+        return mine == null ? theirs == null : mine.equals(theirs);
+    }
+
+    /**
+     * Combines the hash codes of all five fields with the usual factor 31;
+     * a <code>null</code> field contributes 0.
+     */
+    @Override
+    public int hashCode() {
+        Object[] fields = {cellRangeAddress, relId, location, toolTip, display};
+        int hash = 0;
+        for (Object field : fields) {
+            hash = 31 * hash + (field == null ? 0 : field.hashCode());
+        }
+        return hash;
+    }
+
+    /** @return a debug representation listing all five fields */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder("XSSFHyperlinkRecord{");
+        sb.append("cellRangeAddress=").append(cellRangeAddress);
+        sb.append(", relId='").append(relId).append('\'');
+        sb.append(", location='").append(location).append('\'');
+        sb.append(", toolTip='").append(toolTip).append('\'');
+        sb.append(", display='").append(display).append('\'');
+        sb.append('}');
+        return sb.toString();
+    }
+}

Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/XSSFHyperlinkRecord.java
------------------------------------------------------------------------------
    svn:eol-style = native

Copied: poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html (from r1787212, poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html)
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html?p2=poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html&p1=poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html&r1=1787212&r2=1787228&rev=1787228&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/hssf/usermodel/package.html (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/binary/package.html Thu Mar 16 18:37:13 2017
@@ -22,8 +22,17 @@
 </head>
 <body bgcolor="white">
 
-usermodel package maps HSSF low level strutures to familiar workbook/sheet model
-
+<p>The org.apache.poi.xssf.binary package includes necessary underlying components
+for streaming/read-only processing of xlsb files.
+</p>
+<p>
+    POI does not yet support opening .xlsb files with XSSFWorkbook, but you can read files with XSSFBReader
+    in o.a.p.xssf.eventusermodel.
+</p>
+<p>
+    This feature was added in poi-3.15-beta3 and should be considered experimental.  Most classes
+    have been marked @Internal and the API is subject to change.
+</p>
 <h2>Related Documentation</h2>
 
 For overviews, tutorials, examples, guides, and tool documentation, please see:

Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,172 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.eventusermodel;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.openxml4j.opc.PackagePartName;
+import org.apache.poi.openxml4j.opc.PackageRelationship;
+import org.apache.poi.openxml4j.opc.PackageRelationshipCollection;
+import org.apache.poi.openxml4j.opc.PackagingURIHelper;
+import org.apache.poi.util.LittleEndian;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBParseException;
+import org.apache.poi.xssf.binary.XSSFBParser;
+import org.apache.poi.xssf.binary.XSSFBRecordType;
+import org.apache.poi.xssf.binary.XSSFBRelation;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.binary.XSSFBUtils;
+import org.apache.poi.xssf.model.CommentsTable;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+
+/**
+ * Reader for xlsb files.
+ */
+public class XSSFBReader extends XSSFReader {
+    /**
+     * Creates a new XSSFBReader, for the given package
+     *
+     * @param pkg opc package
+     */
+    public XSSFBReader(OPCPackage pkg) throws IOException, OpenXML4JException {
+        super(pkg);
+    }
+
+    /**
+     * Returns an Iterator which will let you get at all the
+     *  different Sheets in turn.
+     * Each sheet's InputStream is only opened when fetched
+     *  from the Iterator. It's up to you to close the
+     *  InputStreams when done with each one.
+     */
+    @Override
+    public Iterator<InputStream> getSheetsData() throws IOException, InvalidFormatException {
+        return new SheetIterator(workbookPart);
+    }
+
+    /**
+     * Builds the binary styles table from the first package part that has
+     * the binary styles content type.
+     *
+     * @return the styles table, or <code>null</code> if the package has no such part
+     * @throws IOException if the styles part cannot be opened or read
+     */
+    public XSSFBStylesTable getXSSFBStylesTable() throws IOException {
+        ArrayList<PackagePart> parts = pkg.getPartsByContentType(XSSFBRelation.STYLES_BINARY.getContentType());
+        if (parts.isEmpty()) {
+            return null;
+        }
+        return new XSSFBStylesTable(parts.get(0).getInputStream());
+    }
+
+
+    /**
+     * Sheet iterator for xlsb packages: the sheet references are read from
+     * the binary workbook part instead of workbook.xml.
+     */
+    public static class SheetIterator extends XSSFReader.SheetIterator {
+
+        /**
+         * Construct a new SheetIterator
+         *
+         * @param wb package part holding the workbook
+         */
+        private SheetIterator(PackagePart wb) throws IOException {
+            super(wb);
+        }
+
+        /**
+         * Parses the workbook part and returns the sheet references found
+         * in it, in the order in which they were read.
+         *
+         * @param wb package part holding the workbook
+         * @return iterator over the sheet references
+         * @throws IOException if the part cannot be opened, read or closed
+         */
+        @Override
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+            InputStream workbookStream = wb.getInputStream();
+            try {
+                SheetRefLoader sheetRefLoader = new SheetRefLoader(workbookStream);
+                sheetRefLoader.parse();
+                return sheetRefLoader.getSheets().iterator();
+            } finally {
+                // the references are fully collected once parse() returns,
+                // so the stream opened above must not be left open
+                workbookStream.close();
+            }
+        }
+
+        /**
+         * Not supported by XSSFBReader's SheetIterator.
+         * Please use {@link #getXSSFBSheetComments()} instead.
+         * @return nothing, always throws IllegalArgumentException!
+         */
+        @Override
+        public CommentsTable getSheetComments() {
+            throw new IllegalArgumentException("Please use getXSSFBSheetComments");
+        }
+
+        /**
+         * Loads the comments of the current sheet.
+         * This is a best-effort lookup: any problem while resolving or
+         * reading the comments part results in <code>null</code>.
+         *
+         * @return the comments table, or <code>null</code> if the sheet has no
+         *  comments relationship, the target cannot be resolved, or reading fails
+         */
+        public XSSFBCommentsTable getXSSFBSheetComments() {
+            PackagePart sheetPkg = getSheetPart();
+
+            // Do we have a comments relationship? (Only ever one if so)
+            try {
+                PackageRelationshipCollection commentsList =
+                        sheetPkg.getRelationshipsByType(XSSFRelation.SHEET_COMMENTS.getRelation());
+                if (commentsList.size() == 0) {
+                    return null;
+                }
+                PackageRelationship comments = commentsList.getRelationship(0);
+                if (comments == null || comments.getTargetURI() == null) {
+                    return null;
+                }
+                PackagePartName commentsName = PackagingURIHelper.createPartName(comments.getTargetURI());
+                PackagePart commentsPart = sheetPkg.getPackage().getPart(commentsName);
+                if (commentsPart == null) {
+                    // relationship points to a part that is not in the package
+                    return null;
+                }
+                return new XSSFBCommentsTable(commentsPart.getInputStream());
+            } catch (InvalidFormatException e) {
+                // deliberate best effort, see javadoc
+                return null;
+            } catch (IOException e) {
+                // deliberate best effort, see javadoc
+                return null;
+            }
+        }
+
+    }
+
+    /**
+     * Collects a sheet reference for every BrtBundleSh record of the
+     * workbook part that carries a non-blank relationship id.
+     */
+    private static class SheetRefLoader extends XSSFBParser {
+        private final List<XSSFSheetRef> sheets = new ArrayList<XSSFSheetRef>();
+
+        private SheetRefLoader(InputStream is) {
+            super(is);
+        }
+
+        /**
+         * Handles BrtBundleSh records, all other record types are ignored.
+         */
+        @Override
+        public void handleRecord(int recordType, byte[] data) throws XSSFBParseException {
+            if (recordType == XSSFBRecordType.BrtBundleSh.getId()) {
+                addWorksheet(data);
+            }
+        }
+
+        /**
+         * Reads tab id, relationship id and sheet name from the record data
+         * and stores a sheet reference unless the relationship id is blank.
+         *
+         * @param data payload of a BrtBundleSh record
+         * @throws XSSFBParseException if the tab id is outside of 1..0xFFFF
+         */
+        private void addWorksheet(byte[] data) {
+            //the first int is the sheet state #2.5.142; it is not needed here
+            int offset = LittleEndian.INT_SIZE;
+
+            long iTabID = LittleEndian.getUInt(data, offset);
+            offset += LittleEndian.INT_SIZE;
+            //according to #2.4.304
+            if (iTabID < 1 || iTabID > 0x0000FFFFL) {
+                throw new XSSFBParseException("table id out of range: "+iTabID);
+            }
+            StringBuilder sb = new StringBuilder();
+            //the relationship id comes first, the sheet name directly after it
+            offset += XSSFBUtils.readXLWideString(data, offset, sb);
+            String relId = sb.toString();
+            sb.setLength(0);
+            XSSFBUtils.readXLWideString(data, offset, sb);
+            String name = sb.toString();
+            if (relId.trim().length() > 0) {
+                sheets.add(new XSSFSheetRef(relId, name));
+            }
+        }
+
+        /** @return the sheet references collected so far, in record order */
+        List<XSSFSheetRef> getSheets() {
+            return sheets;
+        }
+    }
+}
\ No newline at end of file

Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFBReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFReader.java Thu Mar 16 18:37:13 2017
@@ -16,15 +16,16 @@
 ==================================================================== */
 package org.apache.poi.xssf.eventusermodel;
 
-import static org.apache.poi.POIXMLTypeLoader.DEFAULT_XML_OPTIONS;
-
+import javax.xml.parsers.ParserConfigurationException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Locale;
 import java.util.Map;
 
 import org.apache.poi.POIXMLException;
@@ -39,6 +40,7 @@ import org.apache.poi.openxml4j.opc.Pack
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.util.POILogFactory;
 import org.apache.poi.util.POILogger;
+import org.apache.poi.util.SAXHelper;
 import org.apache.poi.xssf.model.CommentsTable;
 import org.apache.poi.xssf.model.SharedStringsTable;
 import org.apache.poi.xssf.model.StylesTable;
@@ -47,9 +49,11 @@ import org.apache.poi.xssf.usermodel.XSS
 import org.apache.poi.xssf.usermodel.XSSFRelation;
 import org.apache.poi.xssf.usermodel.XSSFShape;
 import org.apache.xmlbeans.XmlException;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+import org.xml.sax.Attributes;
+import org.xml.sax.InputSource;
+import org.xml.sax.SAXException;
+import org.xml.sax.XMLReader;
+import org.xml.sax.helpers.DefaultHandler;
 
 /**
  * This class makes it easy to get at individual parts
@@ -62,8 +66,8 @@ public class XSSFReader {
 
     private static final POILogger LOGGER = POILogFactory.getLogger(XSSFReader.class);
 
-    private OPCPackage pkg;
-    private PackagePart workbookPart;
+    protected OPCPackage pkg;
+    protected PackagePart workbookPart;
 
     /**
      * Creates a new XSSFReader, for the given package
@@ -194,23 +198,23 @@ public class XSSFReader {
         private final Map<String, PackagePart> sheetMap;
 
         /**
-         * Current CTSheet bean
+         * Current sheet reference
          */
-        private CTSheet ctSheet;
-        
+        XSSFSheetRef xssfSheetRef;
+
         /**
          * Iterator over CTSheet objects, returns sheets in <tt>logical</tt> order.
          * We can't rely on the Ooxml4J's relationship iterator because it returns objects in physical order,
          * i.e. as they are stored in the underlying package
          */
-        private final Iterator<CTSheet> sheetIterator;
+        final Iterator<XSSFSheetRef> sheetIterator;
 
         /**
          * Construct a new SheetIterator
          *
          * @param wb package part holding workbook.xml
          */
-        private SheetIterator(PackagePart wb) throws IOException {
+        SheetIterator(PackagePart wb) throws IOException {
 
             /**
              * The order of sheets is defined by the order of CTSheet elements in workbook.xml
@@ -228,25 +232,44 @@ public class XSSFReader {
                         sheetMap.put(rel.getId(), pkg.getPart(relName));
                     }
                 }
-                //step 2. Read array of CTSheet elements, wrap it in a ArayList and construct an iterator
-                //Note, using XMLBeans might be expensive, consider refactoring to use SAX or a plain regexp search
-                CTWorkbook wbBean = WorkbookDocument.Factory.parse(wb.getInputStream(), DEFAULT_XML_OPTIONS).getWorkbook();
-                List<CTSheet> validSheets = new ArrayList<CTSheet>();
-                for (CTSheet ctSheet : wbBean.getSheets().getSheetList()) {
-                    //if there's no relationship id, silently skip the sheet
-                    String sheetId = ctSheet.getId();
-                    if (sheetId != null && sheetId.length() > 0) {
-                        validSheets.add(ctSheet);
-                    }
-                }
-                sheetIterator = validSheets.iterator();
+                //step 2. Read the sheet references from the workbook part
+                //and construct an iterator over them
+                sheetIterator = createSheetIteratorFromWB(wb);
             } catch (InvalidFormatException e){
                 throw new POIXMLException(e);
-            } catch (XmlException e){
+            }
+        }
+
+        Iterator<XSSFSheetRef> createSheetIteratorFromWB(PackagePart wb) throws IOException {
+
+            XMLSheetRefReader xmlSheetRefReader = new XMLSheetRefReader();
+            XMLReader xmlReader = null;
+            try {
+                xmlReader = SAXHelper.newXMLReader();
+            } catch (ParserConfigurationException e) {
+                throw new POIXMLException(e);
+            } catch (SAXException e) {
                 throw new POIXMLException(e);
             }
+            xmlReader.setContentHandler(xmlSheetRefReader);
+            try {
+                xmlReader.parse(new InputSource(wb.getInputStream()));
+            } catch (SAXException e) {
+                throw new POIXMLException(e);
+            }
+
+            List<XSSFSheetRef> validSheets = new ArrayList<XSSFSheetRef>();
+            for (XSSFSheetRef xssfSheetRef : xmlSheetRefReader.getSheetRefs()) {
+                //if there's no relationship id, silently skip the sheet
+                String sheetId = xssfSheetRef.getId();
+                if (sheetId != null && sheetId.length() > 0) {
+                    validSheets.add(xssfSheetRef);
+                }
+            }
+            return validSheets.iterator();
         }
 
+
         /**
          * Returns <tt>true</tt> if the iteration has more elements.
          *
@@ -264,9 +287,9 @@ public class XSSFReader {
          */
         @Override
         public InputStream next() {
-            ctSheet = sheetIterator.next();
+            xssfSheetRef = sheetIterator.next();
 
-            String sheetId = ctSheet.getId();
+            String sheetId = xssfSheetRef.getId();
             try {
                 PackagePart sheetPkg = sheetMap.get(sheetId);
                 return sheetPkg.getInputStream();
@@ -281,7 +304,7 @@ public class XSSFReader {
          * @return name of the current sheet
          */
         public String getSheetName() {
-            return ctSheet.getName();
+            return xssfSheetRef.getName();
         }
         
         /**
@@ -344,7 +367,7 @@ public class XSSFReader {
         }
         
         public PackagePart getSheetPart() {
-           String sheetId = ctSheet.getId();
+           String sheetId = xssfSheetRef.getId();
            return sheetMap.get(sheetId);
         }
 
@@ -356,4 +379,52 @@ public class XSSFReader {
             throw new IllegalStateException("Not supported");
         }
     }
+
+    /**
+     * Immutable pair of the id and the name of a sheet, as handed to the constructor.
+     */
+    protected static final class XSSFSheetRef {
+        //do we need to store sheetId, too?
+        private final String id;
+        private final String name;
+
+        /**
+         * @param id id of the sheet, stored as-is
+         * @param name name of the sheet, stored as-is
+         */
+        public XSSFSheetRef(String id, String name) {
+            this.name = name;
+            this.id = id;
+        }
+
+        /** @return the id given at construction */
+        public String getId() {
+            return this.id;
+        }
+
+        /** @return the name given at construction */
+        public String getName() {
+            return this.name;
+        }
+    }
+
+    /**
+     * SAX handler that scrapes sheet reference info and order from workbook.xml.
+     * Exactly one {@link XSSFSheetRef} is recorded per <code>sheet</code> element,
+     * in document order.
+     */
+    private static class XMLSheetRefReader extends DefaultHandler {
+        private final static String SHEET = "sheet";
+        private final static String ID = "id";
+        private final static String NAME = "name";
+
+        private final List<XSSFSheetRef> sheetRefs = new LinkedList<XSSFSheetRef>();
+
+        /**
+         * Records the <code>name</code> and <code>id</code> attributes of every
+         * element whose local name is <code>sheet</code> (compared case-insensitively).
+         * An attribute that is absent is stored as <code>null</code>.
+         */
+        @Override
+        public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException {
+            if (!localName.toLowerCase(Locale.US).equals(SHEET)) {
+                return;
+            }
+            String name = null;
+            String id = null;
+            for (int i = 0; i < attrs.getLength(); i++) {
+                //the whole local name has to match, so e.g. "sheetId" is not taken for "id"
+                String attrName = attrs.getLocalName(i).toLowerCase(Locale.US);
+                if (attrName.equals(NAME)) {
+                    name = attrs.getValue(i);
+                } else if (attrName.equals(ID)) {
+                    id = attrs.getValue(i);
+                }
+            }
+            //add the reference only once, after all attributes have been read;
+            //adding inside the loop created one (partly filled) entry per attribute
+            sheetRefs.add(new XSSFSheetRef(id, name));
+        }
+
+        /** @return read-only view of the sheet references collected so far */
+        List<XSSFSheetRef> getSheetRefs() {
+            return Collections.unmodifiableList(sheetRefs);
+        }
+    }
 }

Added: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java (added)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,160 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBCommentsTable;
+import org.apache.poi.xssf.binary.XSSFBHyperlinksTable;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler;
+import org.apache.poi.xssf.usermodel.XSSFRelation;
+import org.apache.xmlbeans.XmlException;
+import org.xml.sax.SAXException;
+
+/**
+ * Implementation of a text extractor for xlsb Excel
+ * files that uses SAX-like binary parsing.
+ */
+public class XSSFBEventBasedExcelExtractor extends XSSFEventBasedExcelExtractor
+        implements org.apache.poi.ss.extractor.ExcelExtractor {
+
+    public static final XSSFRelation[] SUPPORTED_TYPES = new XSSFRelation[] {
+            XSSFRelation.XLSB_BINARY_WORKBOOK
+    };
+
+    private boolean handleHyperlinksInCells = false;
+
+    /**
+     * @param path path handed on to the parent extractor
+     */
+    public XSSFBEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+        super(path);
+    }
+
+    /**
+     * @param container package handed on to the parent extractor
+     */
+    public XSSFBEventBasedExcelExtractor(OPCPackage container) throws XmlException, OpenXML4JException, IOException {
+        super(container);
+    }
+
+    /**
+     * Command line entry point: prints the text of the given xlsb file to stdout.
+     *
+     * @param args expects the file name as first element
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 1) {
+            System.err.println("Use:");
+            System.err.println("  XSSFBEventBasedExcelExtractor <filename.xlsb>");
+            System.exit(1);
+        }
+        POIXMLTextExtractor extractor =
+                new XSSFBEventBasedExcelExtractor(args[0]);
+        try {
+            System.out.println(extractor.getText());
+        } finally {
+            // release the extractor even if text extraction fails
+            extractor.close();
+        }
+    }
+
+    /**
+     * @param handleHyperlinksInCells if true, a hyperlinks table is loaded
+     *  for every sheet during {@link #getText()}
+     */
+    public void setHandleHyperlinksInCells(boolean handleHyperlinksInCells) {
+        this.handleHyperlinksInCells = handleHyperlinksInCells;
+    }
+
+    /**
+     * Should we return the formula itself, and not
+     * the result it produces? Default is false
+     * This is currently unsupported for xssfb
+     *
+     * @throws IllegalArgumentException always
+     */
+    @Override
+    public void setFormulasNotResults(boolean formulasNotResults) {
+        throw new IllegalArgumentException("Not currently supported");
+    }
+
+    /**
+     * Processes the given sheet
+     *
+     * @param sheetContentsExtractor handler that receives the sheet contents
+     * @param styles styles table handed to the sheet handler
+     * @param comments comments table handed to the sheet handler, may be <code>null</code>
+     * @param strings shared strings table handed to the sheet handler
+     * @param sheetInputStream stream of the sheet part; it is not closed here
+     */
+    public void processSheet(
+            SheetContentsHandler sheetContentsExtractor,
+            XSSFBStylesTable styles,
+            XSSFBCommentsTable comments,
+            XSSFBSharedStringsTable strings,
+            InputStream sheetInputStream)
+            throws IOException, SAXException {
+
+        // without an explicit locale, fall back to the formatter's default
+        DataFormatter formatter = (locale == null) ? new DataFormatter() : new DataFormatter(locale);
+
+        XSSFBSheetHandler xssfbSheetHandler = new XSSFBSheetHandler(
+                sheetInputStream,
+                styles, comments, strings, sheetContentsExtractor, formatter, formulasNotResults
+        );
+        xssfbSheetHandler.parse();
+    }
+
+    /**
+     * Processes the file and returns the text
+     *
+     * @return the extracted text, or <code>null</code> if reading the package
+     *  failed (the exception is printed to System.err)
+     */
+    @Override
+    public String getText() {
+        try {
+            XSSFBSharedStringsTable strings = new XSSFBSharedStringsTable(container);
+            XSSFBReader xssfbReader = new XSSFBReader(container);
+            XSSFBStylesTable styles = xssfbReader.getXSSFBStylesTable();
+            XSSFBReader.SheetIterator iter = (XSSFBReader.SheetIterator) xssfbReader.getSheetsData();
+
+            StringBuffer text = new StringBuffer();
+            SheetTextExtractor sheetExtractor = new SheetTextExtractor();
+            while (iter.hasNext()) {
+                InputStream stream = iter.next();
+                try {
+                    if (includeSheetNames) {
+                        text.append(iter.getSheetName());
+                        text.append('\n');
+                    }
+                    if (handleHyperlinksInCells) {
+                        // NOTE(review): the table is loaded but not handed to the
+                        // sheet processing yet, so it has no effect on the text
+                        XSSFBHyperlinksTable hyperlinksTable = new XSSFBHyperlinksTable(iter.getSheetPart());
+                    }
+                    XSSFBCommentsTable comments = includeCellComments ? iter.getXSSFBSheetComments() : null;
+                    processSheet(sheetExtractor, styles, comments, strings, stream);
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendHeaderText(text);
+                    }
+                    sheetExtractor.appendCellText(text);
+                    if (includeTextBoxes) {
+                        processShapes(iter.getShapes(), text);
+                    }
+                    if (includeHeadersFooters) {
+                        sheetExtractor.appendFooterText(text);
+                    }
+                    sheetExtractor.reset();
+                } finally {
+                    // do not leak the sheet stream if processing fails
+                    stream.close();
+                }
+            }
+
+            return text.toString();
+        } catch (IOException e) {
+            System.err.println(e);
+            return null;
+        } catch (SAXException se) {
+            System.err.println(se);
+            return null;
+        } catch (OpenXML4JException o4je) {
+            System.err.println(o4je);
+            return null;
+        }
+    }
+
+}

Propchange: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFBEventBasedExcelExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -54,15 +54,15 @@ import org.xml.sax.XMLReader;
  */
 public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor 
        implements org.apache.poi.ss.extractor.ExcelExtractor {
-    private OPCPackage container;
+    OPCPackage container;
     private POIXMLProperties properties;
 
-    private Locale locale;
-    private boolean includeTextBoxes = true;
-    private boolean includeSheetNames = true;
-    private boolean includeCellComments = false;
-    private boolean includeHeadersFooters = true;
-    private boolean formulasNotResults = false;
+    Locale locale;
+    boolean includeTextBoxes = true;
+    boolean includeSheetNames = true;
+    boolean includeCellComments = false;
+    boolean includeHeadersFooters = true;
+    boolean formulasNotResults = false;
     private boolean concatenatePhoneticRuns = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
@@ -240,7 +240,7 @@ public class XSSFEventBasedExcelExtracto
        }
    }
    
-    private void processShapes(List<XSSFShape> shapes, StringBuffer text) {
+    void processShapes(List<XSSFShape> shapes, StringBuffer text) {
         if (shapes == null){
             return;
         }
@@ -349,7 +349,7 @@ public class XSSFEventBasedExcelExtracto
          * @see XSSFExcelExtractor#getText()
          * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
          */
-        private void appendHeaderText(StringBuffer buffer) {
+        void appendHeaderText(StringBuffer buffer) {
             appendHeaderFooterText(buffer, "firstHeader");
             appendHeaderFooterText(buffer, "oddHeader");
             appendHeaderFooterText(buffer, "evenHeader");
@@ -361,7 +361,7 @@ public class XSSFEventBasedExcelExtracto
          * @see XSSFExcelExtractor#getText()
          * @see org.apache.poi.hssf.extractor.ExcelExtractor#_extractHeaderFooter(org.apache.poi.ss.usermodel.HeaderFooter)
          */
-        private void appendFooterText(StringBuffer buffer) {
+        void appendFooterText(StringBuffer buffer) {
             // append the text for each footer type in the same order
             // they are appended in XSSFExcelExtractor
             appendHeaderFooterText(buffer, "firstFooter");
@@ -372,7 +372,7 @@ public class XSSFEventBasedExcelExtracto
         /**
          * Append the cell contents we have collected.
          */
-        private void appendCellText(StringBuffer buffer) {
+        void appendCellText(StringBuffer buffer) {
             checkMaxTextSize(buffer, output.toString());
             buffer.append(output);
         }
@@ -380,7 +380,7 @@ public class XSSFEventBasedExcelExtracto
         /**
          * Reset this <code>SheetTextExtractor</code> for the next sheet.
          */
-        private void reset() {
+        void reset() {
             output.setLength(0);
             firstCellOfRow = true;
             if (headerFooterMap != null) {

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java?rev=1787228&r1=1787227&r2=1787228&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java Thu Mar 16 18:37:13 2017
@@ -68,6 +68,7 @@ public class TestExtractorFactory {
     private static File xlsxStrict;
     private static File xltx;
     private static File xlsEmb;
+    private static File xlsb;
 
     private static File doc;
     private static File doc6;
@@ -108,6 +109,7 @@ public class TestExtractorFactory {
         xlsxStrict = getFileAndCheck(ssTests, "SampleSS.strict.xlsx");
         xltx = getFileAndCheck(ssTests, "test.xltx");
         xlsEmb = getFileAndCheck(ssTests, "excel_with_embeded.xls");
+        xlsb = getFileAndCheck(ssTests, "testVarious.xlsb");
 
         POIDataSamples wpTests = POIDataSamples.getDocumentInstance();
         doc = getFileAndCheck(wpTests, "SampleDoc.doc");
@@ -172,6 +174,13 @@ public class TestExtractorFactory {
         );
         extractor.close();
 
+        extractor = ExtractorFactory.createExtractor(xlsb);
+        assertTrue(
+                extractor.getText().contains("test")
+        );
+        extractor.close();
+
+
         extractor = ExtractorFactory.createExtractor(xltx);
         assertTrue(
                 extractor.getText().contains("test")

Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,56 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.List;
+import java.util.regex.Pattern;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.junit.Test;
+
+public class TestXSSFBSharedStringsTable {
+
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    @Test
+    public void testBasic() throws Exception {
+
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsb"));
+        List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.bin"));
+        assertEquals(1, parts.size());
+
+        XSSFBSharedStringsTable rtbl = new XSSFBSharedStringsTable(parts.get(0));
+        List<String> strings = rtbl.getItems();
+        assertEquals(49, strings.size());
+
+        assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
+        assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
+        assertEquals(55, rtbl.getCount());
+        assertEquals(49, rtbl.getUniqueCount());
+
+        //TODO: add in tests for phonetic runs
+
+    }
+
+
+}

Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSharedStringsTable.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,54 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.binary;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.util.CellAddress;
+import org.apache.poi.xssf.eventusermodel.XSSFBReader;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.junit.Test;
+
+public class TestXSSFBSheetHyperlinkManager {
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    @Test
+    public void testBasic() throws Exception {
+
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("hyperlink.xlsb"));
+        XSSFBReader reader = new XSSFBReader(pkg);
+        XSSFReader.SheetIterator it = (XSSFReader.SheetIterator) reader.getSheetsData();
+        it.next();
+        XSSFBHyperlinksTable manager = new XSSFBHyperlinksTable(it.getSheetPart());
+        List<XSSFHyperlinkRecord> records = manager.getHyperLinks().get(new CellAddress(0, 0));
+        assertNotNull(records);
+        assertEquals(1, records.size());
+        XSSFHyperlinkRecord record = records.get(0);
+        assertEquals("http://tika.apache.org/", record.getLocation());
+        assertEquals("rId2", record.getRelId());
+
+    }
+
+
+}

Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/binary/TestXSSFBSheetHyperlinkManager.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,224 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.eventusermodel;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.fail;
+
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.poi.POIDataSamples;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.ss.usermodel.DataFormatter;
+import org.apache.poi.xssf.binary.XSSFBSharedStringsTable;
+import org.apache.poi.xssf.binary.XSSFBSheetHandler;
+import org.apache.poi.xssf.binary.XSSFBStylesTable;
+import org.apache.poi.xssf.usermodel.XSSFComment;
+import org.junit.Test;
+
+public class TestXSSFBReader {
+
+    private static POIDataSamples _ssTests = POIDataSamples.getSpreadSheetInstance();
+
+    @Test
+    public void testBasic() throws Exception {
+        List<String> sheetTexts = getSheets("testVarious.xlsb");
+
+        assertEquals(1, sheetTexts.size());
+        String xsxml = sheetTexts.get(0);
+        assertContains("This is a string", xsxml);
+        assertContains("<td ref=\"B2\">13</td>", xsxml);
+        assertContains("<td ref=\"B3\">13.12112313</td>", xsxml);
+        assertContains("<td ref=\"B4\">$   3.03</td>", xsxml);
+        assertContains("<td ref=\"B5\">20%</td>", xsxml);
+        assertContains("<td ref=\"B6\">13.12</td>", xsxml);
+        assertContains("<td ref=\"B7\">1.23457E+14</td>", xsxml);
+        assertContains("<td ref=\"B8\">1.23457E+15</td>", xsxml);
+
+        assertContains("46/1963", xsxml);//custom format 1
+        assertContains("3/128", xsxml);//custom format 2
+
+        assertContains("<tr num=\"7>\n" +
+                "\t<td ref=\"A8\">longer int</td>\n" +
+                "\t<td ref=\"B8\">1.23457E+15</td>\n" +
+                "\t<td ref=\"C8\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "test comment2</span></td>\n" +
+                "</tr num=\"7>", xsxml);
+
+        assertContains("<tr num=\"34>\n" +
+                "\t<td ref=\"B35\">comment6<span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment6 actually in cell</span></td>\n" +
+                "</tr num=\"34>", xsxml);
+
+        assertContains("<tr num=\"64>\n" +
+                "\t<td ref=\"I65\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment7 end of file</span></td>\n" +
+                "</tr num=\"64>", xsxml);
+
+        assertContains("<tr num=\"65>\n" +
+                "\t<td ref=\"I66\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                "comment8 end of file</span></td>\n" +
+                "</tr num=\"65>", xsxml);
+
+        assertContains("<header tagName=\"header\">OddLeftHeader OddCenterHeader OddRightHeader</header>", xsxml);
+        assertContains("<footer tagName=\"footer\">OddLeftFooter OddCenterFooter OddRightFooter</footer>", xsxml);
+        assertContains(
+                "<header tagName=\"evenHeader\">EvenLeftHeader EvenCenterHeader EvenRightHeader\n</header>",
+                xsxml);
+        assertContains(
+                "<footer tagName=\"evenFooter\">EvenLeftFooter EvenCenterFooter EvenRightFooter</footer>",
+                xsxml);
+        assertContains(
+                "<header tagName=\"firstHeader\">FirstPageLeftHeader FirstPageCenterHeader FirstPageRightHeader</header>",
+                xsxml);
+        assertContains(
+                "<footer tagName=\"firstFooter\">FirstPageLeftFooter FirstPageCenterFooter FirstPageRightFooter</footer>",
+                xsxml);
+
+    }
+
+    @Test
+    public void testComments() throws Exception {
+        List<String> sheetTexts = getSheets("comments.xlsb");
+        String xsxml = sheetTexts.get(0);
+        assertContains(
+                "<tr num=\"0>\n" +
+                        "\t<td ref=\"A1\"><span type=\"comment\" author=\"Sven Nissel\">comment top row1 (index0)</span></td>\n" +
+                        "\t<td ref=\"B1\">row1</td>\n" +
+                        "</tr num=\"0>",  xsxml);
+        assertContains(
+                "<tr num=\"1>\n" +
+                        "\t<td ref=\"A2\"><span type=\"comment\" author=\"Allison, Timothy B.\">Allison, Timothy B.:\n" +
+                        "comment row2 (index1)</span></td>\n" +
+                        "</tr num=\"1>",
+                xsxml);
+        assertContains("<tr num=\"2>\n" +
+                "\t<td ref=\"A3\">row3<span type=\"comment\" author=\"Sven Nissel\">comment top row3 (index2)</span></td>\n" +
+                "\t<td ref=\"B3\">row3</td>\n", xsxml);
+
+        assertContains("<tr num=\"3>\n" +
+                "\t<td ref=\"A4\"><span type=\"comment\" author=\"Sven Nissel\">comment top row4 (index3)</span></td>\n" +
+                "\t<td ref=\"B4\">row4</td>\n" +
+                "</tr num=\"3></sheet>", xsxml);
+
+    }
+
+    private List<String> getSheets(String testFileName) throws Exception {
+        OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream(testFileName));
+        List<String> sheetTexts = new ArrayList<String>();
+        XSSFBReader r = new XSSFBReader(pkg);
+
+//        assertNotNull(r.getWorkbookData());
+        //      assertNotNull(r.getSharedStringsData());
+        assertNotNull(r.getXSSFBStylesTable());
+        XSSFBSharedStringsTable sst = new XSSFBSharedStringsTable(pkg);
+        XSSFBStylesTable xssfbStylesTable = r.getXSSFBStylesTable();
+        XSSFBReader.SheetIterator it = (XSSFBReader.SheetIterator)r.getSheetsData();
+
+        while (it.hasNext()) {
+            InputStream is = it.next();
+            String name = it.getSheetName();
+            TestSheetHandler testSheetHandler = new TestSheetHandler();
+            testSheetHandler.startSheet(name);
+            XSSFBSheetHandler sheetHandler = new XSSFBSheetHandler(is,
+                    xssfbStylesTable,
+                    it.getXSSFBSheetComments(),
+                    sst, testSheetHandler,
+                    new DataFormatter(),
+                    false);
+            sheetHandler.parse();
+            testSheetHandler.endSheet();
+            sheetTexts.add(testSheetHandler.toString());
+        }
+        return sheetTexts;
+
+    }
+
+    // Asserts that needle occurs in haystack; every run of [\r\n\t] in both strings is first collapsed to a single space
+    private void assertContains(String needle, String haystack) {
+        needle = needle.replaceAll("[\r\n\t]+", " ");
+        haystack = haystack.replaceAll("[\r\n\t]+", " ");
+        if (haystack.indexOf(needle) < 0) {
+            fail("couldn't find >"+needle+"< in: "+haystack );
+        }
+    }
+
+
+    @Test
+    public void testDate() throws Exception {
+        List<String> sheets = getSheets("date.xlsb");
+        assertEquals(1, sheets.size());
+        assertContains("1/12/13", sheets.get(0));
+
+    }
+
+
+    private class TestSheetHandler implements XSSFSheetXMLHandler.SheetContentsHandler {
+        private final StringBuilder sb = new StringBuilder();
+
+        public void startSheet(String sheetName) {
+            sb.append("<sheet name=\"").append(sheetName).append(">");
+        }
+
+        public void endSheet(){
+            sb.append("</sheet>");
+        }
+        @Override
+        public void startRow(int rowNum) {
+            sb.append("\n<tr num=\"").append(rowNum).append(">");
+        }
+
+        @Override
+        public void endRow(int rowNum) {
+            sb.append("\n</tr num=\"").append(rowNum).append(">");
+        }
+
+        @Override
+        public void cell(String cellReference, String formattedValue, XSSFComment comment) {
+            formattedValue = (formattedValue == null) ? "" : formattedValue;
+            if (comment == null) {
+                sb.append("\n\t<td ref=\"").append(cellReference).append("\">").append(formattedValue).append("</td>");
+            } else {
+                sb.append("\n\t<td ref=\"").append(cellReference).append("\">")
+                        .append(formattedValue)
+                        .append("<span type=\"comment\" author=\"")
+                        .append(comment.getAuthor()).append("\">")
+                        .append(comment.getString().toString().trim()).append("</span>")
+                        .append("</td>");
+            }
+        }
+
+        @Override
+        public void headerFooter(String text, boolean isHeader, String tagName) {
+            if (isHeader) {
+                sb.append("<header tagName=\""+tagName+"\">"+text+"</header>");
+            } else {
+                sb.append("<footer tagName=\""+tagName+"\">"+text+"</footer>");
+
+            }
+        }
+
+        @Override
+        public String toString() {
+            return sb.toString();
+        }
+    }
+}

Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestXSSFBReader.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java?rev=1787228&view=auto
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java (added)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java Thu Mar 16 18:37:13 2017
@@ -0,0 +1,102 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.poi.xssf.XSSFTestDataSamples;
+import org.junit.Test;
+
+/**
+ * Tests for {@link XSSFBEventBasedExcelExtractor}
+ */
+public class TestXSSFBEventBasedExcelExtractor {
+
+
+    protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+        return new XSSFBEventBasedExcelExtractor(XSSFTestDataSamples.
+                openSamplePackage(sampleName));
+    }
+
+    /**
+     * Extracts the text of a simple .xlsb file, first with and then without sheet names
+     */
+    @Test
+    public void testGetSimpleText() throws Exception {
+        // a very simple file
+        XSSFEventBasedExcelExtractor extractor = getExtractor("sample.xlsb");
+        extractor.setIncludeCellComments(true);
+        extractor.getText();
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check sheet names
+        assertTrue(text.startsWith("Sheet1"));
+        assertTrue(text.endsWith("Sheet3\n"));
+
+        // Now without sheet names: only the cell text remains
+        extractor.setIncludeSheetNames(false);
+        text = extractor.getText();
+        String CHUNK1 =
+                "Lorem\t111\n" +
+                        "ipsum\t222\n" +
+                        "dolor\t333\n" +
+                        "sit\t444\n" +
+                        "amet\t555\n" +
+                        "consectetuer\t666\n" +
+                        "adipiscing\t777\n" +
+                        "elit\t888\n" +
+                        "Nunc\t999\n";
+        String CHUNK2 =
+                "The quick brown fox jumps over the lazy dog\n" +
+                        "hello, xssf	hello, xssf\n" +
+                        "hello, xssf	hello, xssf\n" +
+                        "hello, xssf	hello, xssf\n" +
+                        "hello, xssf	hello, xssf\n";
+        assertEquals(
+                CHUNK1 +
+                        "at\t4995\n" +
+                        CHUNK2
+                , text);
+
+    }
+
+
+    /**
+     * Test text extraction from text box using getShapes()
+     *
+     * @throws Exception if the sample workbook cannot be opened or its text cannot be extracted
+     */
+    @Test
+    public void testShapes() throws Exception {
+        XSSFEventBasedExcelExtractor ooxmlExtractor = getExtractor("WithTextBox.xlsb");
+
+        try {
+            String text = ooxmlExtractor.getText();
+
+            assertTrue(text.indexOf("Line 1") > -1);
+            assertTrue(text.indexOf("Line 2") > -1);
+            assertTrue(text.indexOf("Line 3") > -1);
+        } finally {
+            ooxmlExtractor.close();
+        }
+    }
+
+}

Propchange: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFBEventBasedExcelExtractor.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: poi/trunk/test-data/spreadsheet/51519.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/51519.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/51519.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/WithTextBox.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/WithTextBox.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/WithTextBox.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/comments.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/comments.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/comments.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/date.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/date.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/date.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/hyperlink.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/hyperlink.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/hyperlink.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/sample.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/sample.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/sample.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12

Added: poi/trunk/test-data/spreadsheet/testVarious.xlsb
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/testVarious.xlsb?rev=1787228&view=auto
==============================================================================
Binary file - no diff available.

Propchange: poi/trunk/test-data/spreadsheet/testVarious.xlsb
------------------------------------------------------------------------------
    svn:mime-type = application/vnd.ms-excel.sheet.binary.macroEnabled.12



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org