You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ki...@apache.org on 2014/02/14 23:45:05 UTC
svn commit: r1568539 - in /poi: site/src/documentation/content/xdocs/
site/src/documentation/content/xdocs/spreadsheet/
trunk/src/ooxml/java/org/apache/poi/xssf/streaming/
trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/
Author: kiwiwings
Date: Fri Feb 14 22:45:05 2014
New Revision: 1568539
URL: http://svn.apache.org/r1568539
Log:
Bug 53130 - SXSSF Shared Strings option support, to make generated xlsx files compatible with Google Docs or iPad
Modified:
poi/site/src/documentation/content/xdocs/spreadsheet/how-to.xml
poi/site/src/documentation/content/xdocs/status.xml
poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java
poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java
Modified: poi/site/src/documentation/content/xdocs/spreadsheet/how-to.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/spreadsheet/how-to.xml?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/spreadsheet/how-to.xml (original)
+++ poi/site/src/documentation/content/xdocs/spreadsheet/how-to.xml Fri Feb 14 22:45:05 2014
@@ -655,6 +655,18 @@ public class ExampleEventUserModel {
<p>
Note that SXSSF allocates temporary files that you <strong>must</strong> always clean up explicitly, by calling the dispose method.
</p>
+ <p>
+ SXSSFWorkbook defaults to using inline strings instead of a shared strings
+ table. This is very efficient, since no document content needs to be kept in
+ memory, but is also known to produce documents that are incompatible with
+ some clients. With shared strings enabled, all unique strings in the document
+ have to be kept in memory. Depending on your document content this could use
+ a lot more resources than with shared strings disabled.
+ </p>
+ <p>
+ Carefully review your memory budget and compatibility needs before deciding
+ whether to enable shared strings or not.
+ </p>
<p> The example below writes a sheet with a window of 100 rows. When the row count reaches 101,
the row with rownum=0 is flushed to disk and removed from memory, when rownum reaches 102 then the row with rownum=1 is flushed, etc.
</p>
Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Fri Feb 14 22:45:05 2014
@@ -36,6 +36,7 @@
<changes>
<release version="3.11-beta1" date="2014-??-??">
+ <action dev="poi-developers" type="add">53130 - SXSSF Shared Strings option support</action>
<action dev="poi-developers" type="fix">55902 - Mixed fonts issue with Chinese characters (unable to form images from ppt)</action>
<action dev="poi-developers" type="add">56022 - XSSF Event Text Extractor header/footer support</action>
<action dev="poi-developers" type="fix">53282 - Hyperlink with a non-breaking space throws java.lang.IllegalStateException</action>
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/GZIPSheetDataWriter.java Fri Feb 14 22:45:05 2014
@@ -29,6 +29,8 @@ import java.io.Writer;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
+import org.apache.poi.xssf.model.SharedStringsTable;
+
/**
* Sheet writer that supports gzip compression of the temp files.
*/
@@ -37,6 +39,13 @@ public class GZIPSheetDataWriter extends
public GZIPSheetDataWriter() throws IOException {
super();
}
+
+ /**
+ * @param sharedStringsTable the shared strings table, or null if inline text is used
+ */
+ public GZIPSheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException {
+ super(sharedStringsTable);
+ }
/**
* @return temp file to write sheet data
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SXSSFWorkbook.java Fri Feb 14 22:45:05 2014
@@ -43,12 +43,21 @@ import java.util.zip.ZipEntry;
import org.apache.poi.ss.formula.udf.UDFFinder;
import org.apache.poi.ss.usermodel.Row.MissingCellPolicy;
import org.apache.poi.ss.util.CellRangeAddress;
+import org.apache.poi.xssf.model.SharedStringsTable;
/**
* Streaming version of XSSFWorkbook implementing the "BigGridDemo" strategy.
*
- * @author Alex Geller, Four J's Development Tools
-*/
+ * SXSSFWorkbook defaults to using inline strings instead of a shared strings
+ * table. This is very efficient, since no document content needs to be kept in
+ * memory, but is also known to produce documents that are incompatible with
+ * some clients. With shared strings enabled, all unique strings in the document
+ * have to be kept in memory. Depending on your document content this could use
+ * a lot more resources than with shared strings disabled.
+ *
+ * Carefully review your memory budget and compatibility needs before deciding
+ * whether to enable shared strings or not.
+ */
public class SXSSFWorkbook implements Workbook
{
/**
@@ -73,6 +82,11 @@ public class SXSSFWorkbook implements Wo
private boolean _compressTmpFiles = false;
/**
+ * shared string table - a cache of strings in this workbook
+ */
+ private SharedStringsTable _sharedStringSource = null;
+
+ /**
* Construct a new workbook
*/
public SXSSFWorkbook(){
@@ -165,15 +179,48 @@ public class SXSSFWorkbook implements Wo
* @param compressTmpFiles whether to use gzip compression for temporary files
*/
public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles){
+ this(workbook,rowAccessWindowSize, compressTmpFiles, false);
+ }
+
+ /**
+ * Constructs a workbook from an existing workbook.
+ * <p>
+ * When a new node is created via createRow() and the total number
+ * of unflushed records would exceed the specified value, then the
+ * row with the lowest index value is flushed and cannot be accessed
+ * via getRow() anymore.
+ * </p>
+ * <p>
+ * A value of -1 indicates unlimited access. In this case all
+ * records that have not been flushed by a call to flush() are available
+ * for random access.
+ * </p>
+ * <p>
+ * A value of 0 is not allowed because it would flush any newly created row
+ * without having a chance to specify any cells.
+ * </p>
+ *
+ * @param workbook the template workbook
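+ * @param rowAccessWindowSize the number of unflushed rows that stay accessible via getRow(); -1 for unlimited access, 0 is not allowed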
+ * @param compressTmpFiles whether to use gzip compression for temporary files
+ * @param useSharedStringsTable whether to use a shared strings table
+ */
+ public SXSSFWorkbook(XSSFWorkbook workbook, int rowAccessWindowSize, boolean compressTmpFiles, boolean useSharedStringsTable){
setRandomAccessWindowSize(rowAccessWindowSize);
setCompressTempFiles(compressTmpFiles);
if (workbook == null)
{
_wb=new XSSFWorkbook();
+ if(useSharedStringsTable){
+ _sharedStringSource = _wb.getSharedStringSource();
+ }
}
else
{
_wb=workbook;
+ if(useSharedStringsTable){
+ _sharedStringSource = _wb.getSharedStringSource();
+ }
for ( int i = 0; i < _wb.getNumberOfSheets(); i++ )
{
XSSFSheet sheet = _wb.getSheetAt( i );
@@ -236,9 +283,9 @@ public class SXSSFWorkbook implements Wo
SheetDataWriter createSheetDataWriter() throws IOException {
if(_compressTmpFiles) {
- return new GZIPSheetDataWriter();
+ return new GZIPSheetDataWriter(_sharedStringSource);
} else {
- return new SheetDataWriter();
+ return new SheetDataWriter(_sharedStringSource);
}
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/streaming/SheetDataWriter.java Fri Feb 14 22:45:05 2014
@@ -32,6 +32,9 @@ import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.util.CellReference;
+import org.apache.poi.xssf.model.SharedStringsTable;
+import org.apache.poi.xssf.usermodel.XSSFRichTextString;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
/**
* Initially copied from BigGridDemo "SpreadsheetWriter".
@@ -48,11 +51,21 @@ public class SheetDataWriter {
private int _numberOfCellsOfLastFlushedRow; // meaningful only if _numberOfFlushedRows>0
private int _numberLastFlushedRow = -1; // meaningful only if _numberOfFlushedRows>0
+ /**
+ * Table of strings shared across this workbook.
+ * If two cells contain the same string, then the cell value is the same index into SharedStringsTable
+ */
+ private SharedStringsTable _sharedStringSource;
+
public SheetDataWriter() throws IOException {
_fd = createTempFile();
_out = createWriter(_fd);
}
+ public SheetDataWriter(SharedStringsTable sharedStringsTable) throws IOException{
+ this();
+ this._sharedStringSource = sharedStringsTable;
+ }
/**
* Create a temp file to write sheet data.
* By default, temp files are created in the default temporary-file directory
@@ -196,14 +209,24 @@ public class SheetDataWriter {
break;
}
case Cell.CELL_TYPE_STRING: {
- _out.write(" t=\"inlineStr\">");
- _out.write("<is><t");
- if(hasLeadingTrailingSpaces(cell.getStringCellValue())) {
- _out.write(" xml:space=\"preserve\"");
+ if (_sharedStringSource != null) {
+ XSSFRichTextString rt = new XSSFRichTextString(cell.getStringCellValue());
+ int sRef = _sharedStringSource.addEntry(rt.getCTRst());
+
+ _out.write(" t=\"" + STCellType.S.toString() + "\">");
+ _out.write("<v>");
+ _out.write(String.valueOf(sRef));
+ _out.write("</v>");
+ } else {
+ _out.write(" t=\"inlineStr\">");
+ _out.write("<is><t");
+ if (hasLeadingTrailingSpaces(cell.getStringCellValue())) {
+ _out.write(" xml:space=\"preserve\"");
+ }
+ _out.write(">");
+ outputQuotedString(cell.getStringCellValue());
+ _out.write("</t></is>");
}
- _out.write(">");
- outputQuotedString(cell.getStringCellValue());
- _out.write("</t></is>");
break;
}
case Cell.CELL_TYPE_NUMERIC: {
@@ -245,7 +268,7 @@ public class SheetDataWriter {
}
//Taken from jdk1.3/src/javax/swing/text/html/HTMLWriter.java
- protected void outputQuotedString(String s) throws IOException {
+ protected void outputQuotedString(String s) throws IOException {
if (s == null || s.length() == 0) {
return;
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java?rev=1568539&r1=1568538&r2=1568539&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/streaming/TestSXSSFWorkbook.java Fri Feb 14 22:45:05 2014
@@ -23,6 +23,7 @@ import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
+import java.lang.reflect.Field;
import org.apache.poi.ss.usermodel.BaseTestWorkbook;
import org.apache.poi.ss.usermodel.Cell;
@@ -32,6 +33,7 @@ import org.apache.poi.ss.usermodel.Workb
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;
import org.apache.poi.xssf.SXSSFITestDataProvider;
+import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
public final class TestSXSSFWorkbook extends BaseTestWorkbook {
@@ -90,6 +92,42 @@ public final class TestSXSSFWorkbook ext
}
+ public void testUseSharedStringsTable() throws Exception {
+ SXSSFWorkbook wb = new SXSSFWorkbook(null, 10, false, true);
+
+ Field f = SXSSFWorkbook.class.getDeclaredField("_sharedStringSource");
+ f.setAccessible(true);
+ SharedStringsTable sss = (SharedStringsTable)f.get(wb);
+
+ assertNotNull(sss);
+
+ Row row = wb.createSheet("S1").createRow(0);
+
+ row.createCell(0).setCellValue("A");
+ row.createCell(1).setCellValue("B");
+ row.createCell(2).setCellValue("A");
+
+ XSSFWorkbook xssfWorkbook = (XSSFWorkbook) SXSSFITestDataProvider.instance.writeOutAndReadBack(wb);
+ sss = (SharedStringsTable)f.get(wb);
+ assertEquals(2, sss.getUniqueCount());
+ wb.dispose();
+
+ Sheet sheet1 = xssfWorkbook.getSheetAt(0);
+ assertEquals("S1", sheet1.getSheetName());
+ assertEquals(1, sheet1.getPhysicalNumberOfRows());
+ row = sheet1.getRow(0);
+ assertNotNull(row);
+ Cell cell = row.getCell(0);
+ assertNotNull(cell);
+ assertEquals("A", cell.getStringCellValue());
+ cell = row.getCell(1);
+ assertNotNull(cell);
+ assertEquals("B", cell.getStringCellValue());
+ cell = row.getCell(2);
+ assertNotNull(cell);
+ assertEquals("A", cell.getStringCellValue());
+ }
+
public void testAddToExistingWorkbook() {
XSSFWorkbook xssfWorkbook = new XSSFWorkbook();
xssfWorkbook.createSheet("S1");
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org