You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2017/03/08 13:41:07 UTC
svn commit: r1785965 - in /poi: site/src/documentation/content/xdocs/
trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/
trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/
trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/ trunk/...
Author: tallison
Date: Wed Mar 8 13:41:07 2017
New Revision: 1785965
URL: http://svn.apache.org/viewvc?rev=1785965&view=rev
Log:
51519 -- allow users to ignore or include the <rPh> (phonetic run) element in the ReadOnlySharedStringsTable used in the SAX/streaming xlsx reader.
Added:
poi/trunk/test-data/spreadsheet/51519.xlsx (with props)
Modified:
poi/site/src/documentation/content/xdocs/status.xml
poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
Modified: poi/site/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/site/src/documentation/content/xdocs/status.xml?rev=1785965&r1=1785964&r2=1785965&view=diff
==============================================================================
--- poi/site/src/documentation/content/xdocs/status.xml (original)
+++ poi/site/src/documentation/content/xdocs/status.xml Wed Mar 8 13:41:07 2017
@@ -58,6 +58,7 @@
<release version="3.16-beta3" date="2017-04-??">
<actions>
+ <action dev="PD" type="fix" fixes-bug="51519" module="XSSF">Allow user to select or ignore phonetic strings in shared strings table</action>
<action dev="PD" type="fix" fixes-bug="60662" module="XSLF">Slide import delete unrecognized elements in group shape</action>
<action dev="PD" type="fix" fixes-bug="60715" module="XSLF">Blank layout was not found</action>
<action dev="PD" type="add" fixes-bug="59227,github-48" module="SS Common">Support Chinese and Japanese date formats</action>
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java?rev=1785965&r1=1785964&r2=1785965&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java Wed Mar 8 13:41:07 2017
@@ -18,13 +18,14 @@ package org.apache.poi.xssf.eventusermod
import static org.apache.poi.xssf.usermodel.XSSFRelation.NS_SPREADSHEETML;
+import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
import java.util.ArrayList;
+import java.util.HashMap;
import java.util.List;
-
-import javax.xml.parsers.ParserConfigurationException;
+import java.util.Map;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
@@ -96,6 +97,12 @@ public class ReadOnlySharedStringsTable
private List<String> strings;
/**
+ * Map of phonetic strings (if they exist), keyed by the
+ * index of the corresponding entry in strings
+ */
+ private Map<Integer, String> phoneticStrings;
+
+ /**
* @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
* @throws IOException If reading the data from the package fails.
* @throws SAXException if parsing the XML data fails.
@@ -177,6 +184,22 @@ public class ReadOnlySharedStringsTable
return strings.get(idx);
}
+ /**
+ * Return the phonetic string at a given index.
+ * Returns <code>null</code> if no phonetic string
+ * exists at that index.
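+ * @param idx index of the shared string entry, as used by getEntryAt
+ * @return the phonetic string for that entry, or <code>null</code> if there is none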
+ */
+ public String getPhoneticStringAt(int idx) {
+ //avoid an NPE: phoneticStrings is only created once the parser
+ //has seen the <sst> start element, so it may still be null here
+ if (phoneticStrings == null) {
+ return null;
+ }
+ return phoneticStrings.get(idx);
+ }
+
public List<String> getItems() {
return strings;
}
@@ -184,14 +207,16 @@ public class ReadOnlySharedStringsTable
//// ContentHandler methods ////
private StringBuffer characters;
+ private StringBuffer rphCharacters;
private boolean tIsOpen;
+ private boolean inRPh;
public void startElement(String uri, String localName, String name,
Attributes attributes) throws SAXException {
if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
return;
}
-
+
if ("sst".equals(localName)) {
String count = attributes.getValue("count");
if(count != null) this.count = Integer.parseInt(count);
@@ -199,12 +224,15 @@ public class ReadOnlySharedStringsTable
if(uniqueCount != null) this.uniqueCount = Integer.parseInt(uniqueCount);
this.strings = new ArrayList<String>(this.uniqueCount);
-
+ this.phoneticStrings = new HashMap<Integer, String>();
characters = new StringBuffer();
+ rphCharacters = new StringBuffer();
} else if ("si".equals(localName)) {
characters.setLength(0);
} else if ("t".equals(localName)) {
tIsOpen = true;
+ } else if ("rPh".equals(localName)) {
+ inRPh = true;
}
}
@@ -213,11 +241,17 @@ public class ReadOnlySharedStringsTable
if (uri != null && ! uri.equals(NS_SPREADSHEETML)) {
return;
}
-
+
if ("si".equals(localName)) {
strings.add(characters.toString());
+ if (rphCharacters.length() > 0) {
+ phoneticStrings.put(strings.size()-1, rphCharacters.toString());
+ rphCharacters.setLength(0);
+ }
} else if ("t".equals(localName)) {
- tIsOpen = false;
+ tIsOpen = false;
+ } else if ("rPh".equals(localName)) {
+ inRPh = false;
}
}
@@ -226,8 +260,12 @@ public class ReadOnlySharedStringsTable
*/
public void characters(char[] ch, int start, int length)
throws SAXException {
- if (tIsOpen)
- characters.append(ch, start, length);
+ if (tIsOpen) {
+ if (inRPh) {
+ rphCharacters.append(ch, start, length);
+ } else {
+ characters.append(ch, start, length);
+ }
+ }
}
-
}
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java?rev=1785965&r1=1785964&r2=1785965&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java Wed Mar 8 13:41:07 2017
@@ -19,8 +19,11 @@
package org.apache.poi.xssf.eventusermodel;
-import junit.framework.TestCase;
+import java.io.IOException;
+import java.util.List;
+import java.util.regex.Pattern;
+import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackagePart;
@@ -29,10 +32,6 @@ import org.apache.poi.xssf.usermodel.XSS
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
import org.xml.sax.SAXException;
-import java.io.IOException;
-import java.util.List;
-import java.util.regex.Pattern;
-
/**
* Tests for {@link org.apache.poi.xssf.eventusermodel.XSSFReader}
*/
@@ -59,7 +58,22 @@ public final class TestReadOnlySharedStr
}
}
-
+
+ public void testPhoneticRuns() throws Exception {
+ OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx"));
+ List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml"));
+ assertEquals(1, parts.size());
+
+ ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0));
+ List<String> strings = rtbl.getItems();
+ assertEquals(49, strings.size());
+
+ assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
+ assertNull(rtbl.getPhoneticStringAt(0));
+ assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
+ assertEquals("\u30CB\u30DB\u30F3", rtbl.getPhoneticStringAt(3));
+ }
+
public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {
XSSFWorkbook wb = new XSSFWorkbook(_ssTests.openResourceAsStream("noSharedStringTable.xlsx"));
OPCPackage pkg = wb.getPackage();
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java?rev=1785965&r1=1785964&r2=1785965&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java Wed Mar 8 13:41:07 2017
@@ -22,7 +22,6 @@ import java.util.regex.Matcher;
import java.util.regex.Pattern;
import junit.framework.TestCase;
-
import org.apache.poi.POITextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor;
@@ -226,4 +225,18 @@ public class TestXSSFExcelExtractor exte
extractor.close();
}
}
+
+ public void testPhoneticRuns() throws Exception {
+ XSSFExcelExtractor extractor = getExtractor("51519.xlsx");
+ try {
+ String text = extractor.getText();
+ assertTrue(text.contains("\u8C4A\u7530"));
+ //this shows up only as a phonetic run and should not appear
+ //in the extracted text
+ assertFalse(text.contains("\u30CB\u30DB\u30F3"));
+ } finally {
+ extractor.close();
+ }
+
+ }
}
Added: poi/trunk/test-data/spreadsheet/51519.xlsx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/spreadsheet/51519.xlsx?rev=1785965&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/spreadsheet/51519.xlsx
------------------------------------------------------------------------------
--- svn:mime-type (added)
+++ svn:mime-type Wed Mar 8 13:41:07 2017
@@ -0,0 +1 @@
+application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org