You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ta...@apache.org on 2017/03/08 16:44:40 UTC

svn commit: r1786021 - in /poi/trunk/src/ooxml: java/org/apache/poi/xssf/eventusermodel/ java/org/apache/poi/xssf/extractor/ testcases/org/apache/poi/xssf/eventusermodel/ testcases/org/apache/poi/xssf/extractor/

Author: tallison
Date: Wed Mar  8 16:44:40 2017
New Revision: 1786021

URL: http://svn.apache.org/viewvc?rev=1786021&view=rev
Log:
51519 -- follow on, make concatenation of rPh configurable

Modified:
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
    poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java?rev=1786021&r1=1786020&r2=1786021&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/eventusermodel/ReadOnlySharedStringsTable.java Wed Mar  8 16:44:40 2017
@@ -78,6 +78,8 @@ import org.xml.sax.helpers.DefaultHandle
  *
  */
 public class ReadOnlySharedStringsTable extends DefaultHandler {
+
+    private final boolean includePhoneticRuns;
     /**
      * An integer representing the total count of strings in the workbook. This count does not
      * include any numbers, it counts only the total of text strings in the workbook.
@@ -103,12 +105,29 @@ public class ReadOnlySharedStringsTable
     private Map<Integer, String> phoneticStrings;
 
     /**
+     * Calls {@link #ReadOnlySharedStringsTable(OPCPackage, boolean)} with
+     * a value of <code>true</code> for including phonetic runs.
+     *
      * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
      * @throws IOException If reading the data from the package fails.
      * @throws SAXException if parsing the XML data fails.
      */
     public ReadOnlySharedStringsTable(OPCPackage pkg)
             throws IOException, SAXException {
+        this(pkg, true);
+    }
+
+    /**
+     *
+     * @param pkg The {@link OPCPackage} to use as basis for the shared-strings table.
+     * @param includePhoneticRuns whether or not to concatenate phoneticRuns onto the shared string
+     * @since POI 3.14-Beta3
+     * @throws IOException If reading the data from the package fails.
+     * @throws SAXException if parsing the XML data fails.
+     */
+    public ReadOnlySharedStringsTable(OPCPackage pkg, boolean includePhoneticRuns)
+            throws IOException, SAXException {
+        this.includePhoneticRuns = includePhoneticRuns;
         ArrayList<PackagePart> parts =
                 pkg.getPartsByContentType(XSSFRelation.SHARED_STRINGS.getContentType());
 
@@ -121,10 +140,24 @@ public class ReadOnlySharedStringsTable
 
     /**
      * Like POIXMLDocumentPart constructor
-     * 
+     *
+     * Calls {@link #ReadOnlySharedStringsTable(PackagePart, boolean)}, with a
+     * value of <code>true</code> to include phonetic runs.
+     *
      * @since POI 3.14-Beta1
      */
     public ReadOnlySharedStringsTable(PackagePart part) throws IOException, SAXException {
+        this(part, true);
+    }
+
+    /**
+     * Like POIXMLDocumentPart constructor
+     *
+     * @since POI 3.14-Beta3
+     */
+    public ReadOnlySharedStringsTable(PackagePart part, boolean includePhoneticRuns)
+        throws IOException, SAXException {
+        this.includePhoneticRuns = includePhoneticRuns;
         readFrom(part.getInputStream());
     }
     
@@ -184,22 +217,6 @@ public class ReadOnlySharedStringsTable
         return strings.get(idx);
     }
 
-    /**
-     * Return the phonetic string at a given index.
-     * Returns <code>null</code> if no phonetic string
-     * exists at that index.
-     * @param idx
-     * @return
-     */
-    public String getPhoneticStringAt(int idx) {
-        //avoid an NPE.  If the parser hasn't
-        //yet hit <sst/> phoneticStrings could be null
-        if (phoneticStrings == null) {
-            return null;
-        }
-        return phoneticStrings.get(idx);
-    }
-
     public List<String> getItems() {
         return strings;
     }
@@ -207,7 +224,6 @@ public class ReadOnlySharedStringsTable
     //// ContentHandler methods ////
 
     private StringBuffer characters;
-    private StringBuffer rphCharacters;
     private boolean tIsOpen;
     private boolean inRPh;
 
@@ -226,13 +242,16 @@ public class ReadOnlySharedStringsTable
             this.strings = new ArrayList<String>(this.uniqueCount);
             this.phoneticStrings = new HashMap<Integer, String>();
             characters = new StringBuffer();
-            rphCharacters = new StringBuffer();
         } else if ("si".equals(localName)) {
             characters.setLength(0);
         } else if ("t".equals(localName)) {
             tIsOpen = true;
         } else if ("rPh".equals(localName)) {
             inRPh = true;
+            //append space...this assumes that rPh always comes after regular <t>
+            if (includePhoneticRuns && characters.length() > 0) {
+                characters.append(" ");
+            }
         }
     }
 
@@ -244,10 +263,6 @@ public class ReadOnlySharedStringsTable
 
         if ("si".equals(localName)) {
             strings.add(characters.toString());
-            if (rphCharacters.length() > 0) {
-                phoneticStrings.put(strings.size()-1, rphCharacters.toString());
-                rphCharacters.setLength(0);
-            }
         } else if ("t".equals(localName)) {
             tIsOpen = false;
         } else if ("rPh".equals(localName)) {
@@ -261,9 +276,9 @@ public class ReadOnlySharedStringsTable
     public void characters(char[] ch, int start, int length)
             throws SAXException {
         if (tIsOpen) {
-            if (inRPh) {
-                rphCharacters.append(ch, start, length);
-            } else {
+            if (inRPh && includePhoneticRuns) {
+                characters.append(ch, start, length);
+            } else if (! inRPh){
                 characters.append(ch, start, length);
             }
         }

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java?rev=1786021&r1=1786020&r2=1786021&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java Wed Mar  8 16:44:40 2017
@@ -16,6 +16,7 @@
 ==================================================================== */
 package org.apache.poi.xssf.extractor;
 
+import javax.xml.parsers.ParserConfigurationException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.util.HashMap;
@@ -23,8 +24,6 @@ import java.util.List;
 import java.util.Locale;
 import java.util.Map;
 
-import javax.xml.parsers.ParserConfigurationException;
-
 import org.apache.poi.POIXMLProperties;
 import org.apache.poi.POIXMLProperties.CoreProperties;
 import org.apache.poi.POIXMLProperties.CustomProperties;
@@ -64,6 +63,7 @@ public class XSSFEventBasedExcelExtracto
     private boolean includeCellComments = false;
     private boolean includeHeadersFooters = true;
     private boolean formulasNotResults = false;
+    private boolean concatenatePhoneticRuns = true;
 
     public XSSFEventBasedExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
         this(OPCPackage.open(path));
@@ -120,6 +120,14 @@ public class XSSFEventBasedExcelExtracto
         this.includeCellComments = includeCellComments;
     }
 
+    /**
+     * Concatenate text from &lt;rPh&gt; text elements in SharedStringsTable.
+     * Default is true.
+     * @param concatenatePhoneticRuns whether or not to concatenate phonetic runs onto the shared string
+     */
+    public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
+        this.concatenatePhoneticRuns = concatenatePhoneticRuns;
+    }
     public void setLocale(Locale locale) {
         this.locale = locale;
     }
@@ -189,7 +197,7 @@ public class XSSFEventBasedExcelExtracto
     */
    public String getText() {
        try {
-          ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container);
+          ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(container, concatenatePhoneticRuns);
           XSSFReader xssfReader = new XSSFReader(container);
           StylesTable styles = xssfReader.getStylesTable();
           XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java?rev=1786021&r1=1786020&r2=1786021&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/eventusermodel/TestReadOnlySharedStringsTable.java Wed Mar  8 16:44:40 2017
@@ -59,19 +59,27 @@ public final class TestReadOnlySharedStr
 
 	}
 
+	//51519
 	public void testPhoneticRuns() throws Exception {
         OPCPackage pkg = OPCPackage.open(_ssTests.openResourceAsStream("51519.xlsx"));
         List<PackagePart> parts = pkg.getPartsByName(Pattern.compile("/xl/sharedStrings.xml"));
         assertEquals(1, parts.size());
 
-        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0));
+        ReadOnlySharedStringsTable rtbl = new ReadOnlySharedStringsTable(parts.get(0), true);
         List<String> strings = rtbl.getItems();
         assertEquals(49, strings.size());
 
         assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
-        assertNull(rtbl.getPhoneticStringAt(0));
+        assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3", rtbl.getEntryAt(3));
+
+        //now do not include phonetic runs
+        rtbl = new ReadOnlySharedStringsTable(parts.get(0), false);
+        strings = rtbl.getItems();
+        assertEquals(49, strings.size());
+
+        assertEquals("\u30B3\u30E1\u30F3\u30C8", rtbl.getEntryAt(0));
         assertEquals("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB", rtbl.getEntryAt(3));
-        assertEquals("\u30CB\u30DB\u30F3", rtbl.getPhoneticStringAt(3));
+
     }
 
     public void testEmptySSTOnPackageObtainedViaWorkbook() throws Exception {

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java?rev=1786021&r1=1786020&r2=1786021&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java Wed Mar  8 16:44:40 2017
@@ -18,6 +18,7 @@
 package org.apache.poi.xssf.extractor;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
@@ -359,4 +360,25 @@ public class TestXSSFEventBasedExcelExtr
 		assertTrue("can't find 10/02/2016", text.contains("10/02/2016"));
 		ex.close();
 	}
+
+	@Test
+	public void test51519() throws Exception {
+    	//default behavior: include phonetic runs
+		XSSFEventBasedExcelExtractor ex =
+				new XSSFEventBasedExcelExtractor(
+						XSSFTestDataSamples.openSamplePackage("51519.xlsx"));
+		String text = ex.getText();
+		assertTrue("can't find appended phonetic run", text.contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3"));
+		ex.close();
+
+		//now try turning them off
+		ex =
+				new XSSFEventBasedExcelExtractor(
+						XSSFTestDataSamples.openSamplePackage("51519.xlsx"));
+		ex.setConcatenatePhoneticRuns(false);
+		text = ex.getText();
+		assertFalse("should not be able to find appended phonetic run", text.contains("\u65E5\u672C\u30AA\u30E9\u30AF\u30EB \u30CB\u30DB\u30F3"));
+		ex.close();
+
+	}
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org