You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2020/03/28 09:24:39 UTC

svn commit: r1875802 - in /poi/trunk: src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java test-data/document/55966.docx

Author: centic
Date: Sat Mar 28 09:24:38 2020
New Revision: 1875802

URL: http://svn.apache.org/viewvc?rev=1875802&view=rev
Log:
Bug 55966: Also include content control text in word extraction when it is part of a paragraph

Added:
    poi/trunk/test-data/document/55966.docx
Modified:
    poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
    poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java

Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=1875802&r1=1875801&r2=1875802&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Sat Mar 28 09:24:38 2020
@@ -90,7 +90,7 @@ public class XWPFWordExtractor extends P
 
     /**
      * Should we concatenate phonetic runs in extraction.  Default is <code>true</code>
-     * @param concatenatePhoneticRuns
+     * @param concatenatePhoneticRuns If phonetic runs should be concatenated
      */
     public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
         this.concatenatePhoneticRuns = concatenatePhoneticRuns;
@@ -138,9 +138,10 @@ public class XWPFWordExtractor extends P
             extractHeaders(text, headerFooterPolicy);
         }
 
-
-        for (IRunElement run : paragraph.getRuns()) {
-            if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
+        for (IRunElement run : paragraph.getIRuns()) {
+            if (run instanceof XWPFSDT) {
+                text.append(((XWPFSDT) run).getContent().getText());
+            } else if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
                 text.append(((XWPFRun)run).text());
             } else {
                 text.append(run);

Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1875802&r1=1875801&r2=1875802&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Sat Mar 28 09:24:38 2020
@@ -17,6 +17,16 @@
 
 package org.apache.poi.xwpf.extractor;
 
+import org.apache.poi.util.StringUtil;
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
 import static org.apache.poi.POITestCase.assertContains;
 import static org.apache.poi.POITestCase.assertEndsWith;
 import static org.apache.poi.POITestCase.assertNotContained;
@@ -25,16 +35,6 @@ import static org.junit.Assert.assertEqu
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertTrue;
 
-import java.io.IOException;
-import java.util.Locale;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.poi.util.StringUtil;
-import org.apache.poi.xwpf.XWPFTestDataSamples;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.junit.Test;
-
 /**
  * Tests for HXFWordExtractor
  */
@@ -460,4 +460,21 @@ public class TestXWPFWordExtractor {
             assertContains(txt, "footer 1");
         }
     }
+
+    @Test
+    public void bug55966() throws IOException  {
+        try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
+            String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
+                    "line\n" +
+                    "\n" +
+                    "Content control that is the entire paragraph\n";
+
+            XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
+
+            String actual = extractedDoc.getText();
+
+            extractedDoc.close();
+            assertEquals(expected, actual);
+        }
+    }
 }

Added: poi/trunk/test-data/document/55966.docx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/55966.docx?rev=1875802&view=auto
==============================================================================
Binary files poi/trunk/test-data/document/55966.docx (added) and poi/trunk/test-data/document/55966.docx Sat Mar 28 09:24:38 2020 differ



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org