You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ce...@apache.org on 2020/03/28 09:24:39 UTC
svn commit: r1875802 - in /poi/trunk:
src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
test-data/document/55966.docx
Author: centic
Date: Sat Mar 28 09:24:38 2020
New Revision: 1875802
URL: http://svn.apache.org/viewvc?rev=1875802&view=rev
Log:
Bug 55966: Also include content control text in word extraction when the content control is part of a paragraph
Added:
poi/trunk/test-data/document/55966.docx
Modified:
poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=1875802&r1=1875801&r2=1875802&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Sat Mar 28 09:24:38 2020
@@ -90,7 +90,7 @@ public class XWPFWordExtractor extends P
/**
* Should we concatenate phonetic runs in extraction. Default is <code>true</code>
- * @param concatenatePhoneticRuns
+ * @param concatenatePhoneticRuns If phonetic runs should be concatenated
*/
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
@@ -138,9 +138,10 @@ public class XWPFWordExtractor extends P
extractHeaders(text, headerFooterPolicy);
}
-
- for (IRunElement run : paragraph.getRuns()) {
- if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
+ for (IRunElement run : paragraph.getIRuns()) {
+ if (run instanceof XWPFSDT) {
+ text.append(((XWPFSDT) run).getContent().getText());
+ } else if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
text.append(((XWPFRun)run).text());
} else {
text.append(run);
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=1875802&r1=1875801&r2=1875802&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Sat Mar 28 09:24:38 2020
@@ -17,6 +17,16 @@
package org.apache.poi.xwpf.extractor;
+import org.apache.poi.util.StringUtil;
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertEndsWith;
import static org.apache.poi.POITestCase.assertNotContained;
@@ -25,16 +35,6 @@ import static org.junit.Assert.assertEqu
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
-import java.io.IOException;
-import java.util.Locale;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.poi.util.StringUtil;
-import org.apache.poi.xwpf.XWPFTestDataSamples;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.junit.Test;
-
/**
* Tests for HXFWordExtractor
*/
@@ -460,4 +460,21 @@ public class TestXWPFWordExtractor {
assertContains(txt, "footer 1");
}
}
+
+ @Test
+ public void bug55966() throws IOException {
+ try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
+ String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
+ "line\n" +
+ "\n" +
+ "Content control that is the entire paragraph\n";
+
+ XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
+
+ String actual = extractedDoc.getText();
+
+ extractedDoc.close();
+ assertEquals(expected, actual);
+ }
+ }
}
Added: poi/trunk/test-data/document/55966.docx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/55966.docx?rev=1875802&view=auto
==============================================================================
Binary files poi/trunk/test-data/document/55966.docx (added) and poi/trunk/test-data/document/55966.docx Sat Mar 28 09:24:38 2020 differ
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org