You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@poi.apache.org by ye...@apache.org on 2009/08/31 19:02:07 UTC
svn commit: r809662 - in /poi/trunk: src/documentation/content/xdocs/
src/ooxml/java/org/apache/poi/xwpf/extractor/
src/ooxml/java/org/apache/poi/xwpf/model/
src/ooxml/testcases/org/apache/poi/xwpf/extractor/ test-data/document/
Author: yegor
Date: Mon Aug 31 17:02:06 2009
New Revision: 809662
URL: http://svn.apache.org/viewvc?rev=809662&view=rev
Log:
Fix for extraction of paragraphs and sections from headers/footers with XWPFWordExtractor, see Bugzilla 47727
Added:
poi/trunk/test-data/document/Headers.docx (with props)
Modified:
poi/trunk/src/documentation/content/xdocs/status.xml
poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
poi/trunk/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java
poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
Modified: poi/trunk/src/documentation/content/xdocs/status.xml
URL: http://svn.apache.org/viewvc/poi/trunk/src/documentation/content/xdocs/status.xml?rev=809662&r1=809661&r2=809662&view=diff
==============================================================================
--- poi/trunk/src/documentation/content/xdocs/status.xml (original)
+++ poi/trunk/src/documentation/content/xdocs/status.xml Mon Aug 31 17:02:06 2009
@@ -33,7 +33,8 @@
<changes>
<release version="3.5-beta7" date="2009-??-??">
- <action dev="POI-DEVELOPERS" type="fix">47773 - Support for extraction of header / footer images in HWPF</action>
+ <action dev="POI-DEVELOPERS" type="fix">47727 - Fix for extraction of paragraphs and sections from headers/footers with XWPFWordExtractor</action>
+ <action dev="POI-DEVELOPERS" type="fix">47773 - Support for extraction of header / footer images in HWPF</action>
<action dev="POI-DEVELOPERS" type="fix">moved all test data to a top-level directory</action>
<action dev="POI-DEVELOPERS" type="add">47721 - Added implementation for INDIRECT()</action>
<action dev="POI-DEVELOPERS" type="add">45583 - Avoid exception when reading ClipboardData packet in OLE property sets</action>
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java?rev=809662&r1=809661&r2=809662&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java Mon Aug 31 17:02:06 2009
@@ -21,6 +21,7 @@
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.POIXMLException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
@@ -31,6 +32,7 @@
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFTable;
import org.apache.xmlbeans.XmlException;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
/**
* Helper class to extract text from an OOXML Word file
@@ -72,45 +74,77 @@
public String getText() {
StringBuffer text = new StringBuffer();
XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
-
+
// Start out with all headers
- // TODO - put them in where they're needed
- if(hfPolicy.getFirstPageHeader() != null) {
- text.append( hfPolicy.getFirstPageHeader().getText() );
- }
- if(hfPolicy.getEvenPageHeader() != null) {
- text.append( hfPolicy.getEvenPageHeader().getText() );
- }
- if(hfPolicy.getDefaultHeader() != null) {
- text.append( hfPolicy.getDefaultHeader().getText() );
- }
+ extractHeaders(text, hfPolicy);
// First up, all our paragraph based text
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
while(i.hasNext()) {
- XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
- new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
- text.append(decorator.getText()+"\n");
- }
+ XWPFParagraph paragraph = i.next();
+
+
+ try {
+ CTSectPr ctSectPr = null;
+ if (paragraph.getCTP().getPPr()!=null) {
+ ctSectPr = paragraph.getCTP().getPPr().getSectPr();
+ }
+
+ XWPFHeaderFooterPolicy headerFooterPolicy = null;
+
+ if (ctSectPr!=null) {
+ headerFooterPolicy = new XWPFHeaderFooterPolicy(document, ctSectPr);
+
+ extractHeaders(text, headerFooterPolicy);
+ }
+
+ XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
+ new XWPFHyperlinkDecorator(paragraph, null, fetchHyperlinks));
+ text.append(decorator.getText()).append('\n');
+
+ if (ctSectPr!=null) {
+ extractFooters(text, headerFooterPolicy);
+ }
+ } catch (IOException e) {
+ throw new POIXMLException(e);
+ } catch (XmlException e) {
+ throw new POIXMLException(e);
+ }
+ }
// Then our table based text
Iterator<XWPFTable> j = document.getTablesIterator();
while(j.hasNext()) {
- text.append(j.next().getText()+"\n");
+ text.append(j.next().getText()).append('\n');
}
// Finish up with all the footers
- // TODO - put them in where they're needed
- if(hfPolicy.getFirstPageFooter() != null) {
- text.append( hfPolicy.getFirstPageFooter().getText() );
- }
- if(hfPolicy.getEvenPageFooter() != null) {
- text.append( hfPolicy.getEvenPageFooter().getText() );
- }
- if(hfPolicy.getDefaultFooter() != null) {
- text.append( hfPolicy.getDefaultFooter().getText() );
- }
+ extractFooters(text, hfPolicy);
return text.toString();
}
+
+ private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
+ if(hfPolicy.getFirstPageFooter() != null) {
+ text.append( hfPolicy.getFirstPageFooter().getText() );
+ }
+ if(hfPolicy.getEvenPageFooter() != null) {
+ text.append( hfPolicy.getEvenPageFooter().getText() );
+ }
+ if(hfPolicy.getDefaultFooter() != null) {
+ text.append( hfPolicy.getDefaultFooter().getText() );
+ }
+ }
+
+ private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
+ if(hfPolicy.getFirstPageHeader() != null) {
+ text.append( hfPolicy.getFirstPageHeader().getText() );
+ }
+ if(hfPolicy.getEvenPageHeader() != null) {
+ text.append( hfPolicy.getEvenPageHeader().getText() );
+ }
+ if(hfPolicy.getDefaultHeader() != null) {
+ text.append( hfPolicy.getDefaultHeader().getText() );
+ }
+ }
}
Modified: poi/trunk/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java?rev=809662&r1=809661&r2=809662&view=diff
==============================================================================
--- poi/trunk/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java (original)
+++ poi/trunk/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHeaderFooterPolicy.java Mon Aug 31 17:02:06 2009
@@ -83,19 +83,26 @@
private XWPFHeader defaultHeader;
private XWPFFooter defaultFooter;
-
+ /**
+ * Figures out the policy for the given document,
+ * and creates any header and footer objects
+ * as required.
+ */
+ public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
+ this(doc, doc.getDocument().getBody().getSectPr());
+ }
+
/**
* Figures out the policy for the given document,
* and creates any header and footer objects
* as required.
*/
- public XWPFHeaderFooterPolicy(XWPFDocument doc) throws IOException, XmlException {
+ public XWPFHeaderFooterPolicy(XWPFDocument doc, CTSectPr sectPr) throws IOException, XmlException {
// Grab what headers and footers have been defined
// For now, we don't care about different ranges, as it
// doesn't seem that .docx properly supports that
// feature of the file format yet
this.doc = doc;
- CTSectPr sectPr = doc.getDocument().getBody().getSectPr();
for(int i=0; i<sectPr.sizeOfHeaderReferenceArray(); i++) {
// Get the header
CTHdrFtrRef ref = sectPr.getHeaderReferenceArray(i);
Modified: poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
URL: http://svn.apache.org/viewvc/poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java?rev=809662&r1=809661&r2=809662&view=diff
==============================================================================
--- poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java (original)
+++ poi/trunk/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java Mon Aug 31 17:02:06 2009
@@ -198,4 +198,13 @@
assertTrue(extractor.getText().contains("extremely well"));
}
+ public void testParagraphHeader() {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("Headers.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+ assertTrue(extractor.getText().contains("Section 1"));
+ assertTrue(extractor.getText().contains("Section 2"));
+ assertTrue(extractor.getText().contains("Section 3"));
+ }
+
}
Added: poi/trunk/test-data/document/Headers.docx
URL: http://svn.apache.org/viewvc/poi/trunk/test-data/document/Headers.docx?rev=809662&view=auto
==============================================================================
Binary file - no diff available.
Propchange: poi/trunk/test-data/document/Headers.docx
------------------------------------------------------------------------------
svn:executable = *
Propchange: poi/trunk/test-data/document/Headers.docx
------------------------------------------------------------------------------
svn:mime-type = application/octet-stream
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@poi.apache.org
For additional commands, e-mail: commits-help@poi.apache.org