You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2010/11/09 12:20:20 UTC
svn commit: r1032924 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
Author: maxcom
Date: Tue Nov 9 11:20:20 2010
New Revision: 1032924
URL: http://svn.apache.org/viewvc?rev=1032924&view=rev
Log:
TIKA-510: Use POI usermodel API for text extraction from XSLF shape
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java?rev=1032924&r1=1032923&r2=1032924&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XSLFPowerPointExtractorDecorator.java Tue Nov 9 11:20:20 2010
@@ -30,6 +30,8 @@ import org.apache.poi.xslf.XSLFSlideShow
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XMLSlideShow;
import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFCommonSlideData;
+import org.apache.poi.xslf.usermodel.DrawingParagraph;
import org.apache.tika.exception.TikaException;
import org.apache.tika.sax.XHTMLContentHandler;
import org.apache.xmlbeans.XmlException;
@@ -71,7 +73,7 @@ public class XSLFPowerPointExtractorDeco
.getSlideComments(slideId);
xhtml.startElement("div");
- extractShapeContent(rawSlide.getCSld().getSpTree(), xhtml);
+ extractShapeContent(slide.getCommonSlideData(), xhtml);
if (comments != null) {
for (CTComment comment : comments.getCmArray()) {
@@ -80,26 +82,16 @@ public class XSLFPowerPointExtractorDeco
}
if (notes != null) {
- extractShapeContent(notes.getCSld().getSpTree(), xhtml);
+ extractShapeContent(new XSLFCommonSlideData(notes.getCSld()), xhtml);
}
xhtml.endElement("div");
}
}
- private void extractShapeContent(CTGroupShape gs, XHTMLContentHandler xhtml)
+ private void extractShapeContent(XSLFCommonSlideData data, XHTMLContentHandler xhtml)
throws SAXException {
- CTShape[] shapes = gs.getSpArray();
- for (CTShape shape : shapes) {
- CTTextBody textBody = shape.getTxBody();
- if (textBody != null) {
- CTTextParagraph[] paras = textBody.getPArray();
- for (CTTextParagraph textParagraph : paras) {
- CTRegularTextRun[] textRuns = textParagraph.getRArray();
- for (CTRegularTextRun textRun : textRuns) {
- xhtml.element("p", textRun.getT());
- }
- }
- }
+ for (DrawingParagraph p : data.getText()) {
+ xhtml.element("p", p.getText().toString());
}
}