You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/13 16:06:53 UTC
svn commit: r1091791 -
/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Author: maxcom
Date: Wed Apr 13 14:06:52 2011
New Revision: 1091791
URL: http://svn.apache.org/viewvc?rev=1091791&view=rev
Log:
docx: extract the image description into the alt attribute of the img element
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=1091791&r1=1091790&r2=1091791&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Wed Apr 13 14:06:52 2011
@@ -33,6 +33,7 @@ import org.apache.xmlbeans.XmlException;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
private XWPFDocument document;
@@ -159,7 +160,12 @@ public class XWPFWordExtractorDecorator
if(paragraph.getDocument() != null) {
XWPFPictureData data = picture.getPictureData();
if(data != null) {
- xhtml.startElement("img", "src", "embedded:" + data.getFileName());
+ AttributesImpl attr = new AttributesImpl();
+
+ attr.addAttribute("", "src", "src", "CDATA", "embedded:" + data.getFileName());
+ attr.addAttribute("", "alt", "alt", "CDATA", picture.getDescription());
+
+ xhtml.startElement("img", attr);
xhtml.endElement("img");
}
}