You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tika.apache.org by ma...@apache.org on 2011/04/13 16:06:53 UTC

svn commit: r1091791 - /tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java

Author: maxcom
Date: Wed Apr 13 14:06:52 2011
New Revision: 1091791

URL: http://svn.apache.org/viewvc?rev=1091791&view=rev
Log:
docx: extract image description into the alt attribute of the emitted img element

Modified:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java

Modified: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java?rev=1091791&r1=1091790&r2=1091791&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ooxml/XWPFWordExtractorDecorator.java Wed Apr 13 14:06:52 2011
@@ -33,6 +33,7 @@ import org.apache.xmlbeans.XmlException;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr;
 import org.xml.sax.SAXException;
+import org.xml.sax.helpers.AttributesImpl;
 
 public class XWPFWordExtractorDecorator extends AbstractOOXMLExtractor {
     private XWPFDocument document;
@@ -159,7 +160,12 @@ public class XWPFWordExtractorDecorator 
              if(paragraph.getDocument() != null) {
                 XWPFPictureData data = picture.getPictureData();
                 if(data != null) {
-                   xhtml.startElement("img", "src", "embedded:" + data.getFileName());
+                   AttributesImpl attr = new AttributesImpl();
+
+                   attr.addAttribute("", "src", "src", "CDATA", "embedded:" + data.getFileName());
+                   attr.addAttribute("", "alt", "alt", "CDATA", picture.getDescription());
+
+                   xhtml.startElement("img", attr);
                    xhtml.endElement("img");
                 }
              }