You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@poi.apache.org by agastheswar <ag...@gmail.com> on 2012/04/18 16:54:04 UTC
retrieval of images from WordtoHTML converter
package org.apache.poi.hwpf.converter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.OutputStream;
import java.util.List;
import java.util.Stack;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.poi.hpsf.SummaryInformation;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFDocumentCore;
import org.apache.poi.hwpf.converter.FontReplacer.Triplet;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.OfficeDrawing;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.util.Beta;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
import static org.apache.poi.hwpf.converter.AbstractWordUtils.TWIPS_PER_INCH;
/**
 * This implementation doesn't create images or links to them. This can be
 * changed by overriding {@link #processImage(Element, boolean, Picture)}
 * method.
 */
public class WordToHtmlConverter extends AbstractWordConverter
{
/**
 * Immutable record of the font defaults of the block that is currently
 * being rendered. Character runs are compared against these values to
 * decide whether a font family or font size has to be written explicitly.
 */
private static class BlockProperies
{
    /** Font family name recorded for the enclosing block. */
    final String pFontName;

    /** Font size recorded for the enclosing block. */
    final int pFontSize;

    /**
     * @param fontName
     *            font family name of the block
     * @param fontSize
     *            font size of the block
     */
    public BlockProperies( String fontName, int fontSize )
    {
        pFontName = fontName;
        pFontSize = fontSize;
    }
}
/** Class-wide logger, obtained from {@link POILogFactory} for this converter class. */
private static final POILogger logger = POILogFactory
.getLogger( WordToHtmlConverter.class );
/**
 * Builds the CSS declarations for a section: the four page margins and,
 * when the section has more than one column, the column count and gap.
 * Margin and gap values are divided by {@code TWIPS_PER_INCH} and written
 * with the {@code in} unit.
 *
 * @param section
 *            section whose margins and column settings are read
 * @return CSS declaration string starting with the {@code margin}
 *         shorthand (top, right, bottom, left)
 */
private static String getSectionStyle( Section section )
{
    final float top = section.getMarginTop() / TWIPS_PER_INCH;
    final float right = section.getMarginRight() / TWIPS_PER_INCH;
    final float bottom = section.getMarginBottom() / TWIPS_PER_INCH;
    final float left = section.getMarginLeft() / TWIPS_PER_INCH;

    final StringBuilder css = new StringBuilder();
    css.append( "margin: " ).append( top ).append( "in " ).append( right )
            .append( "in " ).append( bottom ).append( "in " )
            .append( left ).append( "in;" );

    // single-column sections need nothing beyond the margins
    if ( !( section.getNumColumns() > 1 ) )
    {
        return css.toString();
    }

    css.append( "column-count: " ).append( section.getNumColumns() )
            .append( ";" );
    if ( section.isColumnsEvenlySpaced() )
    {
        final float gap = section.getDistanceBetweenColumns()
                / TWIPS_PER_INCH;
        css.append( "column-gap: " ).append( gap ).append( "in;" );
    }
    else
    {
        // unevenly spaced columns fall back to a fixed gap
        css.append( "column-gap: 0.25in;" );
    }
    return css.toString();
}
public static void main( String[] args )
{
if ( args.length < 2 )
{
System.err
.println( "Usage: WordToHtmlConverter <inputFile.doc>
<saveTo.html>" );
return;
}
System.out.println( "Converting " + args[0] );
System.out.println( "Saving output to " + args[1] );
try
{
Document doc = WordToHtmlConverter.process( new File( args[0] )
);
FileWriter out = new FileWriter( args[1] );
DOMSource domSource = new DOMSource( doc );
StreamResult streamResult = new StreamResult( out );
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
// TODO set encoding from a command argument
serializer.setOutputProperty( OutputKeys.ENCODING, "UTF-8" );
serializer.setOutputProperty( OutputKeys.INDENT, "yes" );
serializer.setOutputProperty( OutputKeys.METHOD, "html" );
serializer.transform( domSource, streamResult );
out.close();
}
catch ( Exception e )
{
e.printStackTrace();
}
}
/**
 * Loads a Word file and converts it into a freshly created DOM document.
 *
 * @param docFile
 *            Word file to load
 * @return DOM document holding the HTML produced by the converter
 * @throws Exception
 *             if loading the file, creating the DOM document or the
 *             conversion fails
 */
static Document process( File docFile ) throws Exception
{
    final HWPFDocumentCore source = WordToHtmlUtils.loadDoc( docFile );

    final Document target = DocumentBuilderFactory.newInstance()
            .newDocumentBuilder().newDocument();
    final WordToHtmlConverter converter = new WordToHtmlConverter( target );

    converter.processDocument( source );
    return converter.getDocument();
}
/**
 * Font defaults of the blocks currently being rendered; pushed before the
 * characters of a paragraph or note are processed and popped afterwards.
 */
private final Stack<BlockProperies> blocksProperies = new
Stack<BlockProperies>();
/** Facade through which all HTML elements and CSS classes are created. */
private final HtmlDocumentFacade htmlDocumentFacade;
/**
 * Container for footnote/endnote blocks. Created when the first note is
 * processed and appended to the body in {@link #afterProcess()}; stays
 * {@code null} when no note was processed.
 */
private Element notes = null;
/**
 * Creates a new {@link WordToHtmlConverter} that writes into the given DOM
 * document. The same converter can be used to output several
 * {@link HWPFDocument}s into a single HTML document.
 *
 * @param document
 *            XML DOM Document used as HTML document
 */
public WordToHtmlConverter( Document document )
{
    this( new HtmlDocumentFacade( document ) );
}

/**
 * Creates a new {@link WordToHtmlConverter} that writes through an
 * existing facade.
 *
 * @param htmlDocumentFacade
 *            facade used to create the HTML output
 */
public WordToHtmlConverter( HtmlDocumentFacade htmlDocumentFacade )
{
    this.htmlDocumentFacade = htmlDocumentFacade;
}
/**
 * Finishes the HTML output: appends the collected notes block to the body
 * (if any note was processed) and refreshes the stylesheet.
 */
@Override
protected void afterProcess()
{
    final Element collectedNotes = notes;
    if ( collectedNotes != null )
    {
        htmlDocumentFacade.getBody().appendChild( collectedNotes );
    }

    htmlDocumentFacade.updateStylesheet();
}
/**
 * @return the DOM document held by the underlying HTML facade
 */
public Document getDocument()
{
    final Document htmlDocument = htmlDocumentFacade.getDocument();
    return htmlDocument;
}
/**
 * Writes the text of a character run as a {@code <span>} inside the given
 * element. Font family and font size are emitted only when they differ
 * from the values recorded for the enclosing block; bold, italic and the
 * remaining character properties are always considered.
 *
 * @param pElement
 *            element that receives the span
 * @param characterRun
 *            run providing the formatting
 * @param text
 *            text to output
 */
@Override
protected void outputCharacters( Element pElement,
        CharacterRun characterRun, String text )
{
    final Element span = htmlDocumentFacade.document.createElement( "span" );
    pElement.appendChild( span );

    final StringBuilder css = new StringBuilder();
    final BlockProperies enclosingBlock = this.blocksProperies.peek();
    final Triplet runFont = getCharacterRunTriplet( characterRun );

    final boolean fontDiffers = WordToHtmlUtils.isNotEmpty( runFont.fontName )
            && !WordToHtmlUtils.equals( runFont.fontName,
                    enclosingBlock.pFontName );
    if ( fontDiffers )
    {
        css.append( "font-family:" ).append( runFont.fontName )
                .append( ";" );
    }

    final int runFontSize = characterRun.getFontSize() / 2;
    if ( runFontSize != enclosingBlock.pFontSize )
    {
        css.append( "font-size:" ).append( runFontSize ).append( "pt;" );
    }

    if ( runFont.bold )
    {
        css.append( "font-weight:bold;" );
    }
    if ( runFont.italic )
    {
        css.append( "font-style:italic;" );
    }

    WordToHtmlUtils.addCharactersProperties( characterRun, css );
    if ( css.length() != 0 )
    {
        htmlDocumentFacade.addStyleClass( span, "s", css.toString() );
    }

    span.appendChild( htmlDocumentFacade.createText( text ) );
}
/**
 * Creates one bookmark element per bookmark, each nested inside the
 * previous one, and then processes the characters of the range inside the
 * innermost bookmark element.
 *
 * @param range
 *            text covered by the bookmarks; when {@code null} only the
 *            bookmark elements are created
 */
@Override
protected void processBookmarks( HWPFDocumentCore wordDocument,
        Element currentBlock, Range range, int currentTableLevel,
        List<Bookmark> rangeBookmarks )
{
    Element innermost = currentBlock;
    for ( Bookmark bookmark : rangeBookmarks )
    {
        final Element anchor = htmlDocumentFacade.createBookmark( bookmark
                .getName() );
        innermost.appendChild( anchor );
        innermost = anchor;
    }

    if ( range == null )
    {
        return;
    }
    processCharacters( wordDocument, currentTableLevel, range, innermost );
}
/**
 * Copies the non-empty title, author, keywords and comments of the
 * summary information into the HTML document; comments are added as the
 * description.
 *
 * @param summaryInformation
 *            summary information to read from
 */
@Override
protected void processDocumentInformation(
        SummaryInformation summaryInformation )
{
    final String title = summaryInformation.getTitle();
    if ( WordToHtmlUtils.isNotEmpty( title ) )
    {
        htmlDocumentFacade.setTitle( title );
    }

    final String author = summaryInformation.getAuthor();
    if ( WordToHtmlUtils.isNotEmpty( author ) )
    {
        htmlDocumentFacade.addAuthor( author );
    }

    final String keywords = summaryInformation.getKeywords();
    if ( WordToHtmlUtils.isNotEmpty( keywords ) )
    {
        htmlDocumentFacade.addKeywords( keywords );
    }

    final String comments = summaryInformation.getComments();
    if ( WordToHtmlUtils.isNotEmpty( comments ) )
    {
        htmlDocumentFacade.addDescription( comments );
    }
}
/**
 * Processes the given range through the superclass and then finishes the
 * HTML output via {@link #afterProcess()}.
 */
@Override
public void processDocumentPart( HWPFDocumentCore wordDocument, Range range )
{
    super.processDocumentPart( wordDocument, range );
    this.afterProcess();
}
/**
 * Appends an image element pointing at {@code path} to the block. The
 * document, character run and drawing arguments are not used here.
 */
@Override
protected void processDrawnObject( HWPFDocument doc,
        CharacterRun characterRun, OfficeDrawing officeDrawing,
        String path, Element block )
{
    block.appendChild( htmlDocumentFacade.createImage( path ) );
}
/**
 * Renders an auto-numbered endnote by delegating to
 * {@link #processNoteAutonumbered} with the note type {@code "end"}.
 */
@Override
protected void processEndnoteAutonumbered( HWPFDocument wordDocument,
        int noteIndex, Element block, Range endnoteTextRange )
{
    final String noteType = "end";
    processNoteAutonumbered( wordDocument, noteType, noteIndex, block,
            endnoteTextRange );
}
/**
 * Renders an auto-numbered footnote by delegating to
 * {@link #processNoteAutonumbered} with the note type {@code "foot"}.
 */
@Override
protected void processFootnoteAutonumbered( HWPFDocument wordDocument,
        int noteIndex, Element block, Range footnoteTextRange )
{
    final String noteType = "foot";
    processNoteAutonumbered( wordDocument, noteType, noteIndex, block,
            footnoteTextRange );
}
/**
 * Appends a hyperlink element for {@code hyperlink} to the current block
 * and, when a text range is given, processes its characters inside the
 * link.
 */
@Override
protected void processHyperlink( HWPFDocumentCore wordDocument,
        Element currentBlock, Range textRange, int currentTableLevel,
        String hyperlink )
{
    final Element link = htmlDocumentFacade.createHyperlink( hyperlink );
    currentBlock.appendChild( link );

    if ( textRange == null )
    {
        return;
    }
    processCharacters( wordDocument, currentTableLevel, textRange, link );
}
/**
 * Appends the picture to the current block as an image element sized in
 * inches.
 * <p>
 * Width, height and crop values are taken from the picture, multiplied by
 * the horizontal/vertical scaling factor (in thousandths) when that factor
 * is positive, and divided by {@code TWIPS_PER_INCH}. An uncropped picture
 * becomes a single image element with an inline style. A cropped picture
 * becomes an outer block, an inner block with {@code overflow:hidden}
 * sized to the visible area, and an absolutely positioned image shifted by
 * the left/top crop.
 *
 * @param currentBlock
 *            element that receives the image
 * @param inlined
 *            not used by this implementation
 * @param picture
 *            picture providing size, scaling and crop values
 * @param imageSourcePath
 *            value passed to the facade when creating the image element
 */
protected void processImage( Element currentBlock, boolean inlined,
        Picture picture, String imageSourcePath )
{
    final int aspectRatioX = picture.getHorizontalScalingFactor();
    final int aspectRatioY = picture.getVerticalScalingFactor();

    final float imageWidth = toScaledInches( picture.getDxaGoal(),
            aspectRatioX );
    final float cropRight = toScaledInches( picture.getDxaCropRight(),
            aspectRatioX );
    final float cropLeft = toScaledInches( picture.getDxaCropLeft(),
            aspectRatioX );

    final float imageHeight = toScaledInches( picture.getDyaGoal(),
            aspectRatioY );
    final float cropTop = toScaledInches( picture.getDyaCropTop(),
            aspectRatioY );
    final float cropBottom = toScaledInches( picture.getDyaCropBottom(),
            aspectRatioY );

    Element root;
    if ( cropTop != 0 || cropRight != 0 || cropBottom != 0 || cropLeft != 0 )
    {
        float visibleWidth = Math
                .max( 0, imageWidth - cropLeft - cropRight );
        float visibleHeight = Math.max( 0, imageHeight - cropTop
                - cropBottom );

        root = htmlDocumentFacade.createBlock();
        htmlDocumentFacade.addStyleClass( root, "d",
                "vertical-align:text-bottom;width:" + visibleWidth
                        + "in;height:" + visibleHeight + "in;" );

        // clipping window: only the visible part of the image shows
        Element inner = htmlDocumentFacade.createBlock();
        htmlDocumentFacade.addStyleClass( inner, "d",
                "position:relative;width:" + visibleWidth + "in;height:"
                        + visibleHeight + "in;overflow:hidden;" );
        root.appendChild( inner );

        // Shift the image by the crop amount. The offsets are lengths in
        // inches and need the unit to be valid CSS; negating the value
        // (instead of prefixing "-") also keeps a negative crop valid.
        Element image = htmlDocumentFacade.createImage( imageSourcePath );
        htmlDocumentFacade.addStyleClass( image, "i",
                "position:absolute;left:" + ( -cropLeft ) + "in;top:"
                        + ( -cropTop ) + "in;width:" + imageWidth
                        + "in;height:" + imageHeight + "in;" );
        inner.appendChild( image );
    }
    else
    {
        root = htmlDocumentFacade.createImage( imageSourcePath );
        root.setAttribute( "style", "width:" + imageWidth + "in;height:"
                + imageHeight + "in;vertical-align:text-bottom;" );
    }

    currentBlock.appendChild( root );
}

/**
 * Converts a measurement to inches, applying a scaling factor given in
 * thousandths when it is positive. The arithmetic is done in floating
 * point so that the division by 1000 does not truncate.
 */
private static float toScaledInches( float twips, int scalingFactor )
{
    if ( scalingFactor > 0 )
    {
        return twips * ( scalingFactor / 1000f ) / TWIPS_PER_INCH;
    }
    return twips / TWIPS_PER_INCH;
}
/**
 * Leaves an HTML comment in place of the picture, naming the file name
 * suggested by the picture. No image element is created.
 */
@Override
protected void processImageWithoutPicturesManager( Element currentBlock,
        boolean inlined, Picture picture )
{
    // no default implementation -- only mark where the image would go
    final String hint = "Image link to '" + picture.suggestFullFileName()
            + "' can be here";
    currentBlock.appendChild( htmlDocumentFacade.document
            .createComment( hint ) );
}
/**
 * Appends a line-break element to the block; the character run is not
 * used.
 */
@Override
protected void processLineBreak( Element block, CharacterRun characterRun )
{
    final Element lineBreak = htmlDocumentFacade.createLineBreak();
    block.appendChild( lineBreak );
}
/**
 * Renders an auto-numbered note in two places: a superscript-styled link
 * in the running text and a note entry (back link, space, note text) in
 * the shared notes block, which is created on first use. The link and the
 * entry reference each other through {@code <type>note_<n>} and
 * {@code <type>note_back_<n>}, where {@code n} is {@code noteIndex + 1}.
 *
 * @param doc
 *            document the note belongs to
 * @param type
 *            prefix used in anchor names and CSS class names
 * @param noteIndex
 *            zero-based index of the note
 * @param block
 *            element that receives the in-text link
 * @param noteTextRange
 *            range holding the text of the note
 */
protected void processNoteAutonumbered( HWPFDocument doc, String type,
        int noteIndex, Element block, Range noteTextRange )
{
    final String label = String.valueOf( noteIndex + 1 );
    final String labelCssClass = htmlDocumentFacade.getOrCreateCssClass(
            "a", "vertical-align:super;font-size:smaller;" );
    final String noteTarget = type + "note_" + label;
    final String referenceTarget = type + "note_back_" + label;

    // link placed in the running text, pointing at the note entry
    final Element reference = htmlDocumentFacade.createHyperlink( "#"
            + noteTarget );
    reference.setAttribute( "name", referenceTarget );
    reference.setAttribute( "class", labelCssClass + " " + type
            + "noteanchor" );
    reference.setTextContent( label );
    block.appendChild( reference );

    if ( notes == null )
    {
        notes = htmlDocumentFacade.createBlock();
        notes.setAttribute( "class", "notes" );
    }

    final Element entry = htmlDocumentFacade.createBlock();
    entry.setAttribute( "class", type + "note" );
    notes.appendChild( entry );

    // numbered anchor of the entry, linking back to the running text
    final Element backLink = htmlDocumentFacade
            .createBookmark( noteTarget );
    backLink.setAttribute( "href", "#" + referenceTarget );
    backLink.setTextContent( label );
    backLink.setAttribute( "class", labelCssClass + " " + type
            + "noteindex" );
    entry.appendChild( backLink );
    entry.appendChild( htmlDocumentFacade.createText( " " ) );

    final Element noteText = htmlDocumentFacade.getDocument()
            .createElement( "span" );
    noteText.setAttribute( "class", type + "notetext" );
    entry.appendChild( noteText );

    // notes have no block font defaults of their own
    this.blocksProperies.push( new BlockProperies( "", -1 ) );
    try
    {
        processCharacters( doc, Integer.MIN_VALUE, noteTextRange, noteText );
    }
    finally
    {
        this.blocksProperies.pop();
    }
}
/**
 * Represents a page break by appending a line-break element to the flow;
 * the document argument is not used.
 */
@Override
protected void processPageBreak( HWPFDocumentCore wordDocument, Element flow )
{
    final Element lineBreak = htmlDocumentFacade.createLineBreak();
    flow.appendChild( lineBreak );
}
/**
 * Appends a link to the in-document anchor {@code #<pageref>} and, when a
 * text range is given, processes its characters inside the link.
 */
protected void processPageref( HWPFDocumentCore hwpfDocument,
        Element currentBlock, Range textRange, int currentTableLevel,
        String pageref )
{
    final String target = "#" + pageref;
    final Element link = htmlDocumentFacade.createHyperlink( target );
    currentBlock.appendChild( link );

    if ( textRange == null )
    {
        return;
    }
    processCharacters( hwpfDocument, currentTableLevel, textRange, link );
}
protected void processParagraph( HWPFDocumentCore hwpfDocument,
Element parentElement, int currentTableLevel, Paragraph
paragraph,
String bulletText )
{
final Element pElement = htmlDocumentFacade.createParagraph();
parentElement.appendChild( pElement );
StringBuilder style = new StringBuilder();
WordToHtmlUtils.addParagraphProperties( paragraph, style );
final int charRuns = paragraph.numCharacterRuns();
if ( charRuns == 0 )
{
return;
}
{
final String pFontName;
final int pFontSize;
final CharacterRun characterRun = paragraph.getCharacterRun( 0
);
if ( characterRun != null )
{
Triplet triplet = getCharacterRunTriplet( characterRun );
pFontSize = characterRun.getFontSize() / 2;
pFontName = triplet.fontName;
WordToHtmlUtils.addFontFamily( pFontName, style );
WordToHtmlUtils.addFontSize( pFontSize, style );
}
else
{
pFontSize = -1;
pFontName = WordToHtmlUtils.EMPTY;
}
blocksProperies.push( new BlockProperies( pFontName, pFontSize )
);
}
try
{
if ( WordToHtmlUtils.isNotEmpty( bulletText ) )
{
if ( bulletText.endsWith( "\t" ) )
{
/*
* We don't know how to handle all cases in HTML, but at
* least simplest case shall be handled
*/
final float defaultTab = TWIPS_PER_INCH / 2;
float firstLinePosition = paragraph.getIndentFromLeft()
+ paragraph.getFirstLineIndent() + 20; // char
have
// some
space
float nextStop = (float) ( Math.ceil( firstLinePosition
/ defaultTab ) * defaultTab );
final float spanMinWidth = nextStop - firstLinePosition;
Element span = htmlDocumentFacade.getDocument()
.createElement( "span" );
htmlDocumentFacade
.addStyleClass( span, "s",
"display: inline-block; text-indent: 0;
min-width: "
+ ( spanMinWidth /
TWIPS_PER_INCH )
+ "in;" );
pElement.appendChild( span );
Text textNode = htmlDocumentFacade.createText(
bulletText
.substring( 0, bulletText.length() - 1 )
+ UNICODECHAR_ZERO_WIDTH_SPACE
+ UNICODECHAR_NO_BREAK_SPACE );
span.appendChild( textNode );
}
else
{
Text textNode = htmlDocumentFacade.createText(
bulletText
.substring( 0, bulletText.length() - 1 ) );
pElement.appendChild( textNode );
}
}
processCharacters( hwpfDocument, currentTableLevel, paragraph,
pElement );
}
finally
{
blocksProperies.pop();
}
if ( style.length() > 0 )
htmlDocumentFacade.addStyleClass( pElement, "p",
style.toString() );
WordToHtmlUtils.compactSpans( pElement );
return;
}
/**
 * Renders a section as its own block carrying the section's margin and
 * column style, appended to the body, and processes the section's
 * paragraphs inside it. The section counter is not used.
 */
protected void processSection( HWPFDocumentCore wordDocument,
        Section section, int sectionCounter )
{
    final Element sectionBlock = htmlDocumentFacade.createBlock();
    final String sectionCss = getSectionStyle( section );
    htmlDocumentFacade.addStyleClass( sectionBlock, "d", sectionCss );
    htmlDocumentFacade.body.appendChild( sectionBlock );

    processParagraphes( wordDocument, sectionBlock, section,
            Integer.MIN_VALUE );
}
/**
 * Renders the only section of a document directly into the body: the
 * section style is applied to the body element itself and the paragraphs
 * are processed into it without a wrapping block.
 */
@Override
protected void processSingleSection( HWPFDocumentCore wordDocument,
        Section section )
{
    final String sectionCss = getSectionStyle( section );
    htmlDocumentFacade.addStyleClass( htmlDocumentFacade.body, "b",
            sectionCss );

    processParagraphes( wordDocument, htmlDocumentFacade.body, section,
            Integer.MIN_VALUE );
}
/**
 * Renders a Word table as an HTML table appended to {@code flow}.
 * <p>
 * Rows flagged as table header go into the table header element, all
 * others into the table body. Cells that continue a vertical merge are
 * skipped, as are cells spanning zero columns; column and row spans are
 * written as {@code colspan}/{@code rowspan} attributes. A cell without
 * content receives an empty paragraph.
 * <p>
 * The table is appended only when it has at least one body row; otherwise
 * a warning with the table's start and end offsets is logged and nothing
 * is appended, even if header rows were produced.
 *
 * @param hwpfDocument
 *            document being converted
 * @param flow
 *            element that receives the table
 * @param table
 *            table to render
 */
protected void processTable( HWPFDocumentCore hwpfDocument, Element flow,
        Table table )
{
    Element tableHeader = htmlDocumentFacade.createTableHeader();
    Element tableBody = htmlDocumentFacade.createTableBody();

    final int[] tableCellEdges = WordToHtmlUtils
            .buildTableCellEdgesArray( table );
    final int tableRows = table.numRows();

    for ( int r = 0; r < tableRows; r++ )
    {
        TableRow tableRow = table.getRow( r );

        Element tableRowElement = htmlDocumentFacade.createTableRow();
        StringBuilder tableRowStyle = new StringBuilder();
        WordToHtmlUtils.addTableRowProperties( tableRow, tableRowStyle );

        // index of current element in tableCellEdges[]
        int currentEdgeIndex = 0;
        final int rowCells = tableRow.numCells();
        for ( int c = 0; c < rowCells; c++ )
        {
            TableCell tableCell = tableRow.getCell( c );

            if ( tableCell.isVerticallyMerged()
                    && !tableCell.isFirstVerticallyMerged() )
            {
                // covered by the rowspan of a cell above; only advance
                // the edge index past the columns it occupies
                currentEdgeIndex += getNumberColumnsSpanned(
                        tableCellEdges, currentEdgeIndex, tableCell );
                continue;
            }

            Element tableCellElement;
            if ( tableRow.isTableHeader() )
            {
                tableCellElement = htmlDocumentFacade
                        .createTableHeaderCell();
            }
            else
            {
                tableCellElement = htmlDocumentFacade.createTableCell();
            }
            StringBuilder tableCellStyle = new StringBuilder();
            WordToHtmlUtils.addTableCellProperties( tableRow, tableCell,
                    r == 0, r == tableRows - 1, c == 0, c == rowCells - 1,
                    tableCellStyle );

            int colSpan = getNumberColumnsSpanned( tableCellEdges,
                    currentEdgeIndex, tableCell );
            currentEdgeIndex += colSpan;

            if ( colSpan == 0 )
                continue;

            if ( colSpan != 1 )
                tableCellElement.setAttribute( "colspan",
                        String.valueOf( colSpan ) );

            final int rowSpan = getNumberRowsSpanned( table,
                    tableCellEdges, r, c, tableCell );
            if ( rowSpan > 1 )
                tableCellElement.setAttribute( "rowspan",
                        String.valueOf( rowSpan ) );

            processParagraphes( hwpfDocument, tableCellElement, tableCell,
                    table.getTableLevel() );

            if ( !tableCellElement.hasChildNodes() )
            {
                tableCellElement.appendChild( htmlDocumentFacade
                        .createParagraph() );
            }
            if ( tableCellStyle.length() > 0 )
                htmlDocumentFacade.addStyleClass( tableCellElement,
                        tableCellElement.getTagName(),
                        tableCellStyle.toString() );

            tableRowElement.appendChild( tableCellElement );
        }

        if ( tableRowStyle.length() > 0 )
            tableRowElement.setAttribute( "class", htmlDocumentFacade
                    .getOrCreateCssClass( "r", tableRowStyle.toString() ) );

        if ( tableRow.isTableHeader() )
        {
            tableHeader.appendChild( tableRowElement );
        }
        else
        {
            tableBody.appendChild( tableRowElement );
        }
    }

    final Element tableElement = htmlDocumentFacade.createTable();
    tableElement
            .setAttribute(
                    "class",
                    htmlDocumentFacade
                            .getOrCreateCssClass( "t",
                                    "table-layout:fixed;border-collapse:collapse;border-spacing:0;" ) );
    if ( tableHeader.hasChildNodes() )
    {
        tableElement.appendChild( tableHeader );
    }
    if ( tableBody.hasChildNodes() )
    {
        tableElement.appendChild( tableBody );
        flow.appendChild( tableElement );
    }
    else
    {
        logger.log( POILogger.WARN, "Table without body starting at [",
                Integer.valueOf( table.getStartOffset() ), "; ",
                Integer.valueOf( table.getEndOffset() ), ")" );
    }
}
}
The author says we have to override a method to get images. I'm unable to
understand where to override it. Can anyone please help? It's urgent!
--
View this message in context: http://apache-poi.1045710.n5.nabble.com/retrieval-of-images-from-WordtoHTML-converter-tp5649302p5649302.html
Sent from the POI - User mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
For additional commands, e-mail: user-help@poi.apache.org
Re: retrieval of images from WordtoHTML converter
Posted by agastheswar <ag...@gmail.com>.
Hi, can you give me the code so that I can embed it directly in my code?
--
View this message in context: http://apache-poi.1045710.n5.nabble.com/retrieval-of-images-from-WordtoHTML-converter-tp5649302p5649439.html
Sent from the POI - User mailing list archive at Nabble.com.
---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
For additional commands, e-mail: user-help@poi.apache.org
Re: retrieval of images from WordtoHTML converter
Posted by Sergey Vladimirov <vl...@gmail.com>.
You can either override the method "protected void processImage( Element
currentBlock, boolean inlined, Picture picture )" from
AbstractWordConverter or, better, provide a PicturesManager
implementation to the converter.
--
Sergey Vladimirov
On Wed, Apr 18, 2012 at 5:54 PM, agastheswar <ag...@gmail.com> wrote:
> AbstractWordConverter
---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@poi.apache.org
For additional commands, e-mail: user-help@poi.apache.org