You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@avalon.apache.org by cz...@apache.org on 2002/04/22 13:20:38 UTC
cvs commit: jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl HTMLXMLizer.java TextXMLizer.java
cziegeler 02/04/22 04:20:38
Modified: all build.xml
all/src/scratchpad/org/apache/excalibur/xmlizer/impl
TextXMLizer.java
Added: all/src/scratchpad/org/apache/excalibur/xmlizer/impl
HTMLXMLizer.java
Log:
Adding HTML to XMLizer
Revision Changes Path
1.145 +5 -0 jakarta-avalon-excalibur/all/build.xml
Index: build.xml
===================================================================
RCS file: /home/cvs/jakarta-avalon-excalibur/all/build.xml,v
retrieving revision 1.144
retrieving revision 1.145
diff -u -r1.144 -r1.145
--- build.xml 22 Apr 2002 10:09:13 -0000 1.144
+++ build.xml 22 Apr 2002 11:20:38 -0000 1.145
@@ -131,6 +131,9 @@
<available property="jaxen.present" classname="org.jaxen.dom.XPath">
<classpath refid="project.class.path"/>
</available>
+ <available property="jtidy.present" classname="org.w3c.tidy.Tidy">
+ <classpath refid="project.class.path"/>
+ </available>
<available property="jms.present" classname="javax.jms.Queue">
<classpath refid="project.class.path"/>
</available>
@@ -251,6 +254,8 @@
target="1.2">
<classpath refid="project.class.path" />
<exclude name="**/test/**"/>
+ <exclude name="org/apache/excalibur/xmlizer/impl/HTMLXMLizer.java"
+ unless="jtidy.present"/>
</javac>
</target>
1.2 +2 -2 jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/TextXMLizer.java
Index: TextXMLizer.java
===================================================================
RCS file: /home/cvs/jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/TextXMLizer.java,v
retrieving revision 1.1
retrieving revision 1.2
diff -u -r1.1 -r1.2
--- TextXMLizer.java 19 Apr 2002 10:58:58 -0000 1.1
+++ TextXMLizer.java 22 Apr 2002 11:20:38 -0000 1.2
@@ -26,7 +26,7 @@
* to SAX events.
*
* @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
- * @version CVS $Revision: 1.1 $ $Date: 2002/04/19 10:58:58 $
+ * @version CVS $Revision: 1.2 $ $Date: 2002/04/22 11:20:38 $
*/
public class TextXMLizer
@@ -75,7 +75,7 @@
final InputSource inputSource = new InputSource( stream );
- inputSource.setSystemId( systemID );
+ if ( null != systemID ) inputSource.setSystemId( systemID );
Parser parser = null;
try
1.1 jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/HTMLXMLizer.java
Index: HTMLXMLizer.java
===================================================================
/*
* Copyright (C) The Apache Software Foundation. All rights reserved.
*
* This software is published under the terms of the Apache Software License
* version 1.1, a copy of which has been included with this distribution in
* the LICENSE.txt file.
*/
package org.apache.excalibur.xmlizer.impl;
import java.io.InputStream;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.util.Properties;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.apache.avalon.excalibur.xml.Parser;
import org.apache.excalibur.xmlizer.XMLizer;
import org.apache.avalon.framework.logger.AbstractLogEnabled;
import org.apache.avalon.framework.component.Component;
import org.apache.avalon.framework.component.ComponentException;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.avalon.framework.component.Composable;
import org.apache.avalon.framework.thread.ThreadSafe;
import org.w3c.tidy.Tidy;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* Converter for transforming an input stream containing text/html data
* to SAX events.
* This class uses jtidy.
*
* @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
* @version CVS $Revision: 1.1 $ $Date: 2002/04/22 11:20:38 $
*/
public class HTMLXMLizer
    extends AbstractLogEnabled
    implements XMLizer, ThreadSafe, Composable
{
    /** The component manager, set in {@link #compose(ComponentManager)}. */
    protected ComponentManager manager;

    /**
     * Output properties used when serializing the DOM produced by jtidy
     * to a string (DOM -> text -> SAX).
     */
    protected static Properties format;

    static
    {
        // Assign the static field itself. Declaring a local variable with the
        // same name here would shadow the field and leave it null, so the
        // transformer would silently fall back to its default output settings.
        format = new Properties();
        format.put( OutputKeys.METHOD, "xml" );
        format.put( OutputKeys.OMIT_XML_DECLARATION, "no" );
        format.put( OutputKeys.INDENT, "yes" );
    }

    /**
     * Composable interface: stores the component manager that is later used
     * to look up the XML parser.
     *
     * @param manager the component manager
     */
    public void compose( ComponentManager manager )
    {
        this.manager = manager;
    }

    /**
     * Generates SAX events from the given HTML input stream.
     * The stream is parsed with jtidy into a DOM, the DOM is serialized to an
     * in-memory XML string, and that string is parsed by the {@link Parser}
     * component which sends the SAX events to <code>handler</code>.
     * <b>NOTE</b> : if the implementation can produce lexical events, care should be taken
     * that <code>handler</code> can actually be a <code>XMLConsumer</code> that accepts such
     * events or directly implements the LexicalHandler interface!
     *
     * @param stream the data, must not be null
     * @param mimeType the mime-type for the data; it is only checked for null
     *                 (a debug message is logged in that case), the data is
     *                 always processed as HTML
     * @param systemID the URI defining the data (this is optional and can be null)
     * @param handler the receiver of the SAX events, must not be null
     * @throws SAXException if serializing the tidied DOM fails or the parser
     *                      reports an error
     * @throws IOException declared for I/O problems while reading or parsing
     * @throws ComponentException if <code>stream</code> or <code>handler</code>
     *                            is null, or if raised while looking up the parser
     */
    public void toSAX( InputStream stream,
                       String mimeType,
                       String systemID,
                       ContentHandler handler )
        throws SAXException, IOException, ComponentException
    {
        if ( null == stream )
        {
            throw new ComponentException( "Stream must not be null." );
        }
        if ( null == handler )
        {
            throw new ComponentException( "Handler must not be null." );
        }
        if ( null == mimeType )
        {
            if ( this.getLogger().isDebugEnabled() )
            {
                // This converter always treats the data as HTML.
                this.getLogger().debug( "No mime-type for xmlizing " + systemID + ", guessing text/html" );
            }
        }

        // Let jtidy clean up the HTML and emit well-formed XHTML.
        final Tidy xhtmlconvert = new Tidy();
        xhtmlconvert.setXmlOut( true );
        xhtmlconvert.setXHTML( true );
        xhtmlconvert.setShowWarnings( false );

        // Serialize the tidied DOM into memory.
        final StringWriter writer = new StringWriter();
        try
        {
            final Transformer transformer = TransformerFactory.newInstance().newTransformer();
            transformer.setOutputProperties( format );
            transformer.transform( new DOMSource( xhtmlconvert.parseDOM( stream, null ) ),
                                   new StreamResult( writer ) );
        }
        catch ( TransformerException te )
        {
            throw new SAXException( "Exception during transformation.", te );
        }

        // Re-parse the serialized XHTML to generate the SAX events.
        final InputSource inputSource =
            new InputSource( new StringReader( writer.toString() ) );
        if ( null != systemID )
        {
            inputSource.setSystemId( systemID );
        }

        Parser parser = null;
        try
        {
            parser = (Parser)this.manager.lookup( Parser.ROLE );
            parser.parse( inputSource, handler );
        }
        finally
        {
            // Always hand the parser back to the manager, even on failure.
            if ( parser != null )
            {
                this.manager.release( parser );
            }
        }
    }
}
--
To unsubscribe, e-mail: <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>