You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@avalon.apache.org by cz...@apache.org on 2002/04/22 13:20:38 UTC

cvs commit: jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl HTMLXMLizer.java TextXMLizer.java

cziegeler    02/04/22 04:20:38

  Modified:    all      build.xml
               all/src/scratchpad/org/apache/excalibur/xmlizer/impl
                        TextXMLizer.java
  Added:       all/src/scratchpad/org/apache/excalibur/xmlizer/impl
                        HTMLXMLizer.java
  Log:
  Adding HTML to XMLizer
  
  Revision  Changes    Path
  1.145     +5 -0      jakarta-avalon-excalibur/all/build.xml
  
  Index: build.xml
  ===================================================================
  RCS file: /home/cvs/jakarta-avalon-excalibur/all/build.xml,v
  retrieving revision 1.144
  retrieving revision 1.145
  diff -u -r1.144 -r1.145
  --- build.xml	22 Apr 2002 10:09:13 -0000	1.144
  +++ build.xml	22 Apr 2002 11:20:38 -0000	1.145
  @@ -131,6 +131,9 @@
           <available property="jaxen.present" classname="org.jaxen.dom.XPath">
               <classpath refid="project.class.path"/>
           </available>
  +        <available property="jtidy.present" classname="org.w3c.tidy.Tidy">
  +            <classpath refid="project.class.path"/>
  +        </available>
           <available property="jms.present" classname="javax.jms.Queue">
               <classpath refid="project.class.path"/>
           </available>
  @@ -251,6 +254,8 @@
               target="1.2">
               <classpath refid="project.class.path" />
               <exclude name="**/test/**"/>
  +            <exclude name="org/apache/excalibur/xmlizer/impl/HTMLXMLizer.java"
  +                unless="jtidy.present"/>
           </javac>
       </target>
   
  
  
  
  1.2       +2 -2      jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/TextXMLizer.java
  
  Index: TextXMLizer.java
  ===================================================================
  RCS file: /home/cvs/jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/TextXMLizer.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- TextXMLizer.java	19 Apr 2002 10:58:58 -0000	1.1
  +++ TextXMLizer.java	22 Apr 2002 11:20:38 -0000	1.2
  @@ -26,7 +26,7 @@
    * to SAX events.
    *
    * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
  - * @version CVS $Revision: 1.1 $ $Date: 2002/04/19 10:58:58 $
  + * @version CVS $Revision: 1.2 $ $Date: 2002/04/22 11:20:38 $
    */
   
   public class TextXMLizer
  @@ -75,7 +75,7 @@
   
   
           final InputSource inputSource = new InputSource( stream );
  -        inputSource.setSystemId( systemID );
  +        if ( null != systemID ) inputSource.setSystemId( systemID );
   
           Parser parser = null;
           try
  
  
  
  1.1                  jakarta-avalon-excalibur/all/src/scratchpad/org/apache/excalibur/xmlizer/impl/HTMLXMLizer.java
  
  Index: HTMLXMLizer.java
  ===================================================================
  /*
   * Copyright (C) The Apache Software Foundation. All rights reserved.
   *
   * This software is published under the terms of the Apache Software License
   * version 1.1, a copy of which has been included with this distribution in
   * the LICENSE.txt file.
   */
  package org.apache.excalibur.xmlizer.impl;
  
  import java.io.InputStream;
  import java.io.IOException;
  import java.io.StringReader;
  import java.io.StringWriter;
  import java.util.Properties;
  import javax.xml.transform.OutputKeys;
  import javax.xml.transform.Transformer;
  import javax.xml.transform.TransformerException;
  import javax.xml.transform.TransformerFactory;
  import javax.xml.transform.dom.DOMSource;
  import javax.xml.transform.stream.StreamResult;
  import org.apache.avalon.excalibur.xml.Parser;
  import org.apache.excalibur.xmlizer.XMLizer;
  import org.apache.avalon.framework.logger.AbstractLogEnabled;
  import org.apache.avalon.framework.component.Component;
  import org.apache.avalon.framework.component.ComponentException;
  import org.apache.avalon.framework.component.ComponentManager;
  import org.apache.avalon.framework.component.Composable;
  import org.apache.avalon.framework.thread.ThreadSafe;
  import org.w3c.tidy.Tidy;
  import org.xml.sax.ContentHandler;
  import org.xml.sax.InputSource;
  import org.xml.sax.SAXException;
  
  /**
   * Converter for transforming an input stream containing text/html data
   * to SAX events.
   * <p>
   * The HTML is first parsed by JTidy into a DOM, the DOM is serialized to
   * XML text with a JAXP identity transformer, and that text is finally
   * parsed by the {@link Parser} component looked up from the component
   * manager, which delivers the SAX events to the supplied handler.
   *
   * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
   * @version CVS $Revision: 1.1 $ $Date: 2002/04/22 11:20:38 $
   */

  public class HTMLXMLizer
      extends AbstractLogEnabled
      implements XMLizer, ThreadSafe, Composable
  {

      /** The component manager used to look up the {@link Parser}. */
      protected ComponentManager manager;

      /** Output properties applied when serializing the JTidy DOM to XML text. */
      protected static Properties format;

      static {
          // Assign the static field itself. Declaring a local variable named
          // "format" here would shadow the field and leave it null, so the
          // transformer would silently run with its default output settings.
          format = new Properties();
          format.setProperty(OutputKeys.METHOD, "xml");
          format.setProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
          format.setProperty(OutputKeys.INDENT, "yes");
      }

      /**
       * Composable interface: stores the component manager for later
       * {@link Parser} lookups.
       *
       * @param manager the component manager of this component
       */
      public void compose(ComponentManager manager)
      {
          this.manager = manager;
      }

      /**
       * Generates SAX events from the given HTML input stream.
       * <b>NOTE</b> : if the implementation can produce lexical events, care should be taken
       * that <code>handler</code> can actually be an <code>XMLConsumer</code> that accepts such
       * events or directly implements the LexicalHandler interface!
       *
       * @param stream    the data; must not be null. The stream is not closed here.
       * @param mimeType  the mime-type for the data; only used for a debug message
       *                  when it is null
       * @param systemID  the URI defining the data (this is optional and can be null)
       * @param handler   the receiver of the SAX events; must not be null
       * @throws SAXException if serializing the tidied document fails (the
       *                      TransformerException is wrapped) or the parser reports an error
       * @throws IOException declared for I/O failures while reading or parsing
       * @throws ComponentException if <code>stream</code> or <code>handler</code> is null;
       *                            presumably also if the parser lookup fails
       */
      public void toSAX( InputStream    stream,
                         String         mimeType,
                         String         systemID,
                         ContentHandler handler )
          throws SAXException, IOException, ComponentException
      {
          if ( null == stream ) {
              throw new ComponentException("Stream must not be null.");
          }
          if ( null == handler ) {
              throw new ComponentException("Handler must not be null.");
          }
          if ( null == mimeType && this.getLogger().isDebugEnabled() ) {
              this.getLogger().debug("No mime-type for xmlizing " + systemID + ", guessing text/html");
          }

          // Step 1: let JTidy turn the (possibly malformed) HTML into an XHTML DOM.
          final Tidy xhtmlconvert = new Tidy();
          xhtmlconvert.setXmlOut(true);
          xhtmlconvert.setXHTML(true);
          xhtmlconvert.setShowWarnings(false);

          // Step 2: serialize the DOM to XML text using the shared output properties.
          final StringWriter writer = new StringWriter();
          try {
              final Transformer transformer = TransformerFactory.newInstance().newTransformer();
              transformer.setOutputProperties(format);
              transformer.transform(new DOMSource(xhtmlconvert.parseDOM(stream, null)),
                                    new StreamResult(writer));
          } catch (TransformerException te) {
              throw new SAXException("Exception during transformation.", te);
          }

          // Step 3: feed the XML text to the parser component to produce SAX events.
          final InputSource inputSource =
                new InputSource( new StringReader(writer.toString()) );
          if ( null != systemID ) {
              inputSource.setSystemId( systemID );
          }

          Parser parser = null;
          try
          {
              parser = (Parser)this.manager.lookup( Parser.ROLE );

              parser.parse( inputSource, handler );
          }
          finally
          {
              // Always hand the parser back, even when parsing failed.
              if ( parser != null ) {
                  this.manager.release( parser );
              }
          }
      }

  }
  
  
  
  

--
To unsubscribe, e-mail:   <ma...@jakarta.apache.org>
For additional commands, e-mail: <ma...@jakarta.apache.org>