You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by cz...@apache.org on 2002/02/11 10:51:19 UTC

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/source URLSource.java

cziegeler    02/02/11 01:51:19

  Modified:    src/java/org/apache/cocoon/components/source URLSource.java
  Added:       lib/core avalon-scratchpad-20020205.jar
  Log:
  Added excalibur scratchpad for start of reintegration components
  
  Revision  Changes    Path
  1.1                  xml-cocoon2/lib/core/avalon-scratchpad-20020205.jar
  
  	<<Binary file>>
  
  
  1.5       +165 -105  xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java
  
  Index: URLSource.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- URLSource.java	4 Feb 2002 12:31:09 -0000	1.4
  +++ URLSource.java	11 Feb 2002 09:51:19 -0000	1.5
  @@ -55,12 +55,16 @@
   
   package org.apache.cocoon.components.source;
   
  +import org.apache.avalon.excalibur.source.SourceParameters;
  +import org.apache.avalon.excalibur.source.SourceUtil;
   import org.apache.avalon.framework.component.ComponentManager;
   import org.apache.cocoon.ProcessingException;
   import org.apache.cocoon.ResourceNotFoundException;
   import org.apache.cocoon.components.parser.Parser;
   import org.apache.cocoon.environment.ModifiableSource;
  +import org.apache.cocoon.util.ClassUtils;
   import org.apache.cocoon.xml.XMLConsumer;
  +import org.w3c.tidy.Tidy;
   import org.xml.sax.ContentHandler;
   import org.xml.sax.InputSource;
   import org.xml.sax.SAXException;
  @@ -71,19 +75,52 @@
   import java.io.FileNotFoundException;
   import java.io.IOException;
   import java.io.InputStream;
  +import java.io.StringWriter;
   import java.lang.reflect.Method;
  -import java.net.URL;
  -import java.net.URLConnection;
  +import java.net.*;
  +import java.util.Iterator;
  +import java.util.Properties;
  +import javax.xml.transform.OutputKeys;
  +import javax.xml.transform.Transformer;
  +import javax.xml.transform.TransformerFactory;
  +import javax.xml.transform.dom.DOMSource;
  +import javax.xml.transform.stream.StreamResult;
   
   /**
    * Description of a source which is described by an URL.
    *
    * @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
  - * @version CVS $Id: URLSource.java,v 1.4 2002/02/04 12:31:09 cziegeler Exp $
  + * @version CVS $Id: URLSource.java,v 1.5 2002/02/11 09:51:19 cziegeler Exp $
    */
   
   public class URLSource implements ModifiableSource {
   
  +    /** Is JTidy available? */
  +    private static boolean jtidyAvailable;
  +
  +    /** Properties used for converting HTML to XML */
  +    private static Properties xmlProperties;
  +
  +    /** The TrAX factory for serializing xml */
  +    public static TransformerFactory transformerFactory = TransformerFactory.newInstance();
  +
  +    /**
  +     * Test if JTidy is available and set up the XML output properties
  +     */
  +    static {
  +        jtidyAvailable = false;
  +        try {
  +            Class jtidy = ClassUtils.loadClass("org.w3c.tidy.Tidy");
  +            if ( null != jtidy ) jtidyAvailable = true;
  +        } catch (ClassNotFoundException cnfe) {
  +            // ignore
  +        }
  +        xmlProperties = new Properties();
  +        xmlProperties.put(OutputKeys.METHOD, "xml");
  +        xmlProperties.put(OutputKeys.OMIT_XML_DECLARATION, "no");
  +    }
  +
  +
       /** Identifier for file urls */
       private final String FILE = "file:";
   
  @@ -93,6 +130,9 @@
       /** The content length */
       private long contentLength;
   
  +    /** Is the content html or xml? */
  +    private boolean isHTMLContent = false;
  +
       /** The system id */
       private String systemId;
   
  @@ -111,6 +151,12 @@
       /** The ComponentManager needed for streaming */
       private ComponentManager manager;
   
  +    /** The <code>SourceParameters</code> for post */
  +    private SourceParameters postParameters;
  +
  +    /** Should HTTP redirects be followed? */
  +    private boolean followRedirects = true;
  +
       /**
        * Construct a new object
        */
  @@ -119,6 +165,11 @@
           this.manager = manager;
           this.systemId = url.toExternalForm();
           this.isFile = systemId.startsWith(FILE);
  +        if (this.isFile == true) {
  +            if (systemId.endsWith(".htm") || systemId.endsWith(".html")) {
  +                this.isHTMLContent = true;
  +            }
  +        }
           this.url = url;
           this.gotInfos = false;
       }
  @@ -134,19 +185,25 @@
                   this.lastModificationDate = file.lastModified();
                   this.contentLength = file.length();
               } else {
  -                try {
  -                    if (this.connection == null) {
  -                        this.connection = this.url.openConnection();
  -                        String userInfo = this.getUserInfo();
  -                        if (this.url.getProtocol().startsWith("http") && userInfo != null) {
  -                            this.connection.setRequestProperty("Authorization","Basic "+this.encodeBASE64(userInfo));
  +                if (this.postParameters == null) {
  +                    try {
  +                        if (this.connection == null) {
  +                            this.connection = this.url.openConnection();
  +                            String userInfo = this.getUserInfo();
  +                            if (this.url.getProtocol().startsWith("http") && userInfo != null) {
  +                                this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo));
  +                            }
                           }
  +                        this.lastModificationDate = this.connection.getLastModified();
  +                        this.contentLength = this.connection.getContentLength();
  +                    } catch (IOException ignore) {
  +                        this.lastModificationDate = 0;
  +                        this.contentLength = -1;
                       }
  -                    this.lastModificationDate = this.connection.getLastModified();
  -                    this.contentLength = this.connection.getContentLength();
  -                } catch (IOException ignore) {
  -                    this.lastModificationDate = 0;
  -                    this.contentLength = -1;
  +                } else {
  +                    // do not open connection when using post!
  +                        this.lastModificationDate = 0;
  +                        this.contentLength = -1;
                   }
               }
               this.gotInfos = true;
  @@ -183,7 +240,7 @@
           this.getInfos();
           try{
               InputStream input = null;
  -            if (this.isFile) {
  +            if ( this.isFile ) {
                   input = new FileInputStream(this.systemId.substring(FILE.length()));
               } else {
                   if (this.connection == null) {
  @@ -191,17 +248,65 @@
                       /* The following requires a jdk 1.3 */
                       String userInfo = this.getUserInfo();
                       if (this.url.getProtocol().startsWith("http") && userInfo != null) {
  -                        this.connection.setRequestProperty("Authorization","Basic "+encodeBASE64(userInfo));
  -                    }
  +                        this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo));
                   }
  +                if (this.followRedirects == false && this.connection instanceof HttpURLConnection) {
  +                   ((HttpURLConnection)connection).setInstanceFollowRedirects(false);
  +                }
  +                // do a post operation
  +                if (this.connection instanceof HttpURLConnection
  +                    && this.postParameters != null) {
  +                    StringBuffer buffer = new StringBuffer(2000);
  +                    String key;
  +                    Iterator i = postParameters.getParameterNames();
  +                    Iterator values;
  +                    String value;
  +                    boolean first = true;
  +                    while ( i.hasNext() ) {
  +                        key = (String)i.next();
  +                        values = this.postParameters.getParameterValues(key);
  +                        while (values.hasNext() == true) {
  +                            value = SourceUtil.encode((String)values.next());
  +                            if (first == false) buffer.append('&');
  +                            first = false;
  +                            buffer.append(key.toString());
  +                            buffer.append('=');
  +                            buffer.append(value);
  +                        }
  +                    }
  +                    HttpURLConnection httpCon = (HttpURLConnection)connection;
  +                    httpCon.setDoInput(true);
   
  +                    if (buffer.length() > 1) { // only post if we have parameters
  +                        String postString = buffer.toString();
  +                        httpCon.setRequestMethod("POST"); // this is POST
  +                        httpCon.setDoOutput(true);
  +                        httpCon.setRequestProperty("Content-type", "application/x-www-form-urlencoded");
  +
  +                        // A content-length header must be contained in a POST request
  +                        httpCon.setRequestProperty("Content-length", Integer.toString(postString.length()));
  +                        java.io.OutputStream out = new java.io.BufferedOutputStream(httpCon.getOutputStream());
  +                        out.write(postString.getBytes());
  +                        out.close();
  +                    }
  +                    if ("text/html".equals(httpCon.getContentType()) == true) {
  +                        this.isHTMLContent = true;
  +                    }
  +                    input = httpCon.getInputStream();
  +                    this.connection = null; // make sure a new connection is created next time
  +                    return input;
  +                }
  +            }
  +            if ("text/html".equals(this.connection.getContentType()) == true) {
  +                this.isHTMLContent = true;
  +            }
                   input = this.connection.getInputStream();
                   this.connection = null; // make sure a new connection is created next time
               }
               return input;
  -        }catch(FileNotFoundException e){
  +        } catch(FileNotFoundException e) {
               throw new ResourceNotFoundException("Resource not found "
  -                                                + this.systemId);
  +                                                + this.systemId, e);
           }
       }
   
  @@ -267,92 +372,34 @@
        */
       public InputSource getInputSource()
       throws IOException, ProcessingException {
  -        InputSource newObject = new InputSource(this.getInputStream());
  +        InputStream stream = this.getInputStream();
  +        if ( this.isHTMLContent && jtidyAvailable ) {
  +            try {
  +                final Tidy xhtmlconvert = new Tidy();
  +                xhtmlconvert.setXmlOut(true);
  +                xhtmlconvert.setXHTML(true);
  +                xhtmlconvert.setShowWarnings(false);
  +                final org.w3c.dom.Document doc = xhtmlconvert.parseDOM(stream, null);
  +                final StringWriter writer = new StringWriter();
  +                final Transformer transformer;
  +                transformer = transformerFactory.newTransformer();
  +                transformer.setOutputProperties(xmlProperties);
  +                transformer.transform(new DOMSource(doc), new StreamResult(writer));
  +                final String xmlstring = writer.toString();
  +                InputSource newObject = new InputSource(new java.io.StringReader(xmlstring));
  +                newObject.setSystemId(this.systemId);
  +                return newObject;
  +            } catch (Exception ignore) {
  +                // Conversion failed. Fall back to the raw stream, as before, and let the caller deal with it.
  +                this.refresh();
  +                stream = this.getInputStream();
  +            }
  +        }
  +        InputSource newObject = new InputSource(stream);
           newObject.setSystemId(this.systemId);
           return newObject;
       }
   
  -     public static final char [ ] alphabet = {
  -       'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
  -       'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
  -       'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
  -       'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
  -       'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
  -       'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
  -       'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
  -       '4', '5', '6', '7', '8', '9', '+', '/' }; // 56 to 63
  -
  -     /**
  -      * BASE 64 encoding.
  -      * See also RFC 1421
  -      * @since 1.2
  -      */
  -     public static String encodeBASE64 ( String s ) {
  -         return encodeBASE64 ( s.getBytes ( ) );
  -     }
  -
  -     /**
  -      * BASE 64 encoding.
  -      * See also RFC 1421
  -      * @since 1.2
  -      */
  -     public static String encodeBASE64 ( byte [ ] octetString ) {
  -         int bits24;
  -         int bits6;
  -
  -         char [ ] out
  -         = new char [ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];
  -
  -         int outIndex = 0;
  -         int i = 0;
  -
  -         while ( ( i + 3 ) <= octetString.length ) {
  -             // store the octets
  -             bits24 = ( octetString [ i++ ] & 0xFF ) << 16;
  -             bits24 |= ( octetString [ i++ ] & 0xFF ) << 8;
  -             bits24 |= ( octetString [ i++ ] & 0xFF ) << 0;
  -
  -             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x0003F000 ) >> 12;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x0000003F );
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -         }
  -
  -         if ( octetString.length - i == 2 ) {
  -             // store the octets
  -             bits24 = ( octetString [ i ] & 0xFF ) << 16;
  -             bits24 |= ( octetString [ i + 1 ] & 0xFF ) << 8;
  -
  -             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x0003F000 ) >> 12;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x00000FC0 ) >> 6;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -
  -             // padding
  -             out [ outIndex++ ] = '=';
  -         } else if ( octetString.length - i == 1 ) {
  -             // store the octets
  -             bits24 = ( octetString [ i ] & 0xFF ) << 16;
  -
  -             bits6 = ( bits24 & 0x00FC0000 ) >> 18;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -             bits6 = ( bits24 & 0x0003F000 ) >> 12;
  -             out [ outIndex++ ] = alphabet [ bits6 ];
  -
  -             // padding
  -             out [ outIndex++ ] = '=';
  -             out [ outIndex++ ] = '=';
  -         }
  -
  -         return new String ( out );
  -     }
  -
       /**
        * Stream content to a content handler or to an XMLConsumer.
        *
  @@ -361,8 +408,7 @@
        * @throws SAXException if failed to parse source document.
        */
       public void toSAX(ContentHandler handler)
  -        throws SAXException, ProcessingException
  -    {
  +    throws SAXException, ProcessingException {
           Parser parser = null;
           try {
               parser = (Parser)this.manager.lookup(Parser.ROLE);
  @@ -390,7 +436,21 @@
           }
       }
   
  -    public void recycle()
  -    {
  +    public void recycle() {
  +    }
  +
  +    /**
  +     * Set the post parameters
  +     */
  +    public void setPostParameters(SourceParameters pars) {
  +        this.postParameters = pars;
  +    }
  +
  +    /**
  +     * Set the follow redirects flag
  +     */
  +    public void setFollowRedirects(boolean flag) {
  +        this.followRedirects = flag;
       }
  +
   }
  
  
  

----------------------------------------------------------------------
In case of trouble, e-mail:      webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org