You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by cz...@apache.org on 2002/02/11 10:51:19 UTC
cvs commit: xml-cocoon2/src/java/org/apache/cocoon/components/source URLSource.java
cziegeler 02/02/11 01:51:19
Modified: src/java/org/apache/cocoon/components/source URLSource.java
Added: lib/core avalon-scratchpad-20020205.jar
Log:
Added excalibur scratchpad for start of reintegration components
Revision Changes Path
1.1 xml-cocoon2/lib/core/avalon-scratchpad-20020205.jar
<<Binary file>>
1.5 +165 -105 xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java
Index: URLSource.java
===================================================================
RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/components/source/URLSource.java,v
retrieving revision 1.4
retrieving revision 1.5
diff -u -r1.4 -r1.5
--- URLSource.java 4 Feb 2002 12:31:09 -0000 1.4
+++ URLSource.java 11 Feb 2002 09:51:19 -0000 1.5
@@ -55,12 +55,16 @@
package org.apache.cocoon.components.source;
+import org.apache.avalon.excalibur.source.SourceParameters;
+import org.apache.avalon.excalibur.source.SourceUtil;
import org.apache.avalon.framework.component.ComponentManager;
import org.apache.cocoon.ProcessingException;
import org.apache.cocoon.ResourceNotFoundException;
import org.apache.cocoon.components.parser.Parser;
import org.apache.cocoon.environment.ModifiableSource;
+import org.apache.cocoon.util.ClassUtils;
import org.apache.cocoon.xml.XMLConsumer;
+import org.w3c.tidy.Tidy;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
@@ -71,19 +75,52 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.io.StringWriter;
import java.lang.reflect.Method;
-import java.net.URL;
-import java.net.URLConnection;
+import java.net.*;
+import java.util.Iterator;
+import java.util.Properties;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
/**
* Description of a source which is described by an URL.
*
* @author <a href="mailto:cziegeler@apache.org">Carsten Ziegeler</a>
- * @version CVS $Id: URLSource.java,v 1.4 2002/02/04 12:31:09 cziegeler Exp $
+ * @version CVS $Id: URLSource.java,v 1.5 2002/02/11 09:51:19 cziegeler Exp $
*/
public class URLSource implements ModifiableSource {
+ /** Is JTidy available? */
+ private static boolean jtidyAvailable;
+
+ /** Properties used for converting HTML to XML */
+ private static Properties xmlProperties;
+
+ /** The TrAX factory for serializing xml */
+ public static TransformerFactory transformerFactory = TransformerFactory.newInstance();
+
+ /**
+ * Test if JTidy is available
+ */
+ static {
+ jtidyAvailable = false;
+ try {
+ Class jtidy = ClassUtils.loadClass("org.w3c.tidy.Tidy");
+ if ( null != jtidy ) jtidyAvailable = true;
+ } catch (ClassNotFoundException cnfe) {
+ // ignore
+ }
+ xmlProperties = new Properties();
+ xmlProperties.put(OutputKeys.METHOD, "xml");
+ xmlProperties.put(OutputKeys.OMIT_XML_DECLARATION, "no");
+ }
+
+
/** Identifier for file urls */
private final String FILE = "file:";
@@ -93,6 +130,9 @@
/** The content length */
private long contentLength;
+ /** Is the content html or xml? */
+ private boolean isHTMLContent = false;
+
/** The system id */
private String systemId;
@@ -111,6 +151,12 @@
/** The ComponentManager needed for streaming */
private ComponentManager manager;
+ /** The <code>SourceParameters</code> for post */
+ private SourceParameters postParameters;
+
+ /** Follow Redirects ? */
+ private boolean followRedirects = true;
+
/**
* Construct a new object
*/
@@ -119,6 +165,11 @@
this.manager = manager;
this.systemId = url.toExternalForm();
this.isFile = systemId.startsWith(FILE);
+ if (this.isFile == true) {
+ if (systemId.endsWith(".htm") || systemId.endsWith(".html")) {
+ this.isHTMLContent = true;
+ }
+ }
this.url = url;
this.gotInfos = false;
}
@@ -134,19 +185,25 @@
this.lastModificationDate = file.lastModified();
this.contentLength = file.length();
} else {
- try {
- if (this.connection == null) {
- this.connection = this.url.openConnection();
- String userInfo = this.getUserInfo();
- if (this.url.getProtocol().startsWith("http") && userInfo != null) {
- this.connection.setRequestProperty("Authorization","Basic "+this.encodeBASE64(userInfo));
+ if (this.postParameters == null) {
+ try {
+ if (this.connection == null) {
+ this.connection = this.url.openConnection();
+ String userInfo = this.getUserInfo();
+ if (this.url.getProtocol().startsWith("http") && userInfo != null) {
+ this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo));
+ }
}
+ this.lastModificationDate = this.connection.getLastModified();
+ this.contentLength = this.connection.getContentLength();
+ } catch (IOException ignore) {
+ this.lastModificationDate = 0;
+ this.contentLength = -1;
}
- this.lastModificationDate = this.connection.getLastModified();
- this.contentLength = this.connection.getContentLength();
- } catch (IOException ignore) {
- this.lastModificationDate = 0;
- this.contentLength = -1;
+ } else {
+ // do not open connection when using post!
+ this.lastModificationDate = 0;
+ this.contentLength = -1;
}
}
this.gotInfos = true;
@@ -183,7 +240,7 @@
this.getInfos();
try{
InputStream input = null;
- if (this.isFile) {
+ if ( this.isFile ) {
input = new FileInputStream(this.systemId.substring(FILE.length()));
} else {
if (this.connection == null) {
@@ -191,17 +248,65 @@
/* The following requires a jdk 1.3 */
String userInfo = this.getUserInfo();
if (this.url.getProtocol().startsWith("http") && userInfo != null) {
- this.connection.setRequestProperty("Authorization","Basic "+encodeBASE64(userInfo));
- }
+ this.connection.setRequestProperty("Authorization","Basic "+SourceUtil.encodeBASE64(userInfo));
}
+ if (this.followRedirects == false && this.connection instanceof HttpURLConnection) {
+ ((HttpURLConnection)connection).setInstanceFollowRedirects(false);
+ }
+ // do a post operation
+ if (this.connection instanceof HttpURLConnection
+ && this.postParameters != null) {
+ StringBuffer buffer = new StringBuffer(2000);
+ String key;
+ Iterator i = postParameters.getParameterNames();
+ Iterator values;
+ String value;
+ boolean first = true;
+ while ( i.hasNext() ) {
+ key = (String)i.next();
+ values = this.postParameters.getParameterValues(key);
+ while (values.hasNext() == true) {
+ value = SourceUtil.encode((String)values.next());
+ if (first == false) buffer.append('&');
+ first = false;
+ buffer.append(key.toString());
+ buffer.append('=');
+ buffer.append(value);
+ }
+ }
+ HttpURLConnection httpCon = (HttpURLConnection)connection;
+ httpCon.setDoInput(true);
+ if (buffer.length() > 1) { // only post if we have parameters
+ String postString = buffer.toString();
+ httpCon.setRequestMethod("POST"); // this is POST
+ httpCon.setDoOutput(true);
+ httpCon.setRequestProperty("Content-type", "application/x-www-form-urlencoded");
+
+ // A content-length header must be contained in a POST request
+ httpCon.setRequestProperty("Content-length", Integer.toString(postString.length()));
+ java.io.OutputStream out = new java.io.BufferedOutputStream(httpCon.getOutputStream());
+ out.write(postString.getBytes());
+ out.close();
+ }
+ if ("text/html".equals(httpCon.getContentType()) == true) {
+ this.isHTMLContent = true;
+ }
+ input = httpCon.getInputStream();
+ this.connection = null; // make sure a new connection is created next time
+ return input;
+ }
+ }
+ if ("text/html".equals(this.connection.getContentType()) == true) {
+ this.isHTMLContent = true;
+ }
input = this.connection.getInputStream();
this.connection = null; // make sure a new connection is created next time
}
return input;
- }catch(FileNotFoundException e){
+ } catch(FileNotFoundException e) {
throw new ResourceNotFoundException("Resource not found "
- + this.systemId);
+ + this.systemId, e);
}
}
@@ -267,92 +372,34 @@
*/
public InputSource getInputSource()
throws IOException, ProcessingException {
- InputSource newObject = new InputSource(this.getInputStream());
+ InputStream stream = this.getInputStream();
+ if ( this.isHTMLContent && jtidyAvailable ) {
+ try {
+ final Tidy xhtmlconvert = new Tidy();
+ xhtmlconvert.setXmlOut(true);
+ xhtmlconvert.setXHTML(true);
+ xhtmlconvert.setShowWarnings(false);
+ final org.w3c.dom.Document doc = xhtmlconvert.parseDOM(stream, null);
+ final StringWriter writer = new StringWriter();
+ final Transformer transformer;
+ transformer = transformerFactory.newTransformer();
+ transformer.setOutputProperties(xmlProperties);
+ transformer.transform(new DOMSource(doc), new StreamResult(writer));
+ final String xmlstring = writer.toString();
+ InputSource newObject = new InputSource(new java.io.StringReader(xmlstring));
+ newObject.setSystemId(this.systemId);
+ return newObject;
+ } catch (Exception ignore) {
+ // Let someone else worry about what we got . This is as before.
+ this.refresh();
+ stream = this.getInputStream();
+ }
+ }
+ InputSource newObject = new InputSource(stream);
newObject.setSystemId(this.systemId);
return newObject;
}
- public static final char [ ] alphabet = {
- 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', // 0 to 7
- 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', // 8 to 15
- 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', // 16 to 23
- 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', // 24 to 31
- 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', // 32 to 39
- 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', // 40 to 47
- 'w', 'x', 'y', 'z', '0', '1', '2', '3', // 48 to 55
- '4', '5', '6', '7', '8', '9', '+', '/' }; // 56 to 63
-
- /**
- * BASE 64 encoding.
- * See also RFC 1421
- * @since 1.2
- */
- public static String encodeBASE64 ( String s ) {
- return encodeBASE64 ( s.getBytes ( ) );
- }
-
- /**
- * BASE 64 encoding.
- * See also RFC 1421
- * @since 1.2
- */
- public static String encodeBASE64 ( byte [ ] octetString ) {
- int bits24;
- int bits6;
-
- char [ ] out
- = new char [ ( ( octetString.length - 1 ) / 3 + 1 ) * 4 ];
-
- int outIndex = 0;
- int i = 0;
-
- while ( ( i + 3 ) <= octetString.length ) {
- // store the octets
- bits24 = ( octetString [ i++ ] & 0xFF ) << 16;
- bits24 |= ( octetString [ i++ ] & 0xFF ) << 8;
- bits24 |= ( octetString [ i++ ] & 0xFF ) << 0;
-
- bits6 = ( bits24 & 0x00FC0000 ) >> 18;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x0003F000 ) >> 12;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x00000FC0 ) >> 6;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x0000003F );
- out [ outIndex++ ] = alphabet [ bits6 ];
- }
-
- if ( octetString.length - i == 2 ) {
- // store the octets
- bits24 = ( octetString [ i ] & 0xFF ) << 16;
- bits24 |= ( octetString [ i + 1 ] & 0xFF ) << 8;
-
- bits6 = ( bits24 & 0x00FC0000 ) >> 18;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x0003F000 ) >> 12;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x00000FC0 ) >> 6;
- out [ outIndex++ ] = alphabet [ bits6 ];
-
- // padding
- out [ outIndex++ ] = '=';
- } else if ( octetString.length - i == 1 ) {
- // store the octets
- bits24 = ( octetString [ i ] & 0xFF ) << 16;
-
- bits6 = ( bits24 & 0x00FC0000 ) >> 18;
- out [ outIndex++ ] = alphabet [ bits6 ];
- bits6 = ( bits24 & 0x0003F000 ) >> 12;
- out [ outIndex++ ] = alphabet [ bits6 ];
-
- // padding
- out [ outIndex++ ] = '=';
- out [ outIndex++ ] = '=';
- }
-
- return new String ( out );
- }
-
/**
* Stream content to a content handler or to an XMLConsumer.
*
@@ -361,8 +408,7 @@
* @throws SAXException if failed to parse source document.
*/
public void toSAX(ContentHandler handler)
- throws SAXException, ProcessingException
- {
+ throws SAXException, ProcessingException {
Parser parser = null;
try {
parser = (Parser)this.manager.lookup(Parser.ROLE);
@@ -390,7 +436,21 @@
}
}
- public void recycle()
- {
+ public void recycle() {
+ }
+
+ /**
+ * Set the post parameters
+ */
+ public void setPostParameters(SourceParameters pars) {
+ this.postParameters = pars;
+ }
+
+ /**
+ * Set the follow redirects flag
+ */
+ public void setFollowRedirects(boolean flag) {
+ this.followRedirects = flag;
}
+
}
----------------------------------------------------------------------
In case of trouble, e-mail: webmaster@xml.apache.org
To unsubscribe, e-mail: cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org