You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2005/01/25 08:55:20 UTC
cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network AdvancedInputStream.java Authenticator.java BaseResourceRequest.java BlockingResourceRequest.java DOCUMENTATION DataEvent.java HTTPClientNetworkResource.java MalformedResourceException.java NetworkEventListener.java NetworkException.java ResourceRequest.java ResourceRequestFactory.java URLCookieManager.java URLResolver.java URLResourceRequest.java
burton 2005/01/24 23:55:20
Added: feedparser/src/java/org/apache/commons/feedparser/network
AdvancedInputStream.java Authenticator.java
BaseResourceRequest.java
BlockingResourceRequest.java DOCUMENTATION
DataEvent.java HTTPClientNetworkResource.java
MalformedResourceException.java
NetworkEventListener.java NetworkException.java
ResourceRequest.java ResourceRequestFactory.java
URLCookieManager.java URLResolver.java
URLResourceRequest.java
Log:
init of network API into feedparser......
Revision Changes Path
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/AdvancedInputStream.java
Index: AdvancedInputStream.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
/**
*
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: AdvancedInputStream.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class AdvancedInputStream extends InputStream {

    /** The wrapped stream every call is delegated to. */
    private InputStream is = null;

    /** The request that is notified about reads, skips and the close. */
    private BaseResourceRequest request = null;

    /**
     * Create a new <code>AdvancedInputStream</code> instance.
     *
     * @param is the stream to wrap.
     * @param request the request that receives the progress events; it is
     * cast to <code>BaseResourceRequest</code>, so any other implementation
     * fails with a <code>ClassCastException</code>.
     */
    public AdvancedInputStream( InputStream is, ResourceRequest request ) {
        this.is = is;
        this.request = (BaseResourceRequest)request;
    }

    /**
     * Read a single byte and report it to the request.
     *
     * @return the byte read or -1 at the end of the stream.
     */
    public int read() throws IOException {

        int v = is.read();

        //-1 signals end of stream: no byte was consumed, so nothing to report
        if ( v != -1 )
            request.fireDataEvent( 1 );

        return v;

    }

    /**
     * Fill the given buffer and report the number of bytes actually read.
     *
     * @return the number of bytes read or -1 at the end of the stream.
     */
    public int read( byte b[] ) throws IOException {

        int v = is.read( b );
        fireBytesConsumed( v );
        return v;

    }

    /**
     * Read up to <code>len</code> bytes and report the number of bytes
     * actually read.
     *
     * @return the number of bytes read or -1 at the end of the stream.
     */
    public int read( byte b[], int off, int len ) throws IOException {

        int v = is.read( b, off, len );
        fireBytesConsumed( v );
        return v;

    }

    /**
     * Skip up to <code>n</code> bytes and report the number of bytes actually
     * skipped.
     *
     * @return the number of bytes skipped.
     */
    public long skip( long n ) throws IOException {

        long v = is.skip( n );
        fireBytesConsumed( v );
        return v;

    }

    //non-read related

    public int available() throws IOException {
        return is.available();
    }

    /**
     * Close the wrapped stream and then tell the request it was closed.
     */
    public void close() throws IOException {
        is.close();
        request.fireOnClosed();
    }

    public void mark(int readlimit) {
        is.mark( readlimit );
    }

    public void reset() throws IOException {
        is.reset();
    }

    public boolean markSupported() {
        return is.markSupported();
    }

    /**
     * Report consumed bytes to the request.  The wrapped stream may return
     * fewer bytes than requested, 0, or -1 at the end of the stream, so only
     * a positive count is reported.
     */
    private void fireBytesConsumed( long count ) {

        if ( count > 0 )
            request.fireDataEvent( count );

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/Authenticator.java
Index: Authenticator.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.util.*;
import java.net.PasswordAuthentication;
/**
*
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: Authenticator.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class Authenticator extends java.net.Authenticator {

    /** User name handed out on an authentication request; null when unset. */
    String username = null;

    /** Password handed out on an authentication request; null when unset. */
    String password = null;

    public Authenticator() { }

    /**
     * Supply the configured credentials.
     *
     * @return the configured user name and password, or <code>null</code>
     * when either of them has not been set (previously an unset password
     * caused a <code>NullPointerException</code>).
     */
    protected PasswordAuthentication getPasswordAuthentication() {

        if ( username == null || password == null ) {
            //incomplete credentials: report that none are provided
            return null;
        }

        return new PasswordAuthentication( username, password.toCharArray() );

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/BaseResourceRequest.java
Index: BaseResourceRequest.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
/**
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: BaseResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public abstract class BaseResourceRequest implements ResourceRequest {

    /** Value new requests start with for {@link #getFollowRedirects()}. */
    public static boolean FOLLOW_REDIRECTS = true;

    /** Initial buffer capacity used when the content length is unknown. */
    private static final int DEFAULT_BUFFER_CAPACITY = 100000;

    /** Head room added to a known content length when sizing the buffer. */
    private static final int CONTENT_LENGTH_SLACK = 5000;

    /** Number of bytes requested from the input stream per read. */
    private static final int COPY_CHUNK_SIZE = 200;

    private String resource = null;

    /**
     * Single event instance reused by {@link #fireDataEvent(long)}; its
     * fields are overwritten on every call.
     */
    private DataEvent event = new DataEvent();

    private long _ifModifiedSince = -1;

    private long _responseCode = HttpURLConnection.HTTP_OK;

    private String _etag = null;

    /** Content kept by the local cache; empty while nothing is cached. */
    private byte[] data = new byte[0];

    private boolean localCache = false;

    private boolean followRedirects = FOLLOW_REDIRECTS;

    /**
     * A single resource request can now have a given event listener.
     */
    private NetworkEventListener eventListener = null;

    private HashMap requestHeaders = new HashMap();

    /**
     * Get the value of <code>resource</code>.
     */
    public String getResource() {
        return this.resource;
    }

    /**
     * Set the value of <code>resource</code>.
     */
    public void setResource( String resource ) {
        this.resource = resource;
    }

    /**
     * Fire a data event for the given number of bytes.  The shared event
     * instance is updated with the count and the current resource and then
     * dispatched through {@link #fireDataEvent(DataEvent)}.
     *
     * @param count the number of bytes to report.
     */
    public void fireDataEvent( long count ) {

        event.count = count;
        event.resource = resource;

        fireDataEvent( event );

    }

    /**
     * Call <code>init</code> with a fresh event on every listener registered
     * with the factory and then on this request's own listener, if any.
     */
    public void fireInit() {

        DataEvent event = new DataEvent();
        event.request = this;

        Iterator i = ResourceRequestFactory.getNetworkEventListeners();

        while ( i.hasNext() ) {
            ((NetworkEventListener)i.next()).init( event );
        }

        if ( eventListener != null )
            eventListener.init( event );

    }

    /**
     * Dispatch the given event to every listener registered with the factory
     * and then to this request's own listener, if any.  The event's
     * <code>request</code> field is set to this request first.
     */
    public void fireDataEvent( DataEvent event ) {

        event.request = this;

        Iterator i = ResourceRequestFactory.getNetworkEventListeners();

        while ( i.hasNext() ) {
            ((NetworkEventListener)i.next()).dataEvent( event );
        }

        if ( eventListener != null )
            eventListener.dataEvent( event );

    }

    /**
     * Call <code>onClosed</code> on every listener registered with the
     * factory and then on this request's own listener, if any.
     */
    public void fireOnClosed() {

        Iterator i = ResourceRequestFactory.getNetworkEventListeners();

        while ( i.hasNext() ) {
            ((NetworkEventListener)i.next()).onClosed();
        }

        if ( eventListener != null )
            eventListener.onClosed();

    }

    /**
     * Read the whole input stream and decode it with the platform default
     * charset.
     *
     * @see ResourceRequest
     */
    public String getInputStreamAsString() throws IOException {
        return new String( getInputStreamAsByteArray() );
    }

    /**
     * Read the whole input stream into a byte array.  The stream is closed
     * when this method returns, whether reading succeeded or not.
     *
     * @throws NetworkException when no input stream is available.
     * @see ResourceRequest
     */
    public byte[] getInputStreamAsByteArray() throws IOException {

        InputStream is = requireInputStream();

        try {

            ByteArrayOutputStream bos =
                new ByteArrayOutputStream( initialBufferCapacity() );

            copy( is, bos );

            return bos.toByteArray();

        } finally {
            is.close();
        }

    }

    /**
     * Return the content as an in-memory stream.  Cached content is used when
     * present; otherwise the content is read and, when the local cache is
     * enabled, kept for later calls.
     *
     * @throws NetworkException when reading fails; any other failure is
     * wrapped in a <code>NetworkException</code>.
     * @see ResourceRequest
     */
    public InputStream getLocalInputStream() throws NetworkException {

        try {

            byte[] data;

            if ( this.data.length > 0 ) {

                //we have cached this... return the cached value.
                data = this.data;

            } else {

                data = getInputStreamAsByteArray();

                if ( localCache )
                    this.data = data;

            }

            return new ByteArrayInputStream( data );

        } catch ( NetworkException n ) {
            throw n;
        } catch ( Throwable t ) {
            throw new NetworkException( t );
        }

    }

    /**
     * Return the locally cached content.  This is an empty array until
     * {@link #getLocalInputStream()} has stored content with the local cache
     * enabled.
     */
    public byte[] getLocalInputStreamAsByteArray() throws IOException {
        //FIXME: this needs to use the cache.
        return this.data;
    }

    public void setLocalCache( boolean v ) {
        this.localCache = v;
    }

    /**
     * Copy this resource request to the given OutputStream.  The input stream
     * is closed when this method returns, whether copying succeeded or not;
     * the output stream is left open.
     *
     * @throws NetworkException when no input stream is available.
     */
    public void toOutputStream( OutputStream out ) throws IOException {

        InputStream is = requireInputStream();

        try {
            copy( is, out );
        } finally {
            is.close();
        }

    }

    public long getIfModifiedSince() {
        return _ifModifiedSince;
    }

    public void setIfModifiedSince( long ifModifiedSince ) {
        this._ifModifiedSince = ifModifiedSince;
    }

    public String getEtag() {
        return _etag;
    }

    public void setEtag( String etag ) {
        this._etag = etag;
    }

    /**
     * Get and set an HTTP style response code. Only used with HTTP URLs.
     */
    public long getResponseCode() {
        return this._responseCode;
    }

    public void setResponseCode( int responseCode ) {
        this._responseCode = responseCode;
    }

    /**
     * Default implementation: the content length is unknown.
     *
     * @return always -1.
     */
    public int getContentLength() throws IOException {
        return -1;
    }

    public void setEventListener( NetworkEventListener eventListener ) {
        this.eventListener = eventListener;
    }

    public String getHeaderField( String name ) {
        //default impl always returns null
        return null;
    }

    public void setRequestHeaderField( String name, String value ) {
        requestHeaders.put( name, value );
    }

    /**
     * @return an iterator over the names of all request headers set so far.
     */
    public Iterator getRequestHeaderFields() {
        return requestHeaders.keySet().iterator();
    }

    public String getRequestHeaderField( String name ) {
        return (String)requestHeaders.get( name );
    }

    /**
     * Default implementation: changing the request method is not supported.
     *
     * @throws NetworkException always.
     */
    public void setRequestMethod( String method ) throws NetworkException {
        throw new NetworkException( "not implemented" );
    }

    public boolean getFollowRedirects() {
        return followRedirects;
    }

    public void setFollowRedirects( boolean v ) {
        this.followRedirects = v;
    }

    /**
     * Default implementation: no redirect information is tracked, so this is
     * the same as {@link #getResource()}.
     */
    public String getResourceFromRedirect() {
        return getResource();
    }

    /**
     * Fetch the input stream and fail with a descriptive exception instead of
     * a NullPointerException when the implementation returns none.
     */
    private InputStream requireInputStream() throws IOException {

        InputStream is = getInputStream();

        if ( is == null )
            throw new NetworkException( "No input stream available for resource: " + resource );

        return is;

    }

    /**
     * Pick the initial buffer capacity: the reported content length plus some
     * head room, or a large default when the length is unknown.
     */
    private int initialBufferCapacity() {

        int contentLength = -1;

        try {
            contentLength = getContentLength();
        } catch ( IOException ignored ) {
            //the length is only a sizing hint; fall back to the default
        }

        //-1 means unknown.  It has to be tested BEFORE the head room is
        //added, otherwise the default is never used.  The upper bound keeps
        //the addition from overflowing.
        if ( contentLength < 0 ||
             contentLength > Integer.MAX_VALUE - CONTENT_LENGTH_SLACK ) {

            //use a larger default than what's provided with the
            //ByteArrayOutputStream
            return DEFAULT_BUFFER_CAPACITY;

        }

        return contentLength + CONTENT_LENGTH_SLACK;

    }

    /**
     * Copy everything from the input to the output in small chunks.  Neither
     * stream is closed.
     */
    private static void copy( InputStream in, OutputStream out ) throws IOException {

        byte[] chunk = new byte[ COPY_CHUNK_SIZE ];

        int readCount;

        while ( ( readCount = in.read( chunk ) ) > 0 ) {
            out.write( chunk, 0, readCount );
        }

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/BlockingResourceRequest.java
Index: BlockingResourceRequest.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
/**
* SCHEME - block://localhost/?duration=100
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: BlockingResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class BlockingResourceRequest extends BaseResourceRequest implements ResourceRequest {

    /** URI scheme this request is meant for. */
    public static final String SCHEME = "block";

    /**
     * Nothing to set up for this request type.
     */
    public void init() throws IOException {
        //intentionally empty
    }

    /**
     * Block the calling thread for 100 milliseconds.  The pause is fixed;
     * no <code>duration</code> parameter of the resource is read here.
     *
     * @return always <code>null</code>: this request produces no content.
     * @throws IOException when the thread is interrupted while sleeping; the
     * interrupt status of the thread is restored before throwing.
     */
    public InputStream getInputStream() throws IOException {

        try {

            Thread.sleep( 100 );
            return null;

        } catch ( InterruptedException t ) {

            //keep the interrupt visible to the caller's thread management
            Thread.currentThread().interrupt();

            IOException e = new IOException( t.getMessage() );
            e.initCause( t );
            throw e;

        }

    }

    /**
     * Placeholder entry point; does nothing.
     */
    public static void main( String[] args ) {
        //intentionally empty
    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/DOCUMENTATION
Index: DOCUMENTATION
===================================================================
** FIXME **
- What are we going to call this package?
- Jakarta Netto ?
- httplib?
- What about the transparent HTCache work?
- Look at all the imports and make SURE it's isolated
- Might have to move the event package over.
** DOCUMENTATION **
- InputStream utilities
- String content
- byte array
- Local cached input stream
- Initial support for Digest and Basic authentication and an API to support
this.
- Support for easy and common HTTP customization (changing UserAgent, etc)
- Identical default behavior between URL and HTTPClient implementations
- Support for session cookies when java.net.URL is used. (Still being
implemented)
- No infinite timeouts per the default java.net.URL. We set default timeouts to
5 minutes instead of infinity.
- NetworkException instead of generic IOException. This allows you to use
network IO alongside fileIO and note the difference in try/catch blocks. This
also throws the 'cause' of the exception which IOException does not.
- Download progress via event listeners. Currently most network IO packages are
just black boxes. We support blocking IO with event callbacks during reads.
- HTTP status codes in exceptions. java.net.URL doesn't support this.
- Support for Jakarta HttpClient and java.net.URL as IO implementations. While
java.net.URL is reliable we believe that Jakarta HttpClient is very compelling.
- Support for a future async IO API... While blocking IO has proven reliable we
want to provide an async IO layer which uses event callbacks.
- Simple ResourceRequest API which has no requirements on internal JRE classes.
- Provide portability to migrate away from non-free software (java.net.URL)
- Easy registration of IO handlers for URI schemes. One can now create URI-based
IO handlers and read from them. This comes in handy for ad hoc scheme creation.
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/DataEvent.java
Index: DataEvent.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import java.text.DateFormat;
/**
* Represents a data request
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: DataEvent.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class DataEvent {

    /**
     * Number of bytes read.
     */
    public long count;

    /**
     * URL of the resource the event belongs to.
     */
    public String resource;

    /**
     * The request this event belongs to.
     */
    public ResourceRequest request;

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/HTTPClientNetworkResource.java
Index: HTTPClientNetworkResource.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import org.apache.commons.httpclient.*;
import org.apache.commons.httpclient.methods.*;
/**
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: HTTPClientNetworkResource.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class HTTPClientNetworkResource extends BaseResourceRequest implements ResourceRequest {

    /** Socket timeout in milliseconds (three minutes). */
    public static final int TIMEOUT = 3 * 1000 * 60;

    /** Connection manager shared by all instances. */
    public static HttpConnectionManager hcm = new HttpConnectionManager();

    /** Connection obtained by {@link #init()}. */
    private HttpConnection conn = null;

    /**
     * Obtain a connection for the current resource from the shared
     * connection manager.  A connection that is not open yet gets the socket
     * timeout applied.
     *
     * @throws IOException wrapping any <code>HttpException</code> raised
     * while obtaining the connection.
     */
    public void init() throws IOException {

        try {

            conn = hcm.getConnection( getResource() );

            if ( ! conn.isOpen() ) {
                //set socket timeout...
                conn.setSoTimeout( TIMEOUT );
            }

        } catch ( HttpException cause ) {

            IOException wrapped = new IOException( cause.getMessage() );
            wrapped.initCause( cause );
            throw wrapped;

        }

    }

    /**
     * Not functional: the implementation below is disabled.
     *
     * @return always <code>null</code>.
     */
    public InputStream getInputStream() throws IOException {

        // try {
        //     //now get the method so that we can execute it.
        //     HttpMethod method = new GetMethod( new HttpMethod( getResource() ).getPath() );
        //     method.setFollowRedirects( true );
        //     int result = method.execute( new HttpState(), conn );
        //     //FIXME: 302 isn't being followed.
        //     //302 redirect
        //     if ( method.getStatusCode() == HttpURLConnection.HTTP_MOVED_TEMP ) {
        //         //technically we shouldn't get this.. but if we do
        //         String location = method.getResponseHeader( "Location" ).getValue();
        //         Log.message( "Redirect: " + location );
        //         HTTPClientNetworkResource redirect = new HTTPClientNetworkResource();
        //         redirect.setResource( location );
        //         redirect.init();
        //         return redirect.getInputStream();
        //     }
        //     // when should we throw an exception? 404?
        //     if ( method.getStatusCode() != 200 ) {
        //         throw new IOException( "HTTP " + method.getStatusCode() + " - " + method.getStatusText() );
        //     }
        //     InputStream is = new AdvancedInputStream( method.getResponseBodyAsStream(), this );
        //     return is;
        // } catch ( HttpException e ) {
        //     throw new IOException( e.getMessage() );
        // } finally {
        //     //we have to release this connection when done or else we run out of them.
        //     hcm.releaseConnection( conn );
        // }

        //NOTE: the above code does not compile and we are not using this code
        //so returning null is acceptable.
        return null;

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/MalformedResourceException.java
Index: MalformedResourceException.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.IOException;
import java.net.*;
/**
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: MalformedResourceException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class MalformedResourceException extends NetworkException {

    /**
     * Build the exception with the given detail message.
     *
     * @param message the detail message handed to
     * <code>NetworkException</code>.
     */
    public MalformedResourceException( final String message ) {
        super( message );
    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/NetworkEventListener.java
Index: NetworkEventListener.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import java.text.DateFormat;
/**
* A NetworkEventListener receives the events dispatched by resource requests:
* initialization, data progress and stream close.
*
* Note that all NetworkEventListeners should be threadsafe.
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: NetworkEventListener.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public interface NetworkEventListener extends EventListener {

    /**
     * Called once per request.
     *
     * @param event the event describing the request.
     */
    void init( DataEvent event );

    /**
     * Called with a data event fired for the request.
     *
     * @param event the event describing the data.
     */
    void dataEvent( DataEvent event );

    /**
     * Called when this stream is closed.
     */
    void onClosed();

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/NetworkException.java
Index: NetworkException.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.IOException;
import java.net.*;
/**
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: NetworkException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class NetworkException extends IOException {

    private ResourceRequest request = null;

    /** The wrapped exception, when there is one; see {@link #getException()}. */
    public Exception e = null;

    private URL _url = null;

    private URLConnection _urlConnection = null;

    /** Parsed status code; -1 until it has been parsed successfully. */
    private int responseCode = -1;

    /**
     * Create a new <code>NetworkException</code> instance.
     *
     * @param message the detail message.
     */
    public NetworkException( String message ) {
        super( message );
    }

    /**
     * Create a new <code>NetworkException</code> instance that wraps the
     * given throwable.  The throwable's message is reused and the throwable
     * is recorded as the cause so that its stack trace is not lost.
     *
     * @param t the throwable to wrap; may be <code>null</code>.
     */
    public NetworkException( Throwable t ) {

        super( t == null ? null : t.getMessage() );

        if ( t instanceof Exception )
            this.e = (Exception)t;

        initCause( t );

    }

    /**
     * Create a new <code>NetworkException</code> instance.
     *
     * @param message the detail message.
     * @param e the exception to record as the cause.
     * @param request the request that failed.
     * @param _url the URL that was requested.
     * @param _urlConnection the connection used; {@link #getResponseCode()}
     * reads the status line from it.
     */
    public NetworkException( String message,
                             Exception e,
                             ResourceRequest request,
                             URL _url,
                             URLConnection _urlConnection ) {

        super( message ); //why doesn't java.io.IOException support nesting?

        this.e = e;
        this.request = request;
        this._url = _url;
        this._urlConnection = _urlConnection;

        initCause( e );

    }

    /**
     * Create a new <code>NetworkException</code> instance that reuses the
     * message of the given exception.
     *
     * @param e the exception to record as the cause; may be
     * <code>null</code>.
     * @param request the request that failed.
     * @param _url the URL that was requested.
     * @param _urlConnection the connection used; {@link #getResponseCode()}
     * reads the status line from it.
     */
    public NetworkException( Exception e,
                             ResourceRequest request,
                             URL _url,
                             URLConnection _urlConnection ) {

        super( e == null ? null : e.getMessage() ); //why doesn't java.io.IOException support nesting?

        this.e = e;
        this.request = request;
        this._url = _url;
        this._urlConnection = _urlConnection;

        initCause( e );

    }

    public ResourceRequest getResourceRequest() {
        return request;
    }

    public URL getURL() {
        return _url;
    }

    public URLConnection getURLConnection() {
        return _urlConnection;
    }

    public Exception getException() {
        return e;
    }

    /**
     * Get the status code from the status line of the connection this
     * exception was created with.
     *
     * @return the status code, or -1 when there is no connection, no status
     * line, or the status line cannot be parsed.
     */
    public int getResponseCode() {

        if ( _urlConnection == null ) {
            return -1;
        }

        if ( responseCode == -1 ) {

            //the status line is looked up under the null header name
            String status = (String)_urlConnection.getHeaderField( null );

            if ( status == null ) {
                return -1;
            }

            responseCode = parseStatusCode( status );

        }

        return responseCode;

    }

    /**
     * Extract the numeric code from a status line such as
     * <code>HTTP/1.1 200 OK</code>.  The code is the run of digits after the
     * first space, so the length of the protocol token does not matter.
     *
     * @return the code or -1 when the line does not have that shape.
     */
    private static int parseStatusCode( String status ) {

        int begin = status.indexOf( ' ' );

        if ( begin < 0 ) {
            return -1;
        }

        ++begin;

        int end = begin;

        while ( end < status.length() && Character.isDigit( status.charAt( end ) ) ) {
            ++end;
        }

        if ( end == begin ) {
            return -1;
        }

        try {
            return Integer.parseInt( status.substring( begin, end ) );
        } catch ( NumberFormatException malformed ) {
            //digits only, so this means the number does not fit an int
            return -1;
        }

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/ResourceRequest.java
Index: ResourceRequest.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
/**
* A ResourceRequest is a generic interface to a network resource such as an
* HTTP URL.
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: ResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public interface ResourceRequest {

    /**
     * Perform all initialization and connection to the remote server.  Call
     * this BEFORE getInputStream() if you want to perform other operations
     * first.  When using a HEAD request this must be used and not
     * getInputStream().
     */
    void init() throws IOException;

    /**
     * Get an input stream for this content.
     */
    InputStream getInputStream() throws IOException;

    /**
     * Get the resource for this request.
     */
    String getResource();

    /**
     * Set the resource for this request.
     */
    void setResource( String resource );

    /**
     * Get the resource but make sure all redirects are taken into
     * consideration.
     */
    String getResourceFromRedirect();

    /**
     * Get the given input stream as a String by reading until all the data is
     * held locally.
     */
    String getInputStreamAsString() throws IOException;

    /**
     * Get the given input stream as a byte array.
     */
    byte[] getInputStreamAsByteArray() throws IOException;

    /**
     * Get the content as a locally held input stream; see
     * {@link #setLocalCache(boolean)}.
     */
    InputStream getLocalInputStream() throws NetworkException;

    /**
     * Get the locally held content as a byte array.
     */
    byte[] getLocalInputStreamAsByteArray() throws IOException;

    /**
     * When true we cache getLocalInputStream() so that multiple requests are
     * returned from local data.  Provides more flexibility but uses more
     * memory.
     */
    void setLocalCache( boolean v );

    /**
     * Copy this input stream to an OutputStream.
     */
    void toOutputStream( OutputStream out ) throws IOException;

    /**
     * Get the If-Modified-Since value for HTTP URL connections and protocols
     * that support similar operation.
     *
     * A value of -1 means do not use the If-Modified-Since header.
     *
     * Fri Jun 06 2003 08:34 PM (burton@peerfear.org): Currently just URLResourceRequest
     */
    long getIfModifiedSince();

    /**
     * Set the If-Modified-Since value; -1 means do not use the header.
     */
    void setIfModifiedSince( long ifModifiedSince );

    /**
     * Get the HTTP ETag to use with If-None-Match.
     */
    String getEtag();

    /**
     * Set the HTTP ETag to use with If-None-Match.
     */
    void setEtag( String etag );

    /**
     * Get an HTTP style response code.  Only used with HTTP URLs.
     */
    long getResponseCode();

    /**
     * Set an HTTP style response code.  Only used with HTTP URLs.
     */
    void setResponseCode( int responseCode );

    /**
     * Return the content length of this request or -1 if not known.
     */
    int getContentLength() throws IOException;

    /**
     * Set the listener that receives the events of this request.
     */
    void setEventListener( NetworkEventListener eventListener );

    /**
     * Get a given response header.
     */
    String getHeaderField( String name );

    /**
     * Set a given request header such as UserAgent, ETag, etc.
     */
    void setRequestHeaderField( String name, String value );

    /**
     * Get the names of all set request headers.
     */
    Iterator getRequestHeaderFields();

    /**
     * Get the value of a request header set earlier.
     */
    String getRequestHeaderField( String name );

    /**
     * Set the request method.
     */
    void setRequestMethod( String method ) throws NetworkException;

    /**
     * Get the follow-redirects flag.
     */
    boolean getFollowRedirects();

    /**
     * Set the follow-redirects flag.
     */
    void setFollowRedirects( boolean v );

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/ResourceRequestFactory.java
Index: ResourceRequestFactory.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import org.apache.log4j.*;
/**
* Get a ResourceRequest for a given URL. The request is handled based on the
* URL.
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: ResourceRequestFactory.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class ResourceRequestFactory {

    private static Logger log = Logger.getLogger( ResourceRequestFactory.class.getName() );

    /**
     * Specified in java.security to indicate the caching policy for successful
     * name lookups from the name service. The value is specified as an integer
     * to indicate the number of SECONDS to cache the successful lookup.
     *
     * sun.net.inetaddr.ttl:
     *
     * This is a sun private system property which corresponds to
     * networkaddress.cache.ttl. It takes the same value and has the same meaning,
     * but can be set as a command-line option. However, the preferred way is to
     * use the security property mentioned above.
     *
     * A value of -1 indicates "cache forever".
     *
     * The default is five minutes. (It used to be computed as 5 * 60 * 1000,
     * i.e. as if the unit were milliseconds, which yields roughly 3.5 days.)
     */
    public static int NETWORKADDRESS_CACHE_TTL = 5 * 60;

    /**
     * These properties specify the default connect and read timeout (resp.) for
     * the protocol handler used by java.net.URLConnection.
     *
     * sun.net.client.defaultConnectTimeout specifies the timeout (in
     * milliseconds) to establish the connection to the host. For example for
     * http connections it is the timeout when establishing the connection to
     * the http server. For ftp connection it is the timeout when establishing
     * the connection to ftp servers.
     *
     * sun.net.client.defaultReadTimeout specifies the timeout (in milliseconds)
     * when reading from input stream when a connection is established to a
     * resource.
     */
    public static int DEFAULT_CONNECT_TIMEOUT = 1 * 60 * 1000;

    public static int DEFAULT_READ_TIMEOUT = DEFAULT_CONNECT_TIMEOUT;

    //FIXME: (should this be a linked list?)
    private static ArrayList listeners = new ArrayList( 30 );

    /**
     * Maps a lower-case URL scheme (String) to the Class of the ResourceRequest
     * implementation that handles it. Lazily created by init().
     */
    private static HashMap schemeMap = null;

    private static boolean transparentHTCacheEnabled = false;

    /**
     * When offline we either throw an exception or return content from the
     * cache directly. This can be used to run code that does not depend on the
     * network.
     */
    private static boolean offline = false;

    /**
     * Get a ResourceRequest without an ETag.
     *
     * @param resource the URL to fetch.
     * @param ifModifiedSince value for a conditional fetch, or -1 for none.
     * @throws NetworkException when no request can be created for the resource.
     */
    public static ResourceRequest getResourceRequest( String resource,
                                                      long ifModifiedSince ) throws NetworkException {

        return getResourceRequest( resource, ifModifiedSince, null );

    }

    /**
     * Get a ResourceRequest for the protocol represented in the resource URL.
     * It is important that we use a ResourceRequest implementation that supports
     * fetching the URL.
     *
     * @param resource the URL to fetch; must contain a scheme ("http:", ...).
     * @param ifModifiedSince value for a conditional fetch, or -1 for none.
     * @param etag the ETag for a conditional fetch, or null for none.
     * @return an initialized request for the resource.
     * @throws NetworkException when the resource has no (supported) scheme or
     * when the request could not be created or initialized.
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    public static ResourceRequest getResourceRequest( String resource,
                                                      long ifModifiedSince,
                                                      String etag ) throws NetworkException {

        //fail with a meaningful exception instead of a NullPointerException.
        if ( resource == null )
            throw new MalformedResourceException( "Not supported: " + resource );

        //make sure we are initialized correctly.
        ResourceRequestFactory.init();

        //make sure we have an index..
        int schemeIndex = resource.indexOf( ":" );

        if ( schemeIndex == -1 )
            throw new NetworkException( "Unknown scheme: '" + resource + "'" );

        String scheme = resource.substring( 0, schemeIndex );

        if ( scheme.equals( "" ) )
            throw new MalformedResourceException( "Not supported: " + resource );

        Class clazz = lookupScheme( scheme );

        if ( clazz == null ) {
            throw new MalformedResourceException( "Scheme not supported: " + scheme );
        }

        try {

            ResourceRequest request = (ResourceRequest)clazz.newInstance();

            request.setResource( resource );

            //setup resource request options.
            request.setIfModifiedSince( ifModifiedSince );

            //set the etag... when its null nothing will happen
            request.setEtag( etag );

            request.init();

            return request;

        } catch ( Throwable t ) {

            //don't bury an exception that already has the right type inside
            //another NetworkException.
            if ( t instanceof NetworkException )
                throw (NetworkException)t;

            throw new NetworkException( t );

        }

    }

    /**
     * Get a ResourceRequest without any conditional fetch options.
     *
     * @see #getResourceRequest( String, long, String )
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static ResourceRequest getResourceRequest( String resource ) throws NetworkException {
        return getResourceRequest( resource, -1 );
    }

    /**
     * Get a ResourceRequest for a java.net.URL.
     *
     * @see #getResourceRequest( String )
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static ResourceRequest getResourceRequest( URL resource ) throws NetworkException {
        return getResourceRequest( resource.toString() );
    }

    /**
     * Add an event listener to this instance of the factory. This provides a
     * mechanism to give default listeners to each new ResourceRequest.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    public static void addEventListener( NetworkEventListener listener ) {

        listeners.add( listener );

    }

    /**
     * Get all event listeners.
     *
     * @return an iterator over the live listener list.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static Iterator getNetworkEventListeners() {

        return listeners.iterator();

    }

    /**
     * Make sure the factory is initialized. The networking system properties
     * are (re)applied on every call so that changes to the public tunables
     * above are picked up; the scheme map is only built once.
     *
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    private synchronized static void init() {

        //set the authenticator to use

        //FIXME: remove this until we figure out how to do proxy authentication.
        //java.net.Authenticator.setDefault ( new Authenticator() );

        // A full list of properties is available here:
        // http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html

        System.setProperty( "sun.net.inetaddr.ttl",
                            Integer.toString( NETWORKADDRESS_CACHE_TTL ) );

        System.setProperty( "sun.net.client.defaultReadTimeout",
                            Integer.toString( DEFAULT_READ_TIMEOUT ) );

        System.setProperty( "sun.net.client.defaultConnectTimeout",
                            Integer.toString( DEFAULT_CONNECT_TIMEOUT ) );

        if ( schemeMap == null ) {

            HashMap map = new HashMap();

            map.put( "file", URLResourceRequest.class );
            map.put( "http", URLResourceRequest.class );
            map.put( "https", URLResourceRequest.class );
            //map.put( "sitefilter", SiteFilterRequest.class );
            map.put( BlockingResourceRequest.SCHEME, BlockingResourceRequest.class );

            //publish only the fully populated map.
            schemeMap = map;

        }

    }

    /**
     * Find the handler class for a scheme. An exact match wins; otherwise the
     * lower-cased scheme is tried because URL schemes are case-insensitive
     * ("HTTP://..." is the same as "http://...").
     *
     * Callers must have called init() first.
     */
    private static Class lookupScheme( String scheme ) {

        Class clazz = (Class)schemeMap.get( scheme );

        if ( clazz == null )
            clazz = (Class)schemeMap.get( scheme.toLowerCase( Locale.ENGLISH ) );

        return clazz;

    }

    /**
     * Return true if we support fetching content with the given scheme.
     * Examples would be "http" and "file"
     *
     * @param scheme the scheme name without the trailing ':'; null is never
     * supported.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static boolean isSupportedScheme( String scheme ) {

        if ( scheme == null )
            return false;

        //the scheme map is built lazily; without this a call made before the
        //first getResourceRequest() would fail with a NullPointerException.
        ResourceRequestFactory.init();

        return lookupScheme( scheme ) != null;

    }

    /**
     * When the transparent HTCache is enabled we will keep content local
     * similar to the Mozilla cache and return the cached copy and use
     * if-Modified-Since when necessary.
     *
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static void setTransparentHTCacheEnabled( boolean enabled ) {
        transparentHTCacheEnabled = enabled;
    }

    /**
     * Return true if we can enable the htcache.
     *
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static boolean isTransparentHTCacheEnabled() {
        return transparentHTCacheEnabled;
    }

    /**
     * Enable/disable offline operation.
     *
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static void setOffline( boolean offline ) {
        ResourceRequestFactory.offline = offline;
    }

    /**
     * Return true when the factory is in offline mode.
     *
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static boolean isOffline() {
        return offline;
    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLCookieManager.java
Index: URLCookieManager.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.zip.*;
/**
*
* By default java.net.URL does NOT handle cookies. This is a simple extension
* that allows us to persist cookies in the VM during runtime.
*
* FIXME: How can we make sure to delete older sites...?! no need for this to
* grow to infinite size.
*
* @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
*/
public class URLCookieManager {

    /**
     * Site (String, see getSite()) to Hashtable of cookie name to cookie value.
     */
    static Hashtable cookies = new Hashtable();

    /**
     * Get the cookies for a site. When none are available return null.
     *
     * @param site the site key as computed by getSite(); may be null.
     * @return the cookie name/value table for the site or null.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static Hashtable getCookies( String site ) {

        //Hashtable does not accept null keys.
        if ( site == null )
            return null;

        return (Hashtable)cookies.get( site );

    }

    /**
     * Add cookies to this request and perform any other init.
     *
     * @param request the request that is about to be sent.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static void init( ResourceRequest request ) {

        Hashtable siteCookies = getCookies( getSite( request ) );

        if ( siteCookies == null || siteCookies.isEmpty() )
            return;

        //the HTTP request header is named "Cookie" (singular); servers ignore
        //an unknown "Cookies" header.
        request.setRequestHeaderField( "Cookie", getCookiesHeader( siteCookies ) );

    }

    /**
     * Compute the site of a request: the scheme plus authority of its resource,
     * for example "http://example.com" for "http://example.com/feed.xml".
     *
     * @param request the request whose resource should be inspected.
     * @return everything up to (not including) the first '/' of the path.
     */
    public static String getSite( ResourceRequest request ) {

        String resource = request.getResource();

        //skip past "://" before looking for the path; otherwise the search
        //finds the first '/' of "://" itself and every site becomes "http:".
        int schemeEnd = resource.indexOf( "://" );
        int hostStart = ( schemeEnd == -1 ) ? 0 : schemeEnd + "://".length();

        int end = resource.indexOf( "/", hostStart );

        if ( end == -1 )
            end = resource.length();

        return resource.substring( 0, end );

    }

    /**
     * Save the cookies FROM this request.
     *
     * Only the leading name=value pair of the Set-Cookie header is stored;
     * attributes such as Path, Domain or Expires are ignored, so cookies are
     * kept per site until the VM exits. When the response carries no
     * Set-Cookie header nothing happens.
     *
     * @param request a request whose response headers are available.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static void save( ResourceRequest request ) {

        String header = request.getHeaderField( "Set-Cookie" );

        if ( header == null )
            return;

        //everything after the first ';' are cookie attributes, not cookies.
        int attributes = header.indexOf( ';' );
        String pair = ( attributes == -1 ) ? header : header.substring( 0, attributes );

        Hashtable fresh = parseCookieHeader( pair );

        if ( fresh.isEmpty() )
            return;

        String site = getSite( request );

        //merge the new cookies into the site cookies; the lock keeps the
        //lookup and the insert of a new site table together.
        synchronized ( cookies ) {

            Hashtable existing = (Hashtable)cookies.get( site );

            if ( existing == null ) {
                existing = new Hashtable();
                cookies.put( site, existing );
            }

            existing.putAll( fresh );

        }

    }

    /**
     * Build the value of a Cookie request header ("a=1; b=2") from a table of
     * cookie names and values.
     *
     * @param cookies cookie name (String) to cookie value (String).
     * @return the header value; empty when the table is empty.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static String getCookiesHeader( Hashtable cookies ) {

        Enumeration keys = cookies.keys();

        StringBuffer buff = new StringBuffer( 1024 );

        while ( keys.hasMoreElements() ) {

            String name = (String)keys.nextElement();
            String value = (String)cookies.get( name );

            if ( buff.length() > 0 )
                buff.append( "; " );

            buff.append( name );
            buff.append( "=" );
            buff.append( value );

        }

        return buff.toString();

    }

    /**
     * Parse a given Cookie header into a hashtable.
     *
     * The format is a ';' separated list of name=value pairs, for example
     * "password=HvS11dffnlD50bOLZYgG4oZFA-U; username=burtonator". Whitespace
     * around pairs, names and values is ignored, and tokens without a name or
     * without '=' are skipped.
     *
     * @param header the header value; null yields an empty table.
     * @return cookie name (String) to cookie value (String), never null.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public static Hashtable parseCookieHeader( String header ) {

        Hashtable result = new Hashtable();

        if ( header == null )
            return result;

        //tokenize on ';' instead of assuming the exact separator "; ".
        StringTokenizer pairs = new StringTokenizer( header, ";" );

        while ( pairs.hasMoreTokens() ) {

            String pair = pairs.nextToken().trim();

            //only the first '=' splits; values may contain '=' themselves.
            int split = pair.indexOf( '=' );

            if ( split <= 0 )
                continue;

            String name = pair.substring( 0, split ).trim();
            String value = pair.substring( split + 1 ).trim();

            result.put( name, value );

        }

        return result;

    }

    public static void main( String[] args ) {

        parseCookieHeader( "password=HvS11dffnlD50bOLZYgG4oZFA-U; username=burtonator; rojoWeb=12.43.53.196.1091730560640949; JSESSIONID=B1245A7FEB43537E994324A157330F3A" );

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLResolver.java
Index: URLResolver.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
/**
* This is a portable and thin URL resolver. The goal is to quickly resolve and
* normalize URLs. This includes potentially saving redirects and having them
* fully qualified.
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: URLResolver.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class URLResolver {

    private static final String HTTP_PREFIX = "http://";

    /**
     * Normalize an http URL:
     *
     * - a URL that consists of scheme and host only gets the root path "/"
     * - a host made of exactly two labels (like xmlhack.com) gets "www."
     *   prepended
     *
     * Anything that is not an http URL is returned unchanged.
     *
     * @param resource the URL to normalize; may be null.
     * @return the normalized URL, or null when resource is null.
     */
    public static String resolve( String resource ) {

        if ( resource == null )
            return null;

        //include "www" in hostnames like xmlhack.com
        resource = resolveTrailingSlash( resource );
        resource = resolveNoHostname( resource );

        return resource;

    }

    /**
     * Prepend "www." when the host of an http URL has exactly one dot. Only
     * the host is inspected: dots in the path or query must not influence the
     * decision, and dot-less hosts such as "localhost" are left alone.
     */
    private static String resolveNoHostname( String resource ) {

        if ( ! isHttp( resource ) )
            return resource;

        int hostStart = HTTP_PREFIX.length();
        String authority = resource.substring( hostStart, endOfAuthority( resource ) );

        //leave URLs with user info (user:pass@host) alone.
        if ( authority.indexOf( '@' ) != -1 )
            return resource;

        //ignore a port number (this also skips IPv6 literals).
        String host = authority;
        int port = host.indexOf( ':' );

        if ( port != -1 )
            host = host.substring( 0, port );

        int first = host.indexOf( '.' );

        //no dot at all, or a dot on the edge of the host name.
        if ( first <= 0 || first == host.length() - 1 )
            return resource;

        //more than one dot: there already is a host name on this domain.
        if ( host.indexOf( '.', first + 1 ) != -1 )
            return resource;

        return resource.substring( 0, hostStart ) +
               "www." +
               resource.substring( hostStart );

    }

    /**
     * Append the root path to an http URL without any path, query or
     * fragment: "http://example.com" becomes "http://example.com/". URLs that
     * merely END in "com", "org" or "net" somewhere in their path are not
     * touched.
     */
    private static String resolveTrailingSlash( String resource ) {

        if ( isHttp( resource ) &&
             resource.length() > HTTP_PREFIX.length() &&
             endOfAuthority( resource ) == resource.length() ) {

            return resource + "/";

        }

        return resource;

    }

    /**
     * True when the resource starts with "http://" (schemes are
     * case-insensitive).
     */
    private static boolean isHttp( String resource ) {
        return resource.regionMatches( true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length() );
    }

    /**
     * Index of the first character after the authority (host and port) of an
     * http URL: the first '/', '?' or '#' after the scheme or, when there is
     * none, the length of the URL.
     */
    private static int endOfAuthority( String resource ) {

        for ( int i = HTTP_PREFIX.length(); i < resource.length(); ++i ) {

            char c = resource.charAt( i );

            if ( c == '/' || c == '?' || c == '#' )
                return i;

        }

        return resource.length();

    }

}
1.1 jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLResourceRequest.java
Index: URLResourceRequest.java
===================================================================
/*
* Copyright 1999,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.feedparser.network;
import java.io.*;
import java.net.*;
import java.util.*;
import java.util.zip.*;
import org.apache.log4j.*;
import sun.net.www.protocol.http.HttpURLConnection;
/**
* ResourceRequest implementation that uses java.net.URL as the backend.
*
* Differences from other ResourceRequests.
*
* setRequestMethod() - Allows us to change the request type (HEAD, etc).
*
* getContentLength() - Returns the length/size of the content represented by
* this resource. Can be used by clients with setRequestMethod( "HEAD" ) to
* find the size of a remote resource without doing a full fetch.
*
* @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
* @version $Id: URLResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
*/
public class URLResourceRequest extends BaseResourceRequest implements ResourceRequest {

    private static Logger log = Logger.getLogger( URLResourceRequest.class.getName() );

    public static final String ACCEPT_ENCODING_HEADER = "Accept-Encoding";
    public static final String IF_NONE_MATCH_HEADER = "If-None-Match";
    public static final String GZIP_ENCODING = "gzip";
    public static final String USER_AGENT_HEADER = "User-Agent";

    public static String USER_AGENT
        = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://jakarta.apache.org/commons/sandbox/feedparser/) Gecko/20021130";

    public static String USER_AGENT_MOZILLA
        = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1) Gecko/20021130";

    /**
     * Not used anymore. Provided for historical reasons.
     */
    public static final String REFERER
        = "http://jakarta.apache.org/commons/sandbox/feedparser/?isAggregator=true";

    /**
     * Largest Content-Length (in bytes) accepted for a non-local resource.
     */
    public static final int MAX_CONTENT_LENGTH = 1000000;

    private URL _url = null;

    /**
     * The underlying connection. Stays null when init() ran while the factory
     * was offline.
     */
    private URLConnection _urlConnection = null;

    private InputStream inputStream = null;

    /**
     * True once initConnection() has been entered.
     */
    private boolean initConnection = false;

    /**
     * Create the URL and the (not yet connected) URLConnection for the
     * resource. Does nothing when the factory is offline.
     *
     * @throws IOException when the resource is not a valid URL or the
     * connection object cannot be created.
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    public void init() throws IOException {

        //if we are offline... we don't need to init.
        if ( ResourceRequestFactory.isOffline() ) { return; }

        //pull from the HTCache if it is enabled and then short-circuit so that
        //we don't fetch from the network.

        //NOTE: currently removed because the htcache wasn't portable. I can OSS
        //this in the future if necessary

        // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() &&
        //      HTCache.hasContentInCache( this.getResource() ) ) {
        //
        //     //get the input stream we can use from the HTCache.
        //     this.inputStream = HTCache.getContentAsInputStream( resource );
        //     return;
        //
        // }

        _url = new URL( this.getResource() );
        _urlConnection = _url.openConnection();

    }

    /**
     * Init the actual connection. Should be called AFTER init() but before
     * getInputStream() so that we can set any runtime params requestMethod,
     * etc. If getInputStream() is called without an initConnection() we do
     * this automatically. initConnection() might not want to be called when
     * doing a HEAD request.
     *
     * @throws NetworkException when there is no connection to initialize
     * (init() was not called or ran while offline), when connecting fails, or
     * when a remote resource announces more than MAX_CONTENT_LENGTH bytes.
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    public void initConnection() throws NetworkException {

        //init() leaves the connection unset when the factory is offline; fail
        //with a meaningful exception instead of a NullPointerException.
        if ( _urlConnection == null ) {
            throw new NetworkException( "Connection is not initialized (init() not called or factory offline) - " +
                                        getResource() );
        }

        long before = System.currentTimeMillis();

        initConnection = true;

        this.fireInit();

        //FIXME: do smart user agent detection. if this is a .html file we can
        //set it to us Mozilla and if not we can use NewsMonster
        //_urlConnection.setRequestProperty( "Referer", REFERER );

        String resource = this.getResource();

        //set the user agent if it hasn't ALREADY been set by the caller.
        if ( getRequestHeaderField( USER_AGENT_HEADER ) == null ) {

            if ( resource.startsWith( "http://diveintomark.org" ) ) {
                //We can play that game too Mark. Nice try...
                _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT_MOZILLA );
            } else {
                _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT );
            }

        }

        _urlConnection.setRequestProperty( ACCEPT_ENCODING_HEADER, GZIP_ENCODING );

        //copy over any headers set in the request..
        Iterator it = getRequestHeaderFields();

        while ( it.hasNext() ) {

            String key = (String)it.next();

            _urlConnection.setRequestProperty( key, getRequestHeaderField( key ) );

        }

        //NOTE: deliberately the public java.net type. The simple name
        //HttpURLConnection resolves to the sun.* implementation class in this
        //file, which https connections do NOT extend - they would silently
        //skip all of the HTTP setup below.
        if ( _urlConnection instanceof java.net.HttpURLConnection ) {

            java.net.HttpURLConnection httpURLConn = (java.net.HttpURLConnection)_urlConnection;

            //per-connection setting only; the static setFollowRedirects()
            //would change the default for every connection in the VM.
            httpURLConn.setInstanceFollowRedirects( getFollowRedirects() );

            if ( this.getIfModifiedSince() != -1 )
                httpURLConn.setIfModifiedSince( this.getIfModifiedSince() );

            if ( getEtag() != null )
                httpURLConn.setRequestProperty( IF_NONE_MATCH_HEADER, getEtag() );

            try {

                httpURLConn.connect();

                //setResource( getRedirectedResource() );
                this.setResponseCode( httpURLConn.getResponseCode() );

            } catch ( IOException e ) {
                throw new NetworkException( e );
            }

        }

        int contentLength = _urlConnection.getContentLength();

        //bigger than 1 meg and it is a remote document (it is safe to process
        //local documents). java.net.URL prints file URLs as "file:/path" so
        //only the scheme may be compared here.
        if ( contentLength > MAX_CONTENT_LENGTH &&
             ! this.getResource().startsWith( "file:" ) ) {

            //NOTE: make 100% sure this doens't just go ahead and download the
            //file FIRST before doing a HEAD. I think that's what happens but I
            //might be wrong.

            throw new NetworkException( "Content is too large - " + contentLength + " - " + getResource() );

        }

        long after = System.currentTimeMillis();

        log.debug( getResource() + " - init duration: " + (after-before) );

    }

    //lazily resolved reflection handles used by getResourceFromRedirect()
    java.lang.reflect.Field FIELD_HTTP_URL_CONNECTION_HTTP = null;
    java.lang.reflect.Field FIELD_HTTP_CLIENT_URL = null;

    /**
     * This method used Reflection to pull out the redirected URL in
     * java.net.URL. Internally sun.net.www.protocol.http.HttpURLConnection
     * stores a reference to sun.net.www.http.HttpClient which then in turn does
     * all the redirection and stores the redirect java.net.URL. We just use
     * reflection to FETCH this URL and then call toString to get the correct
     * value.
     *
     * Java needs the concept of readonly private variables.
     *
     * @return the redirected URL, or the current resource when it cannot be
     * determined (non-HTTP connection, different VM internals, connection
     * already cleaned up).
     * @author <a href="mailto:burton@rojo.com">Kevin A. Burton</a>
     */
    public String getResourceFromRedirect() {

        try {

            if ( FIELD_HTTP_URL_CONNECTION_HTTP == null ) {

                //Note: when using a FILE URL this won't work!
                FIELD_HTTP_URL_CONNECTION_HTTP = _urlConnection.getClass().getDeclaredField( "http" );
                FIELD_HTTP_URL_CONNECTION_HTTP.setAccessible( true );

            }

            Object http = FIELD_HTTP_URL_CONNECTION_HTTP.get( _urlConnection );

            //when java.net.URL has already cleaned itself up 'http' will be
            //null here.
            if ( http == null )
                return getResource();

            if ( FIELD_HTTP_CLIENT_URL == null ) {

                FIELD_HTTP_CLIENT_URL = http.getClass().getDeclaredField( "url" );
                FIELD_HTTP_CLIENT_URL.setAccessible( true );

            }

            Object url = FIELD_HTTP_CLIENT_URL.get( http );

            //this will be a java.net.URL and now I can call the toString method
            //on it which will return our full URI.
            return url.toString();

        } catch ( Throwable t ) {

            //deliberately best effort: this relies on VM internals, so any
            //failure just means "no redirect information available".
            //log.error( t );
            return getResource();

        }

    }

    /**
     * Get the (decompressed) content of this resource.
     *
     * @throws NetworkException wrapping any IOException together with the
     * request, URL and connection it happened on.
     */
    public InputStream getInputStream() throws NetworkException {

        try {
            return _getInputStream();
        } catch ( IOException e ) {

            String message = null;

            //the modern VM buries the FileNotFoundException which prevents a
            //catch. Very very ugly.
            if ( e.getCause() instanceof FileNotFoundException ) {
                message = "File not found: " + e.getCause().getMessage();
            } else {
                message = e.getMessage();
            }

            throw new NetworkException( message, e, this, _url, _urlConnection );

        }

    }

    /**
     * Open the connection when necessary and return the content stream. The
     * stream is created once and reused by later calls.
     *
     * @throws IOException when the factory is offline or the content cannot
     * be fetched.
     * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
     */
    public InputStream _getInputStream() throws IOException {

        String resource = this.getResource();

        //if we haven't pulled from the cache (as above) and we are offline we
        //need to throw an exception. This is checked BEFORE touching the
        //connection so that we never hit the network while offline.
        if ( ResourceRequestFactory.isOffline() ) {

            //see if we can return from the HTCache.
            // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() &&
            //      HTCache.hasContentInCache( resource ) )
            //     return HTCache.getContentAsInputStream( resource );

            //if not we should throw an exception
            throw new IOException( "ResourceRequestFactory is offline and content was not in cache - " +
                                   resource );

        }

        if ( ! initConnection ) { initConnection(); }

        //if we are using an input stream NOT from init()
        if ( this.inputStream == null ) {

            InputStream content = new AdvancedInputStream( _urlConnection.getInputStream(), this );

            //first decompress
            if ( GZIP_ENCODING.equalsIgnoreCase( _urlConnection.getContentEncoding() ) ) {

                //note. the advanced input stream must be wrapped by a GZIP
                //input stream and not vice-versa or we will end up with
                //incorrect results.
                content = new GZIPInputStream( content );

            }

            this.inputStream = content;

            // if ( ResourceRequestFactory.isTransparentHTCacheEnabled() ) {
            //     System.out.println( "cache store for: " +
            //                         resource + " as " +
            //                         HTCache.getContentAsPath( resource ) );
            //     //FIXME: performance improvement... don't write do disk and then
            //     //read from disk.?
            //     //store this content from the network and save it in the cache. Then fetch it and return
            //     HTCache.store( resource, this.inputStream );
            //     return HTCache.getContentAsInputStream( resource );
            // }

        }

        setResource( getResourceFromRedirect() );

        //this is potentially the cached input stream created if we have used
        //the HTCache.
        return inputStream;

    }

    /**
     * Set the RequestMethod of this URLConnection. Ignored for connections
     * that are not HTTP(S).
     *
     * @throws NetworkException when the method is not valid for the connection.
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public void setRequestMethod( String method ) throws NetworkException {

        try {

            //see initConnection() for why the type is fully qualified.
            if ( _urlConnection instanceof java.net.HttpURLConnection ) {
                ((java.net.HttpURLConnection)_urlConnection).setRequestMethod( method );
            }

        } catch ( ProtocolException pe ) {

            NetworkException ne = new NetworkException( pe.getMessage() );
            ne.initCause( pe );
            throw ne;

        }

    }

    /**
     * Return the length of the content as reported by the connection,
     * connecting first when necessary.
     *
     * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
     */
    public int getContentLength() throws IOException {

        if ( ! initConnection ) { initConnection(); }

        //if ( _urlConnection instanceof HttpURLConnection ) {
        return _urlConnection.getContentLength();

    }

    /**
     * Get a given response header.
     *
     * @return the header value, or null when the header is not present or no
     * connection exists (init() ran while offline).
     */
    public String getHeaderField( String name ) {

        if ( _urlConnection == null )
            return null;

        return _urlConnection.getHeaderField( name );

    }

}
---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org