You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@commons.apache.org by bu...@apache.org on 2005/01/25 08:55:20 UTC

cvs commit: jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network AdvancedInputStream.java Authenticator.java BaseResourceRequest.java BlockingResourceRequest.java DOCUMENTATION DataEvent.java HTTPClientNetworkResource.java MalformedResourceException.java NetworkEventListener.java NetworkException.java ResourceRequest.java ResourceRequestFactory.java URLCookieManager.java URLResolver.java URLResourceRequest.java

burton      2005/01/24 23:55:20

  Added:       feedparser/src/java/org/apache/commons/feedparser/network
                        AdvancedInputStream.java Authenticator.java
                        BaseResourceRequest.java
                        BlockingResourceRequest.java DOCUMENTATION
                        DataEvent.java HTTPClientNetworkResource.java
                        MalformedResourceException.java
                        NetworkEventListener.java NetworkException.java
                        ResourceRequest.java ResourceRequestFactory.java
                        URLCookieManager.java URLResolver.java
                        URLResourceRequest.java
  Log:
  init of network API into feedparser......
  
  Revision  Changes    Path
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/AdvancedInputStream.java
  
  Index: AdvancedInputStream.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  /**
   * An <code>InputStream</code> decorator that delegates every operation to a
   * wrapped stream and reports progress to the owning request: each successful
   * read or skip fires a data event carrying the number of bytes that were
   * actually consumed, and closing the stream fires the request's "closed"
   * notification.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: AdvancedInputStream.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class AdvancedInputStream extends InputStream {
  
      /** The stream all calls are delegated to. */
      private InputStream is = null;
  
      /** The request that receives progress and close notifications. */
      private BaseResourceRequest request = null;
  
      /**
       * Create a new <code>AdvancedInputStream</code> instance.
       *
       * @param is the stream to wrap
       * @param request the request to notify; it is cast to
       *        <code>BaseResourceRequest</code>, so any other implementation
       *        causes a <code>ClassCastException</code>
       */
      public AdvancedInputStream( InputStream is, ResourceRequest request ) {
          this.is = is;
          this.request = (BaseResourceRequest)request;
      }
  
      /**
       * Read a single byte and report it, unless the end of the stream was
       * reached.
       */
      public int read() throws IOException {
  
          int v = is.read();
  
          //-1 signals end of stream: nothing was read, so nothing is reported
          if ( v != -1 ) {
              request.fireDataEvent( 1 );
          }
  
          return v;
      }
  
      /**
       * Fill as much of <code>b</code> as the wrapped stream provides and report
       * the number of bytes actually read (not the buffer size).
       */
      public int read( byte b[] ) throws IOException {
  
          int v = is.read( b );
  
          reportProgress( v );
  
          return v;
  
      }
  
      /**
       * Read up to <code>len</code> bytes into <code>b</code> starting at
       * <code>off</code> and report the number of bytes actually read (not the
       * requested length).
       */
      public int read( byte b[], int off, int len ) throws IOException {
  
          int v = is.read( b, off, len );
  
          reportProgress( v );
  
          return v;
  
      }
  
      /**
       * Skip up to <code>n</code> bytes and report the number of bytes actually
       * skipped, which may be smaller than <code>n</code>.
       */
      public long skip( long n ) throws IOException {
  
          long v = is.skip( n );
  
          reportProgress( v );
  
          return v;
      }
  
      /**
       * Fire a data event for <code>count</code> bytes; zero and negative values
       * (end of stream, nothing transferred) are not reported.
       */
      private void reportProgress( long count ) {
  
          if ( count > 0 ) {
              request.fireDataEvent( count );
          }
  
      }
  
      //non-read related
  
      public int available() throws IOException {
          return is.available();
      }
  
      /**
       * Close the wrapped stream, then notify the request that it was closed.
       */
      public void close() throws IOException {
          is.close();
  
          request.fireOnClosed();
      }
  
      public void mark(int readlimit) {
          is.mark( readlimit );
      }
  
      public void reset() throws IOException {
          is.reset();
      }
  
      public boolean markSupported() {
          return is.markSupported();
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/Authenticator.java
  
  Index: Authenticator.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.util.*;
  import java.net.PasswordAuthentication;
  /**
   * 
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: Authenticator.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class Authenticator extends java.net.Authenticator {
  
      String username = null;
      String password = null;
  
      public Authenticator() { }
      
      protected PasswordAuthentication getPasswordAuthentication() {
          return new PasswordAuthentication ( username, password.toCharArray() );
      }
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/BaseResourceRequest.java
  
  Index: BaseResourceRequest.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  /**
   * Skeletal <code>ResourceRequest</code> implementation.  It stores the
   * resource name, conditional-request values (If-Modified-Since, ETag),
   * request headers and the redirect flag, dispatches network events to the
   * globally registered listeners and to an optional per-request listener, and
   * provides helpers that drain the request's input stream into a byte array,
   * a string, a locally cached stream or an arbitrary output stream.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: BaseResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public abstract class BaseResourceRequest implements ResourceRequest {
  
      /** Default used to initialise the per-request redirect flag. */
      public static boolean FOLLOW_REDIRECTS = true;
  
      /** Extra capacity added on top of a known content length. */
      private static final int CONTENT_LENGTH_PADDING = 5000;
  
      /**
       * Buffer capacity used when the content length is unknown; larger than the
       * ByteArrayOutputStream default.
       */
      private static final int DEFAULT_BUFFER_CAPACITY = 100000;
  
      /** Number of bytes requested from the input stream per read call. */
      private static final int COPY_CHUNK_SIZE = 200;
  
      private String resource = null;
  
      private long _ifModifiedSince = -1;
  
      private long _responseCode = HttpURLConnection.HTTP_OK;
  
      private String _etag = null;
  
      /** Locally cached content; empty until something has been cached. */
      private byte[] data = new byte[0];
  
      private boolean localCache = false;
  
      private boolean followRedirects = FOLLOW_REDIRECTS;
  
      /**
       * A single resource request can now have a given event listener.
       */
      private NetworkEventListener eventListener = null;
  
      private HashMap requestHeaders = new HashMap();
  
      /**
       * Get the value of <code>resource</code>.
       */
      public String getResource() {
  
          return this.resource;
  
      }
  
      /**
       * Set the value of <code>resource</code>.
       */
      public void setResource( String resource ) {
  
          this.resource = resource;
  
      }
  
      /**
       * Fire a data event reporting that <code>count</code> bytes were
       * processed for this request's resource.  A fresh event object is created
       * for every call (as <code>fireInit</code> does) so that a listener that
       * keeps a reference to an event never sees it change afterwards.
       *
       * @param count number of bytes to report
       */
      public void fireDataEvent( long count ) {
  
          DataEvent event = new DataEvent();
          event.count = count;
          event.resource = resource;
  
          fireDataEvent( event );
  
      }
  
      /**
       * Notify all globally registered listeners, then this request's own
       * listener (if any), that the request is being initialised.
       */
      public void fireInit() {
  
          DataEvent event = new DataEvent();
          event.request = this;
  
          Iterator i = ResourceRequestFactory.getNetworkEventListeners();
  
          while ( i.hasNext() ) {
              ((NetworkEventListener)i.next()).init( event );
          }
  
          if ( eventListener != null )
              eventListener.init( event );
      }
  
      /**
       * Stamp <code>event</code> with this request and deliver it to all
       * globally registered listeners, then to this request's own listener (if
       * any).
       *
       * @param event the event to deliver; its <code>request</code> field is
       *        overwritten
       */
      public void fireDataEvent( DataEvent event ) {
  
          event.request = this;
  
          Iterator i = ResourceRequestFactory.getNetworkEventListeners();
  
          while ( i.hasNext() ) {
              ((NetworkEventListener)i.next()).dataEvent( event );
          }
  
          if ( eventListener != null )
              eventListener.dataEvent( event );
      }
  
      /**
       * Notify all globally registered listeners, then this request's own
       * listener (if any), that the stream was closed.
       */
      public void fireOnClosed() {
  
          Iterator i = ResourceRequestFactory.getNetworkEventListeners();
  
          while ( i.hasNext() ) {
              ((NetworkEventListener)i.next()).onClosed();
          }
  
          if ( eventListener != null )
              eventListener.onClosed();
      }
  
      /**
       * Read the whole input stream and decode it with the platform default
       * character set.
       *
       * @see ResourceRequest
       */
      public String getInputStreamAsString() throws IOException {
          return new String( getInputStreamAsByteArray() );
      }
  
      /**
       * Read the whole input stream into a byte array.  The stream is closed
       * when this method returns, whether or not reading succeeded.
       *
       * @throws IOException if no input stream is available or reading fails
       * @see ResourceRequest
       */
      public byte[] getInputStreamAsByteArray() throws IOException {
  
          InputStream is = requireInputStream();
  
          try {
  
              ByteArrayOutputStream bos =
                  new ByteArrayOutputStream( initialBufferCapacity() );
  
              copy( is, bos );
  
              return bos.toByteArray();
  
          } finally {
              is.close();
          }
  
      }
  
      /**
       * Return the content as an in-memory stream.  Cached content is reused
       * when present; otherwise the input stream is drained and, when local
       * caching is enabled, the result is kept for later calls.
       *
       * @throws NetworkException if fetching the content fails for any reason
       * @see ResourceRequest
       */
      public InputStream getLocalInputStream() throws NetworkException {
  
          try {
  
              byte[] data;
  
              if ( this.data.length > 0 ) {
  
                  //we have cached this... return the cached value.
                  data = this.data;
  
              } else {
  
                  data = getInputStreamAsByteArray();
  
                  if ( localCache )
                      this.data = data;
  
              }
  
              return new ByteArrayInputStream( data );
  
          } catch ( NetworkException n ) {
              throw n;
          } catch ( Throwable t ) {
              throw new NetworkException( t );
          }
  
      }
  
      /**
       * Return whatever content is currently cached, which is an empty array
       * when nothing has been cached yet.
       */
      public byte[] getLocalInputStreamAsByteArray() throws IOException {
          //FIXME: this needs to use the cache.
          return this.data;
      }
  
      /**
       * Enable or disable keeping fetched content for later
       * <code>getLocalInputStream</code> calls.
       */
      public void setLocalCache( boolean v ) {
          this.localCache = v;
      }
  
      /**
       * Copy this resource request to the given OutputStream.  The input
       * stream is closed when this method returns, whether or not copying
       * succeeded; <code>out</code> is left open.
       *
       * @throws IOException if no input stream is available or copying fails
       */
      public void toOutputStream( OutputStream out ) throws IOException {
  
          InputStream is = requireInputStream();
  
          try {
              copy( is, out );
          } finally {
              is.close();
          }
  
      }
  
      public long getIfModifiedSince() {
          return _ifModifiedSince;
      }
  
      public void setIfModifiedSince( long ifModifiedSince ) {
          this._ifModifiedSince = ifModifiedSince;
      }
  
      public String getEtag() {
          return _etag;
      }
  
      public void setEtag( String etag ) {
          this._etag = etag;
      }
  
      /**
       * Get and set an HTTP style response code.  Only used with HTTP URLs.
       */
      public long getResponseCode() {
          return this._responseCode;
      }
  
      public void setResponseCode( int responseCode ) {
          this._responseCode = responseCode;
      }
  
      /**
       * Default implementation: the content length is unknown (-1).
       */
      public int getContentLength() throws IOException {
          return -1;
      }
  
      public void setEventListener( NetworkEventListener eventListener ) {
          this.eventListener = eventListener;
      }
  
      public String getHeaderField( String name ) {
          //default impl always returns null
          return null;
      }
  
      public void setRequestHeaderField( String name, String value ) {
          requestHeaders.put( name, value );
      }
  
      /**
       * @return an iterator over the names of the request headers set so far
       */
      public Iterator getRequestHeaderFields() {
          return requestHeaders.keySet().iterator();
      }
  
      public String getRequestHeaderField( String name ) {
          return (String)requestHeaders.get( name );
      }
  
      /**
       * Default implementation: changing the request method is not supported.
       */
      public void setRequestMethod( String method ) throws NetworkException {
          throw new NetworkException( "not implemented" );
      }
  
      public boolean getFollowRedirects() {
          return followRedirects;
      }
  
      public void setFollowRedirects( boolean v ) {
          this.followRedirects = v;
      }
  
      /**
       * Default implementation: no redirect tracking, so this is the original
       * resource.
       */
      public String getResourceFromRedirect() {
          return getResource();
      }
  
      /**
       * Obtain the request's input stream, failing with a descriptive
       * IOException instead of a later NullPointerException when the
       * implementation returns null.
       */
      private InputStream requireInputStream() throws IOException {
  
          InputStream is = getInputStream();
  
          if ( is == null ) {
              throw new IOException( "No input stream available for resource: "
                                     + getResource() );
          }
  
          return is;
  
      }
  
      /**
       * Choose the initial capacity of the in-memory buffer: the reported
       * content length plus some padding, or a large default when the length is
       * unknown or cannot be determined.
       */
      private int initialBufferCapacity() {
  
          int contentLength = -1;
  
          try {
  
              contentLength = getContentLength();
  
          } catch ( IOException ignored ) {
              //the length is only a sizing hint; fall back to the default
          }
  
          //test the sentinel BEFORE padding, otherwise -1 becomes 4999;
          //also guard against int overflow when adding the padding
          if ( contentLength < 0
               || contentLength > Integer.MAX_VALUE - CONTENT_LENGTH_PADDING ) {
              return DEFAULT_BUFFER_CAPACITY;
          }
  
          return contentLength + CONTENT_LENGTH_PADDING;
  
      }
  
      /**
       * Copy bytes from <code>in</code> to <code>out</code> until a read
       * returns no data.  Neither stream is closed.
       */
      private static void copy( InputStream in, OutputStream out ) throws IOException {
  
          byte[] chunk = new byte[ COPY_CHUNK_SIZE ];
  
          int readCount = 0;
  
          while ( ( readCount = in.read( chunk ) ) > 0 ) {
              out.write( chunk, 0, readCount );
          }
  
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/BlockingResourceRequest.java
  
  Index: BlockingResourceRequest.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  /**
   * A request whose <code>getInputStream</code> simply blocks the calling
   * thread for a short, fixed time and yields no content.
   *
   * SCHEME - block://localhost/?duration=100
   *
   * NOTE(review): the <code>duration</code> parameter shown in the example URI
   * is not parsed anywhere in this class; the delay is hard-coded.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: BlockingResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class BlockingResourceRequest extends BaseResourceRequest implements ResourceRequest {
  
      public static final String SCHEME = "block";
  
      /**
       * Nothing to initialise for this scheme.
       */
      public void init() throws IOException {
  
          //intentionally empty: the resource is not inspected
  
      }
  
      /**
       * Sleep for 100 milliseconds and return no stream.
       *
       * @return always <code>null</code>
       * @throws IOException if the thread is interrupted while sleeping; the
       *         interrupt status is restored before the exception is thrown and
       *         the <code>InterruptedException</code> is attached as the cause
       */
      public InputStream getInputStream() throws IOException {
  
          try {
  
              Thread.sleep( 100 );
  
          } catch ( InterruptedException interrupted ) {
  
              //keep the interrupt visible to callers further up the stack
              Thread.currentThread().interrupt();
  
              IOException e = new IOException( interrupted.getMessage() );
              e.initCause( interrupted );
  
              throw e;
  
          }
  
          return null;
  
      }
  
      /**
       * Placeholder entry point; does nothing.
       */
      public static void main( String[] args ) {
  
          //intentionally empty
  
      }
  
  }
  
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/DOCUMENTATION
  
  Index: DOCUMENTATION
  ===================================================================
  
  ** FIXME **
  
      - What are we going to call this package?
  
          - Jakarta Netto ?
  
          - httplib?
  
      - What about the transparent HTCache work?
  
      - Look at all the imports and make SURE it's isolated
  
          - Might have to move the event package over.
  
  ** DOCUMENTATION **
  
  - InputStream utilities
  
      - String content
      - byte array
      - Local cached input stream
  
  - Initial support for Digest and Basic authentication and an API to support
    this.
      
  - Support for easy and common HTTP customization (changing UserAgent, etc)
  
  - Identical default behavior between URL and HTTPClient implementations
      
  - Support for session cookies when java.net.URL is used. (Still being
    implemented)
  
  - No infinite timeouts per the default java.net.URL.  We set default timeouts to
    5 minutes instead of infinity.
  
  - NetworkException instead of generic IOException.  This allows you to use
    network IO alongside fileIO and note the difference in try/catch blocks.  This
    also throws the 'cause' of the exception which IOException does not.
  
  - Download progress via event listeners.  Currently most network IO packages are
    just black boxes.  We support blocking IO with event callbacks during reads.
  
  - HTTP status codes in exceptions.  java.net.URL doesn't support this.
  
  - Support for Jakarta HttpClient and java.net.URL as IO implementations.  While
    java.net.URL is reliable we believe that Jakarta HttpClient is very compelling.
  
  - Support for a future async IO API... While blocking IO has proven reliable we
    want to provide an async IO layer which uses event callbacks.
  
  - Simple ResourceRequest API which has no requirements on internal JRE classes.
  
  - Provide portability to migrate away from non-free software (java.net.URL)
  
  - Easy registration of IO handlers for URI schemes.  One can now create URI-based
    IO handlers and read from them.  This comes in handy for adhoc scheme creation.
  
    
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/DataEvent.java
  
  Index: DataEvent.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  import java.text.DateFormat;
  
  /**
   * Plain value holder passed to network event listeners.  It names the
   * resource and request an event belongs to and carries a byte count.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: DataEvent.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class DataEvent {
  
      /** The resource URL this event refers to; unset (null) by default. */
      public String resource;
  
      /** The request that produced this event; unset (null) by default. */
      public ResourceRequest request;
  
      /** The amount of bytes read; zero by default. */
      public long count;
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/HTTPClientNetworkResource.java
  
  Index: HTTPClientNetworkResource.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  import org.apache.commons.httpclient.*;
  import org.apache.commons.httpclient.methods.*;
  
  /**
   * Resource request intended to be backed by Jakarta HttpClient.  Only the
   * connection set-up is implemented; fetching content is currently disabled
   * and <code>getInputStream</code> returns <code>null</code>.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: HTTPClientNetworkResource.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class HTTPClientNetworkResource extends BaseResourceRequest implements ResourceRequest {
  
      /** Socket timeout in milliseconds (three minutes). */
      public static final int TIMEOUT = 3 * 1000 * 60;
  
      /** Connection manager shared by all instances. */
      public static HttpConnectionManager hcm = new HttpConnectionManager();
  
      /** Connection obtained by <code>init</code>. */
      private HttpConnection conn = null;
  
      /**
       * Obtain a connection for this request's resource from the shared
       * connection manager and, if that connection is not open yet, apply the
       * socket timeout to it.
       *
       * @throws IOException wrapping any <code>HttpException</code> raised while
       *         obtaining the connection
       */
      public void init() throws IOException {
  
          try {
  
              conn = hcm.getConnection( getResource() );
  
              //the timeout is only applied to a connection that is not open
              if ( ! conn.isOpen() ) {
                  conn.setSoTimeout( TIMEOUT );
              }
  
          } catch ( HttpException cause ) {
  
              IOException wrapped = new IOException( cause.getMessage() );
              wrapped.initCause( cause );
  
              throw wrapped;
  
          }
  
      }
  
      /**
       * Not implemented.
       *
       * @return always <code>null</code>
       */
      public InputStream getInputStream() throws IOException {
  
          //A previous implementation lived here but did not compile and is not
          //used, so it was disabled.  In outline it:
          //
          //  - executed a GET for the resource path with redirect following
          //    enabled, using the connection from init()
          //  - on a 302 response, read the "Location" header and delegated to
          //    a new HTTPClientNetworkResource for that location
          //    (FIXME at the time: 302 wasn't being followed)
          //  - threw an IOException for any status other than 200
          //  - wrapped the response body in an AdvancedInputStream
          //  - released the connection back to hcm in a finally block, because
          //    otherwise the manager runs out of connections
          //
          //Until that is reinstated, returning null is acceptable.
  
          return null;
  
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/MalformedResourceException.java
  
  Index: MalformedResourceException.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.IOException;
  import java.net.*;
  
  /**
   * A <code>NetworkException</code> subtype identified only by its message.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: MalformedResourceException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class MalformedResourceException extends NetworkException {
  
      /**
       * Create a new <code>MalformedResourceException</code> instance.
       *
       * @param message the detail message handed to the parent exception
       */
      public MalformedResourceException( final String message ) {
          super( message );
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/NetworkEventListener.java
  
  Index: NetworkEventListener.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  import java.text.DateFormat;
  
  /**
   * Callback interface for observing the progress of resource requests.
   *
   * Note that all NetworkEventListeners should be threadsafe.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: NetworkEventListener.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public interface NetworkEventListener extends EventListener {
  
      /**
       * Called once per request.
       *
       * @param event describes the request being initialised
       */
      void init( DataEvent event );
  
      /**
       * Called when data has been processed for a request.
       *
       * @param event describes the request and carries a byte count
       */
      void dataEvent( DataEvent event );
  
      /**
       * Called when this stream is closed.
       */
      void onClosed();
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/NetworkException.java
  
  Index: NetworkException.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.IOException;
  import java.net.*;
  
  /**
   * An <code>IOException</code> for network failures that can additionally
   * carry the underlying exception, the originating request, the URL and the
   * URL connection involved, and that can derive an HTTP response code from
   * the connection.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: NetworkException.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class NetworkException extends IOException {
  
      private ResourceRequest request = null;
  
      /** The wrapped exception, if any; also available via getException(). */
      public Exception e = null;
  
      private URL _url = null;
  
      private URLConnection _urlConnection = null;
  
      /** Lazily parsed response code; -1 until successfully determined. */
      private int responseCode = -1;
  
      /**
       * Create a new <code>NetworkException</code> instance.
       *
       * @param message the detail message
       */
      public NetworkException( String message ) {
          super( message );
      }
  
      /**
       * Create a new <code>NetworkException</code> that wraps <code>t</code>.
       * The message of <code>t</code> is reused and <code>t</code> is recorded
       * as the cause so its stack trace is not lost.
       *
       * @param t the failure being wrapped
       */
      public NetworkException( Throwable t) {
          super( t.getMessage() );
          initCause( t );
  
          //keep getException() consistent with the other wrapping constructors
          if ( t instanceof Exception ) {
              this.e = (Exception)t;
          }
      }
  
      /**
       * Create a new <code>NetworkException</code> instance.
       *
       * @param message the detail message
       * @param e the underlying exception, recorded as the cause
       * @param request the request that failed
       * @param _url the URL being accessed
       * @param _urlConnection the connection being used
       */
      public NetworkException( String message,
                               Exception e,
                               ResourceRequest request,
                               URL _url,
                               URLConnection _urlConnection ) {
  
          super( message ); //why doesn't java.io.IOException support nesting?
          this.e = e;
          this.request = request;
          this._url = _url;
          this._urlConnection = _urlConnection;
          initCause( e );
  
      }
  
      /**
       * Create a new <code>NetworkException</code> instance whose message is
       * taken from <code>e</code>.
       *
       * @param e the underlying exception, recorded as the cause
       * @param request the request that failed
       * @param _url the URL being accessed
       * @param _urlConnection the connection being used
       */
      public NetworkException( Exception e,
                               ResourceRequest request,
                               URL _url,
                               URLConnection _urlConnection ) {
  
          super( e.getMessage() ); //why doesn't java.io.IOException support nesting?
          this.e = e;
          this.request = request;
          this._url = _url;
          this._urlConnection = _urlConnection;
          initCause( e );
  
      }
  
      public ResourceRequest getResourceRequest() {
          return request;
      }
  
      public URL getURL() {
          return _url;
      }
  
      public URLConnection getURLConnection() {
          return _urlConnection;
      }
  
      public Exception getException() {
          return e;
      }
  
      /**
       * Determine the HTTP response code from the connection's status line.
       * The value is parsed on first use and remembered once found.
       *
       * @return the status code, or -1 when there is no connection, no status
       *         line, or the status line cannot be parsed
       */
      public int getResponseCode() {
  
          if ( _urlConnection == null ) {
              return -1;
          }
  
          if ( responseCode == -1 ) {
  
              //presumably the null-keyed header is the HTTP status line
              //(e.g. "HTTP/1.1 200 OK") -- implementation dependent
              String status = _urlConnection.getHeaderField( null );
  
              if ( status == null ) {
                  return -1;
              }
  
              responseCode = parseStatusCode( status );
  
          }
  
          return responseCode;
  
      }
  
      /**
       * Extract the numeric code that follows the first space of a status line
       * such as "HTTP/1.1 200 OK".
       *
       * @return the code, or -1 when the line does not have that shape
       */
      private static int parseStatusCode( String status ) {
  
          int begin = status.indexOf( ' ' );
  
          if ( begin == -1 ) {
              return -1;
          }
  
          //tolerate more than one separating space
          while ( begin < status.length() && status.charAt( begin ) == ' ' ) {
              ++begin;
          }
  
          int end = begin;
  
          while ( end < status.length() && Character.isDigit( status.charAt( end ) ) ) {
              ++end;
          }
  
          if ( end == begin ) {
              return -1;
          }
  
          try {
              return Integer.parseInt( status.substring( begin, end ) );
          } catch ( NumberFormatException tooLarge ) {
              //digits only, so this can only be an out-of-range value
              return -1;
          }
  
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/ResourceRequest.java
  
  Index: ResourceRequest.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  /**
   * A ResourceRequest is a generic interface to a network resource such as an
   * HTTP URL.
   * 
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: ResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public interface ResourceRequest {
  
      /**
       * Initialize the request and connect to the remote server.  Call this
       * BEFORE getInputStream() whenever other operations have to happen first.
       * For a HEAD request use this method and not getInputStream().
       */
      void init() throws IOException;
  
      /**
       * Get an input stream for the content of this resource.
       */
      InputStream getInputStream() throws IOException;
  
      /**
       * Get the resource (for example an HTTP URL) this request refers to.
       */
      String getResource();
  
      /**
       * Set the resource (for example an HTTP URL) this request refers to.
       */
      void setResource( String resource );
  
      /**
       * Get the resource with all redirects taken into consideration.
       */
      String getResourceFromRedirect();
  
      /**
       * Read the input stream until all data is held locally and return it as
       * a String.
       */
      String getInputStreamAsString() throws IOException;
  
      /**
       * Byte array counterpart of getInputStreamAsString().
       */
      byte[] getInputStreamAsByteArray() throws IOException;
  
      /**
       * Get an input stream over locally held data; see setLocalCache().
       */
      InputStream getLocalInputStream() throws NetworkException;
  
      /**
       * Byte array counterpart of getLocalInputStream().
       */
      byte[] getLocalInputStreamAsByteArray() throws IOException;
  
      /**
       * When true, getLocalInputStream() is cached so that repeated requests
       * are served from local data.  More flexible, at the price of memory.
       */
      void setLocalCache( boolean v );
  
      /**
       * Copy the content of this request to the given OutputStream.
       */
      void toOutputStream( OutputStream out ) throws IOException;
  
      /**
       * Get the If-Modified-Since value used for HTTP URL connections and for
       * protocols with a similar feature.  -1 means the header is not used.
       *
       * Fri Jun 06 2003 08:34 PM (burton@peerfear.org): Currently just
       * URLResourceRequest
       */
      long getIfModifiedSince();
  
      /**
       * Set the If-Modified-Since value; pass -1 to not use the header.
       */
      void setIfModifiedSince( long ifModifiedSince );
  
      /**
       * Get the HTTP ETag that is used with If-None-Match.
       */
      String getEtag();
  
      /**
       * Set the HTTP ETag that is used with If-None-Match.
       */
      void setEtag( String etag );
  
      /**
       * Get the HTTP style response code.  Only used with HTTP URLs.
       */
      long getResponseCode();
  
      /**
       * Set the HTTP style response code.  Only used with HTTP URLs.
       */
      void setResponseCode( int responseCode );
  
      /**
       * Return the content length of this request or -1 if not known.
       */
      int getContentLength() throws IOException;
  
      /**
       * Set the listener for the network events of this request.
       */
      void setEventListener( NetworkEventListener eventListener );
  
      /**
       * Get a given response header.
       */
      String getHeaderField( String name );
  
      /**
       * Set a given request header such as UserAgent, ETag, etc.
       */
      void setRequestHeaderField( String name, String value );
  
      /**
       * Get the names of all request headers that have been set.
       */
      Iterator getRequestHeaderFields();
  
      /**
       * Get the value of a request header previously set on this request.
       */
      String getRequestHeaderField( String name );
  
      /**
       * Set the request method (HEAD, etc).
       */
      void setRequestMethod( String method ) throws NetworkException;
  
      /**
       * Whether redirects are followed for this request.
       */
      boolean getFollowRedirects();
  
      /**
       * Enable or disable following redirects for this request.
       */
      void setFollowRedirects( boolean v );
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/ResourceRequestFactory.java
  
  Index: ResourceRequestFactory.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  import org.apache.log4j.*;
  
  /**
   * Get a ResourceRequest for a given URL.  The request is handled based on the
   * URL.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: ResourceRequestFactory.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class ResourceRequestFactory {
  
      private static Logger log = Logger.getLogger( ResourceRequestFactory.class.getName() );
  
      /**
       * Specified in java.security to indicate the caching policy for successful
       * name lookups from the name service.  The value is specified as an
       * integer to indicate the number of seconds to cache the successful
       * lookup.
       *
       * sun.net.inetaddr.ttl:
       *
       * This is a sun private system property which corresponds to
       * networkaddress.cache.ttl. It takes the same value and has the same meaning,
       * but can be set as a command-line option. However, the preferred way is to
       * use the security property mentioned above.
       *
       * A value of -1 indicates "cache forever".
       *
       * NOTE(review): the property is documented in seconds but this default is
       * computed like a millisecond value (5 * 60 * 1000); if five minutes was
       * intended it should be 5 * 60.  Left unchanged pending confirmation.
       */
      public static int NETWORKADDRESS_CACHE_TTL = 5 * 60 * 1000;
  
      /**
       * These properties specify the default connect and read timeout (resp.) for
       * the protocol handler used by java.net.URLConnection.
       *
       * sun.net.client.defaultConnectTimeout specifies the timeout (in
       * milliseconds) to establish the connection to the host. For example for
       * http connections it is the timeout when establishing the connection to
       * the http server. For ftp connection it is the timeout when establishing
       * the connection to ftp servers.
       *
       * sun.net.client.defaultReadTimeout specifies the timeout (in milliseconds)
       * when reading from input stream when a connection is established to a
       * resource.
       */
      public static int DEFAULT_CONNECT_TIMEOUT = 1 * 60 * 1000;
      public static int DEFAULT_READ_TIMEOUT = DEFAULT_CONNECT_TIMEOUT;
  
      //FIXME: (should this be a linked list?)
      private static ArrayList listeners = new ArrayList( 30 );
  
      /** Maps a URL scheme to the ResourceRequest class handling it; built lazily by init(). */
      private static HashMap schemeMap = null;
  
      private static boolean transparentHTCacheEnabled = false;
  
      /**
       * When offline we either throw an exception or return content from the
       * cache directly.  This can be used to run code that does not depend on the
       * network.
       */
      private static boolean offline = false;
  
      /**
       * Get a ResourceRequest without an ETag.
       *
       * @see #getResourceRequest(String, long, String)
       */
      public static ResourceRequest getResourceRequest( String resource,
                                                        long ifModifiedSince ) throws NetworkException {
  
          return getResourceRequest( resource, ifModifiedSince, null );
  
      }
  
      /**
       * Get a ResourceRequest for the protocol represented in the resource URL.
       * It is important that we use a ResourceRequest implementation that supports
       * fetching the URL.
       *
       * The returned request has its resource, If-Modified-Since value and ETag
       * set and has already been init()ed.
       *
       * @param resource URL of the resource; its scheme selects the implementation
       * @param ifModifiedSince If-Modified-Since value, or -1 to not use it
       * @param etag ETag to use, or null
       * @throws MalformedResourceException when the scheme is empty or unsupported
       * @throws NetworkException when the resource is null or has no scheme, or
       * when creating or initializing the request fails
       */
      public static ResourceRequest getResourceRequest( String resource,
                                                        long ifModifiedSince,
                                                        String etag ) throws NetworkException {
  
          //make sure we are initialized correctly.
          ResourceRequestFactory.init();
  
          //report a missing resource through the declared exception type
          //instead of a NullPointerException.
          if ( resource == null )
              throw new NetworkException( "Unknown scheme: '" + resource + "'" );
  
          int schemeIndex = resource.indexOf( ":" );
  
          if ( schemeIndex == -1 )
              throw new NetworkException( "Unknown scheme: '" + resource + "'" );
  
          String scheme = resource.substring( 0, schemeIndex );
  
          if ( scheme.length() == 0 )
              throw new MalformedResourceException( "Not supported: " + resource );
  
          Class clazz = (Class)schemeMap.get( scheme );
  
          if ( clazz == null ) {
              throw new MalformedResourceException( "Scheme not supported: " + scheme );
          }
  
          try {
  
              ResourceRequest request = (ResourceRequest)clazz.newInstance();
  
              request.setResource( resource );
  
              //setup resource request options.
              request.setIfModifiedSince( ifModifiedSince );
  
              //set the etag... when its null nothing will happen
              request.setEtag( etag );
  
              request.init();
  
              return request;
  
          } catch ( NetworkException ne ) {
              //already the right type: rethrow untouched so the request/URL
              //context it carries is not buried in a second wrapper.
              throw ne;
          } catch ( Throwable t ) {
              throw new NetworkException( t );
          }
  
      }
  
      /**
       * Get a ResourceRequest without If-Modified-Since and without an ETag.
       *
       * @see #getResourceRequest(String, long, String)
       */
      public static ResourceRequest getResourceRequest( String resource ) throws NetworkException {
          return getResourceRequest( resource, -1 );
      }
  
      /**
       * Get a ResourceRequest for the string form of the given URL.
       *
       * @see #getResourceRequest(String)
       */
      public static ResourceRequest getResourceRequest( URL resource ) throws NetworkException {
          return getResourceRequest( resource.toString() );
      }
  
      /**
       * Add an event listener to the factory.  This provides a mechanism to
       * give default listeners to each new ResourceRequest.
       */
      public static void addEventListener( NetworkEventListener listener ) {
  
          listeners.add( listener );
  
      }
  
      /**
       * Get all event listeners.  The iterator walks the live listener list.
       */
      public static Iterator getNetworkEventListeners() {
  
          return listeners.iterator();
  
      }
  
      /**
       * Make sure the factory is initialized.  Runs on every lookup: the
       * networking system properties are re-applied from the current values of
       * the public static settings each time, while the scheme map is only
       * built on the first call.
       */
      private synchronized static void init() {
  
          //FIXME: remove this until we figure out how to do proxy authentication.
          //java.net.Authenticator.setDefault ( new Authenticator() );
  
          // A full list of properties is available here:
  
          // http://java.sun.com/j2se/1.4.2/docs/guide/net/properties.html
  
          System.setProperty( "sun.net.inetaddr.ttl",
                              Integer.toString( NETWORKADDRESS_CACHE_TTL ) );
  
          System.setProperty( "sun.net.client.defaultReadTimeout",
                              Integer.toString( DEFAULT_READ_TIMEOUT ) );
  
          System.setProperty( "sun.net.client.defaultConnectTimeout",
                              Integer.toString( DEFAULT_CONNECT_TIMEOUT ) );
  
          if ( schemeMap == null ) {
  
              schemeMap = new HashMap();
  
              schemeMap.put( "file", URLResourceRequest.class );
              schemeMap.put( "http", URLResourceRequest.class );
              schemeMap.put( "https", URLResourceRequest.class );
              //schemeMap.put( "sitefilter", SiteFilterRequest.class );
              schemeMap.put( BlockingResourceRequest.SCHEME, BlockingResourceRequest.class );
  
          }
  
      }
  
      /**
       * Return true if we support fetching content with the given scheme.
       * Examples would be "http" and "file"
       */
      public static boolean isSupportedScheme( String scheme ) {
  
          //the scheme map is built lazily; without this a call made before the
          //first getResourceRequest() would hit a null map.
          ResourceRequestFactory.init();
  
          return schemeMap.get( scheme ) != null;
  
      }
  
      /**
       * When the transparent HTCache is enabled we will keep content local
       * similar to the Mozilla cache and return the cached copy and use
       * if-Modified-Since when necessary.
       */
      public static void setTransparentHTCacheEnabled( boolean enabled ) {
          transparentHTCacheEnabled = enabled;
      }
  
      /**
       * Return true if the transparent HTCache has been enabled.
       */
      public static boolean isTransparentHTCacheEnabled() {
          return transparentHTCacheEnabled;
      }
  
      /**
       * Enable/disable offline operation.
       */
      public static void setOffline( boolean offline ) {
          ResourceRequestFactory.offline = offline;
      }
  
      /**
       * Return true if offline operation has been enabled.
       */
      public static boolean isOffline() {
          return offline;
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLCookieManager.java
  
  Index: URLCookieManager.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  import java.util.zip.*;
  
  /**
   * 
   * By default java.net.URL does NOT handle cookies.  This is a simple extension
   * that allows us to persist cookies in the VM during runtime.
   * 
   * FIXME: How can we make sure to delete older sites...?!  no need for this to
   * grow to infinite size.
   * 
   * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
   */
  public class URLCookieManager {
  
      /** Per-site cookie tables, keyed by the value returned from getSite(). */
      static Hashtable cookies = new Hashtable();
  
      /**
       * Get the cookies for a site.  When none are available return null.
       *
       * @param site site key as produced by getSite()
       */
      public static Hashtable getCookies( String site ) {
  
          return (Hashtable)cookies.get( site );
  
      }
  
      /**
       * Add the stored cookies for the request's site to the request.  Does
       * nothing when no cookies are stored for the site.
       */
      public static void init( ResourceRequest request ) {
  
          String site = getSite( request );
  
          Hashtable siteCookies = getCookies( site );
  
          if ( siteCookies == null || siteCookies.isEmpty() )
              return;
  
          String header = getCookiesHeader( siteCookies );
  
          //the HTTP request header is "Cookie" (singular); "Cookies" is ignored
          //by servers.
          request.setRequestHeaderField( "Cookie", header );
  
      }
  
      /**
       * Get the site part (scheme and authority, e.g. "http://example.com") of
       * the request's resource.
       */
      public static String getSite( ResourceRequest request ) {
  
          String resource = request.getResource();
  
          //start looking for the path AFTER the "://" separator; searching from
          //the separator itself would stop at its own first slash.
          int schemeEnd = resource.indexOf( "://" );
          int hostBegin = ( schemeEnd == -1 ) ? 0 : schemeEnd + "://".length();
  
          int end = resource.indexOf( "/", hostBegin );
  
          if ( end == -1 )
              end = resource.length();
  
          return resource.substring( 0, end );
  
      }
  
      /**
       * Save the cookies FROM this request.
       *
       * Currently the Set-Cookie header is only parsed; nothing is stored yet
       * (see FIXME).  A response without a Set-Cookie header is ignored.
       */
      public static void save( ResourceRequest request ) {
  
          String header = request.getHeaderField( "Set-Cookie" );
  
          if ( header == null )
              return;
  
          Hashtable parsed = parseCookieHeader( header );
  
          String site = getSite( request );
  
          //FIXME: merge these... new cookies into the site cookies
          //NOTE(review): a Set-Cookie value also carries attributes (path,
          //expires, ...) which parseCookieHeader() returns as ordinary entries;
          //they need to be filtered before anything is stored.
  
      }
  
      /**
       * Build a Cookie header value ("name=value; name=value") from a table of
       * cookie names to values.
       *
       * @return the header value; empty when the table is null or empty
       */
      public static String getCookiesHeader( Hashtable cookies ) {
  
          StringBuffer buff = new StringBuffer( 1024 );
  
          if ( cookies == null )
              return buff.toString();
  
          Enumeration keys = cookies.keys();
  
          while ( keys.hasMoreElements() ) {
  
              String name = (String)keys.nextElement();
              String value = (String)cookies.get( name );
  
              if ( buff.length() > 0 )
                  buff.append( "; " );
  
              buff.append( name );
              buff.append( "=" );
              buff.append( value );
  
          }
  
          return buff.toString();
  
      }
  
      /**
       * Parse a given Cookie header into a hashtable of names to values.
       *
       * Example input: password=HvS11dffnlD50bOLZYgG4oZFA-U; username=burtonator
       *
       * Pairs are separated by ';' with any amount of surrounding whitespace.
       * Everything after the first '=' of a pair belongs to the value.  Segments
       * without a name or without '=' are skipped.
       *
       * @return the parsed cookies; empty when the header is null or has none
       */
      public static Hashtable parseCookieHeader( String header ) {
  
          Hashtable result = new Hashtable();
  
          if ( header == null )
              return result;
  
          StringTokenizer pairs = new StringTokenizer( header, ";" );
  
          while ( pairs.hasMoreTokens() ) {
  
              String pair = pairs.nextToken().trim();
  
              int split = pair.indexOf( '=' );
  
              //no '=' at all, or nothing in front of it: not a cookie
              if ( split <= 0 )
                  continue;
  
              String name = pair.substring( 0, split ).trim();
              String value = pair.substring( split + 1 );
  
              result.put( name, value );
  
          }
  
          return result;
  
      }
  
      public static void main( String[] args ) {
  
          parseCookieHeader( "password=HvS11dffnlD50bOLZYgG4oZFA-U; username=burtonator; rojoWeb=12.43.53.196.1091730560640949; JSESSIONID=B1245A7FEB43537E994324A157330F3A" );
  
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLResolver.java
  
  Index: URLResolver.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  
  /**
   * This is a portable and thin URL resolver.  The goal is to quickly resolve and
   * normalize URLs.  This includes potentially saving redirects and having them
   * fully qualified.
   * 
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: URLResolver.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class URLResolver {
  
      /** Only plain http URLs are normalized. */
      private static final String HTTP_PREFIX = "http://";
  
      /**
       * Normalize an http URL: a bare host ending in org/com/net gets a
       * trailing slash and a host consisting of just "domain.tld" gets a
       * "www." prefix.  Everything else is returned unchanged.
       *
       * @param resource the URL to normalize; may be null
       * @return the normalized URL, or null when resource is null
       */
      public static String resolve( String resource ) {
  
          if ( resource == null ) {
              return null;
          }
  
          //include "www" in hostnames like xmlhack.com
          resource = resolveTrailingSlash( resource );
          resource = resolveNoHostname( resource );
  
          return resource;
  
      }
  
      /**
       * Index of the first character after the authority (host) part of an
       * http URL, i.e. the start of the path, query or fragment, or the length
       * of the string when there is none.  Expects a string starting with
       * HTTP_PREFIX.
       */
      private static int authorityEnd( String resource ) {
  
          for ( int i = HTTP_PREFIX.length(); i < resource.length(); ++i ) {
  
              char c = resource.charAt( i );
  
              if ( c == '/' || c == '?' || c == '#' ) {
                  return i;
              }
  
          }
  
          return resource.length();
  
      }
  
      /**
       * Prefix "www." when the host has exactly one dot (domain.tld).  Only the
       * host is inspected: dots in the path or query must not count, and a host
       * without any dot (e.g. localhost) is not a domain missing its hostname.
       */
      private static String resolveNoHostname( String resource ) {
  
          if ( ! resource.startsWith( HTTP_PREFIX ) ) {
              return resource;
          }
  
          int hostBegin = HTTP_PREFIX.length();
  
          String authority = resource.substring( hostBegin, authorityEnd( resource ) );
  
          //user info in front of the host: prefixing would corrupt it
          if ( authority.indexOf( '@' ) != -1 ) {
              return resource;
          }
  
          int first = authority.indexOf( '.' );
  
          if ( first == -1 || authority.indexOf( '.', first + 1 ) != -1 ) {
              return resource;
          }
  
          //then we don't have a hostname on this domain.
          return HTTP_PREFIX + "www." + resource.substring( hostBegin );
  
      }
  
      /**
       * Append "/" to an http URL that consists of a host only and ends in
       * org, com or net.  URLs that already have a path, query or fragment are
       * left alone even when they happen to end in one of those suffixes.
       */
      private static String resolveTrailingSlash( String resource ) {
  
          if ( ! resource.startsWith( HTTP_PREFIX ) ) {
              return resource;
          }
  
          if ( authorityEnd( resource ) != resource.length() ) {
              return resource;
          }
  
          if ( resource.endsWith( "org" ) ||
               resource.endsWith( "com" ) ||
               resource.endsWith( "net" ) ) {
  
              return resource + "/";
  
          }
  
          return resource;
  
      }
  
  }
  
  
  
  1.1                  jakarta-commons-sandbox/feedparser/src/java/org/apache/commons/feedparser/network/URLResourceRequest.java
  
  Index: URLResourceRequest.java
  ===================================================================
  /*
   * Copyright 1999,2004 The Apache Software Foundation.
   * 
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   * 
   *      http://www.apache.org/licenses/LICENSE-2.0
   * 
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  package org.apache.commons.feedparser.network;
  
  import java.io.*;
  import java.net.*;
  import java.util.*;
  import java.util.zip.*;
  
  import org.apache.log4j.*;
  
  import sun.net.www.protocol.http.HttpURLConnection;
  
  /**
   * ResourceRequest implementation that uses java.net.URL as the backend.
   *
   * Differences from other ResourceRequests.
   *
   * setRequestMethod() - Allows us to change the request type (HEAD, etc).
   * 
   * getContentLength() - Returns the length/size of the content represented by
   * this resource.  Can be used by clients with setRequestMethod( "HEAD" ) to
   * find the size of a remote resource without doing a full fetch.
   *
   * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton</a>
   * @version $Id: URLResourceRequest.java,v 1.1 2005/01/25 07:55:19 burton Exp $
   */
  public class URLResourceRequest extends BaseResourceRequest implements ResourceRequest {
  
      private static Logger log = Logger.getLogger( URLResourceRequest.class.getName() );
  
      /** Name of the request header set to GZIP_ENCODING in initConnection(). */
      public static final String ACCEPT_ENCODING_HEADER = "Accept-Encoding";
      /** Name of the request header that carries the ETag in initConnection(). */
      public static final String IF_NONE_MATCH_HEADER = "If-None-Match";
  
      /** Value sent in the Accept-Encoding request header. */
      public static final String GZIP_ENCODING = "gzip";
  
      /** Name of the request header that carries the user agent. */
      public static final String USER_AGENT_HEADER = "User-Agent";
  
      /**
       * Default user agent, used by initConnection() when the caller has not
       * set a User-Agent request header.
       */
      public static String USER_AGENT
          = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1; aggregator:FeedParser; http://jakarta.apache.org/commons/sandbox/feedparser/) Gecko/20021130";
  
      /**
       * User agent without the aggregator marker; initConnection() uses it for
       * resources starting with "http://diveintomark.org".
       */
      public static String USER_AGENT_MOZILLA
          = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.2.1) Gecko/20021130";
  
      /**
       * Not used anymore.  Provided for historical reasons.
       */
      public static final String REFERER
          = "http://jakarta.apache.org/commons/sandbox/feedparser/?isAggregator=true";
      
      /**
       * Largest content length accepted by initConnection() for resources that
       * do not start with "file://"; larger content raises a NetworkException.
       */
      public static final int MAX_CONTENT_LENGTH = 1000000;
      
      /** URL built from the resource by init(). */
      private URL _url = null;
  
      /** Connection opened from _url by init(). */
      private URLConnection _urlConnection = null;
  
      private InputStream inputStream = null;
  
      /** Set to true as soon as initConnection() has been entered. */
      private boolean initConnection = false;
      
      /**
       * 
       * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
       */
      public void init() throws IOException {
  
          final String target = this.getResource();
  
          //NOTE: the transparent HTCache short-circuit used to live here (serve
          //the input stream from the cache instead of the network).  It was
          //removed because the htcache wasn't portable; it can be OSS'd in the
          //future if necessary.
  
          //only build the URL and open the connection when we are online;
          //offline there is nothing to init.
          if ( ! ResourceRequestFactory.isOffline() ) {
  
              _url = new URL( target );
              _urlConnection = _url.openConnection();
  
          }
  
      }
  
      /**
       * Init the actual connection.  Should be called AFTER init() but before
       * getInputStream() so that we can set any runtime params requestMethod,
       * etc.  If getInputStream() is called without an initConnection() we do
       * this automatically.  initConnection() might not want to be called when
       * doing a HEAD request.
       * 
       * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
       */
      public void initConnection() throws NetworkException {
  
          //init() leaves the connection unset when the factory is offline (or
          //when it was never called); report that through the declared
          //exception instead of a NullPointerException further down.
          if ( _urlConnection == null ) {
              throw new NetworkException( "Connection not initialized (offline or init() not called) - " + getResource() );
          }
  
          long before = System.currentTimeMillis();
  
          initConnection = true;
  
          this.fireInit();
  
          //FIXME: do smart user agent detection.  if this is a .html file we can
          //set it to us Mozilla and if not we can use NewsMonster
          //_urlConnection.setRequestProperty( "Referer", REFERER );
  
          String resource = this.getResource();
  
          //set the user agent if it hasn't ALREADY been set by the caller.
          if ( getRequestHeaderField( USER_AGENT_HEADER ) == null ) {
  
              if ( resource.startsWith( "http://diveintomark.org" ) ) {
                  //We can play that game too Mark.  Nice try...
                  _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT_MOZILLA );
              } else {
                  _urlConnection.setRequestProperty( USER_AGENT_HEADER, USER_AGENT );
              }
  
          }
  
          _urlConnection.setRequestProperty( ACCEPT_ENCODING_HEADER, GZIP_ENCODING );
  
          //copy over any headers set in the request..
  
          Iterator it = getRequestHeaderFields();
  
          while ( it.hasNext() ) {
  
              String key = (String)it.next();
  
              _urlConnection.setRequestProperty( key, getRequestHeaderField( key ) );
  
          }
  
          //Test against the public java.net type (fully qualified because the
          //file imports the sun.* class under the same simple name): HTTPS
          //connections extend java.net.HttpURLConnection but not the sun.*
          //HTTP implementation and would otherwise skip this whole section.
          if ( _urlConnection instanceof java.net.HttpURLConnection ) {
  
              java.net.HttpURLConnection httpURLConn = (java.net.HttpURLConnection)_urlConnection;
  
              //only the per-instance setting: setFollowRedirects() is static
              //and would change the default of every connection in the JVM.
              httpURLConn.setInstanceFollowRedirects( getFollowRedirects() );
  
              if ( this.getIfModifiedSince() != -1 )
                  httpURLConn.setIfModifiedSince( this.getIfModifiedSince() );
  
              if ( getEtag() != null )
                  httpURLConn.setRequestProperty( IF_NONE_MATCH_HEADER, getEtag() );
  
              try {
  
                  httpURLConn.connect();
  
                  this.setResponseCode( httpURLConn.getResponseCode() );
  
              } catch ( IOException e ) {
                  //keep the cause and the request context for the caller
                  throw new NetworkException( e, this, _url, _urlConnection );
              }
  
          }
  
          int contentLength = _urlConnection.getContentLength();
  
          //bigger than 1 meg and it is a remote document (it is safe to process
          //local documents)
          if ( contentLength > MAX_CONTENT_LENGTH &&
               this.getResource().startsWith( "file://" ) == false ) {
  
              //NOTE: make 100% sure this doesn't just go ahead and download the
              //file FIRST before doing a HEAD.  I think that's what happens but I
              //might be wrong.
  
              throw new NetworkException( "Content is too large - " + contentLength + " - " + getResource() );
  
          }
  
          long after = System.currentTimeMillis();
  
          log.debug( getResource() + " - init duration: " + (after-before) );
  
      }
  
      /**
       * Cached reflection handle for the private "http" field declared on the
       * runtime class of _urlConnection.  It starts out null and is filled in
       * lazily by getResourceFromRedirect().  Despite the constant-style name
       * this is a package-private, non-final, per-instance field.
       */
      java.lang.reflect.Field FIELD_HTTP_URL_CONNECTION_HTTP = null;
      /**
       * Cached reflection handle for the private "url" field declared on the
       * runtime class of the object stored in the connection's "http" field.
       * It starts out null and is filled in lazily by
       * getResourceFromRedirect().  Like the field above it is per-instance
       * and non-final.
       */
      java.lang.reflect.Field FIELD_HTTP_CLIENT_URL = null;
      
      /**
       * Best-effort lookup of the URL this request actually ended up at after
       * any redirects were followed.
       *
       * Uses reflection to read the private "http" field declared on the
       * runtime class of the underlying connection and then the private "url"
       * field of that object, returning its string form.  On Sun's JVM these
       * are sun.net.www.protocol.http.HttpURLConnection and
       * sun.net.www.http.HttpClient, the latter holding the redirected
       * java.net.URL.  The reflection handles are cached in
       * FIELD_HTTP_URL_CONNECTION_HTTP and FIELD_HTTP_CLIENT_URL, and only
       * once they have been made accessible.
       *
       * Whenever the redirected URL cannot be determined (non-HTTP connection,
       * different JVM internals, restricted reflection, connection already
       * cleaned up) the original resource is returned instead.  JVM errors
       * (java.lang.Error) are no longer swallowed.
       *
       * @return the redirected URL as a string, or getResource() when it
       *         cannot be determined.
       * @author <a href="mailto:burton@rojo.com">Kevin A. Burton</a>
       */
      public String getResourceFromRedirect() {

          try {

              if ( FIELD_HTTP_URL_CONNECTION_HTTP == null ) {

                  //Note: when using a FILE URL this won't work!
                  java.lang.reflect.Field httpField =
                      _urlConnection.getClass().getDeclaredField( "http" );
                  httpField.setAccessible( true );

                  //only cache the handle once it is known to be usable.
                  FIELD_HTTP_URL_CONNECTION_HTTP = httpField;

              }

              Object http = FIELD_HTTP_URL_CONNECTION_HTTP.get( _urlConnection );

              //when the connection has already cleaned itself up 'http' will
              //be null here.
              if ( http == null ) {
                  return getResource();
              }

              if ( FIELD_HTTP_CLIENT_URL == null ) {

                  java.lang.reflect.Field urlField =
                      http.getClass().getDeclaredField( "url" );
                  urlField.setAccessible( true );

                  FIELD_HTTP_CLIENT_URL = urlField;

              }

              Object url = FIELD_HTTP_CLIENT_URL.get( http );

              //no URL recorded; fall back explicitly instead of relying on a
              //NullPointerException being caught below.
              if ( url == null ) {
                  return getResource();
              }

              //this will be a java.net.URL so toString() yields the full URI.
              return url.toString();

          } catch ( Exception e ) {

              //Deliberately best-effort: every reflection failure
              //(NoSuchFieldException, IllegalAccessException,
              //SecurityException, ...) just means the redirect target is
              //unknown, so report the original resource.
              return getResource();

          }

      }
  
      /**
       * Obtain the content stream for this request, translating any
       * IOException raised by _getInputStream() into a NetworkException that
       * carries this request, its URL and its connection.
       *
       * @return the stream produced by _getInputStream().
       * @throws NetworkException wrapping the IOException; when that
       *         exception's cause is a FileNotFoundException the message is
       *         rewritten to "File not found: ..." using the cause's message.
       */
      public InputStream getInputStream() throws NetworkException {

          try {

              return _getInputStream();

          } catch ( IOException e ) {

              //the modern VM buries the FileNotFoundException as the cause,
              //which prevents catching it directly.
              String message = ( e.getCause() instanceof FileNotFoundException )
                  ? "File not found: " + e.getCause().getMessage()
                  : e.getMessage();

              throw new NetworkException( message, e, this, _url, _urlConnection );

          }

      }
      
      /**
       * Open (on first use) and return the content stream of the underlying
       * connection.
       *
       * The connection is initialised if that has not happened yet.  When the
       * ResourceRequestFactory is offline an IOException is thrown.  Otherwise
       * the connection's stream is wrapped in an AdvancedInputStream and, when
       * the response declares a gzip Content-Encoding, additionally in a
       * GZIPInputStream.  The finished stream is remembered in this.inputStream
       * and returned again on later calls.  On every call the resource is
       * updated to the value of getResourceFromRedirect().
       *
       * The wrapper chain is assembled in local variables and only stored in
       * the field once complete, so a failure while wrapping (e.g. a corrupt
       * gzip header) cannot leave a still-compressed stream cached for later
       * calls; in that case the raw connection stream is closed before the
       * exception propagates.
       *
       * @return the (possibly decompressing) content stream.
       * @throws IOException if the connection cannot be initialised or opened,
       *         if the factory is offline, or if the gzip header is invalid.
       * @author <a href="mailto:burton@openprivacy.org">Kevin A. Burton (burtonator)</a>
       */
      public InputStream _getInputStream() throws IOException {

          if ( ! initConnection ) { initConnection(); }

          String resource = this.getResource();

          //NOTE(review): the offline check runs after initConnection(), so the
          //connection may already have been set up while offline -- confirm
          //whether this ordering is intended.
          if ( ResourceRequestFactory.isOffline() ) {

              //the transparent HTCache lookup that used to live here is
              //disabled, so being offline always fails.
              throw new IOException( "ResourceRequestFactory is offline and content was not in cache - " +
                                     resource );

          }

          //if we are using an input stream NOT from init()
          if ( this.inputStream == null ) {

              InputStream raw = _urlConnection.getInputStream();
              boolean published = false;

              try {

                  InputStream stream = new AdvancedInputStream( raw, this );

                  //note.  the advanced input stream must be wrapped by the
                  //GZIP input stream and not vice-versa or we will end up
                  //with incorrect results.
                  //
                  //HTTP content-coding tokens are case-insensitive, hence
                  //equalsIgnoreCase (which is also false for a null header).
                  if ( GZIP_ENCODING.equalsIgnoreCase( _urlConnection.getContentEncoding() ) ) {
                      stream = new GZIPInputStream( stream );
                  }

                  this.inputStream = stream;
                  published = true;

              } finally {

                  //wrapping failed: release the raw stream and leave the
                  //field null so a later call starts from scratch.
                  if ( ! published ) {
                      try {
                          raw.close();
                      } catch ( IOException ignored ) {
                          //the original failure is the one worth reporting.
                      }
                  }

              }

              //TODO: the transparent HTCache store/fetch step that used to
              //follow here is currently disabled.

          }

          setResource( getResourceFromRedirect() );

          return inputStream;

      }
  
      /**
       * Apply the given request method to the underlying connection when it
       * is an HttpURLConnection; any other kind of connection is left
       * untouched.
       *
       * @param method the request method handed to
       *        HttpURLConnection.setRequestMethod().
       * @throws NetworkException carrying the message of, and caused by, the
       *         ProtocolException raised by the connection.
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      public void setRequestMethod( String method ) throws NetworkException {

          //nothing to do for non-HTTP connections.
          if ( ! ( _urlConnection instanceof HttpURLConnection ) ) {
              return;
          }

          HttpURLConnection httpConnection = (HttpURLConnection)_urlConnection;

          try {

              httpConnection.setRequestMethod( method );

          } catch ( ProtocolException pe ) {

              NetworkException wrapped = new NetworkException( pe.getMessage() );
              wrapped.initCause( pe );
              throw wrapped;

          }

      }
  
      /**
       * Report the content length of the underlying connection, initialising
       * the connection first when that has not happened yet.
       *
       * @return whatever the underlying URLConnection reports as its content
       *         length.
       * @throws IOException if initialising the connection fails.
       * @author <a href="mailto:burton@peerfear.org">Kevin A. Burton</a>
       */
      public int getContentLength() throws IOException {

          if ( ! initConnection ) {
              initConnection();
          }

          int length = _urlConnection.getContentLength();

          return length;

      }
      
      /**
       * Look up a response header on the underlying connection.  Unlike
       * getContentLength() this does not initialise the connection itself.
       *
       * @param name the header field name.
       * @return the value the underlying URLConnection reports for that
       *         header.
       */
      public String getHeaderField( String name ) {

          String value = _urlConnection.getHeaderField( name );

          return value;

      }
  
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: commons-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: commons-dev-help@jakarta.apache.org