You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by hu...@apache.org on 2002/08/04 20:33:51 UTC

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/generation LinkStatusGenerator.java

huber       2002/08/04 11:33:51

  Modified:    src/java/org/apache/cocoon/generation
                        LinkStatusGenerator.java
  Log:
  Explicitly close BufferedReader and HttpURLConnection
  
  Revision  Changes    Path
  1.5       +170 -101  xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java
  
  Index: LinkStatusGenerator.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- LinkStatusGenerator.java	2 Aug 2002 07:06:21 -0000	1.4
  +++ LinkStatusGenerator.java	4 Aug 2002 18:33:51 -0000	1.5
  @@ -1,3 +1,53 @@
  +/*
  + 
  + ============================================================================
  +                   The Apache Software License, Version 1.1
  + ============================================================================
  + 
  + Copyright (C) 1999-2002 The Apache Software Foundation. All rights reserved.
  + 
  + Redistribution and use in source and binary forms, with or without modifica-
  + tion, are permitted provided that the following conditions are met:
  + 
  + 1. Redistributions of  source code must  retain the above copyright  notice,
  +    this list of conditions and the following disclaimer.
  + 
  + 2. Redistributions in binary form must reproduce the above copyright notice,
  +    this list of conditions and the following disclaimer in the documentation
  +    and/or other materials provided with the distribution.
  + 
  + 3. The end-user documentation included with the redistribution, if any, must
  +    include  the following  acknowledgment:  "This product includes  software
  +    developed  by the  Apache Software Foundation  (http://www.apache.org/)."
  +    Alternately, this  acknowledgment may  appear in the software itself,  if
  +    and wherever such third-party acknowledgments normally appear.
  + 
  + 4. The names "Apache Cocoon" and  "Apache Software Foundation" must  not  be
  +    used to  endorse or promote  products derived from  this software without
  +    prior written permission. For written permission, please contact
  +    apache@apache.org.
  + 
  + 5. Products  derived from this software may not  be called "Apache", nor may
  +    "Apache" appear  in their name,  without prior written permission  of the
  +    Apache Software Foundation.
  + 
  + THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES,
  + INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
  + FITNESS  FOR A PARTICULAR  PURPOSE ARE  DISCLAIMED.  IN NO  EVENT SHALL  THE
  + APACHE SOFTWARE  FOUNDATION  OR ITS CONTRIBUTORS  BE LIABLE FOR  ANY DIRECT,
  + INDIRECT, INCIDENTAL, SPECIAL,  EXEMPLARY, OR CONSEQUENTIAL  DAMAGES (INCLU-
  + DING, BUT NOT LIMITED TO, PROCUREMENT  OF SUBSTITUTE GOODS OR SERVICES; LOSS
  + OF USE, DATA, OR  PROFITS; OR BUSINESS  INTERRUPTION)  HOWEVER CAUSED AND ON
  + ANY  THEORY OF LIABILITY,  WHETHER  IN CONTRACT,  STRICT LIABILITY,  OR TORT
  + (INCLUDING  NEGLIGENCE OR  OTHERWISE) ARISING IN  ANY WAY OUT OF THE  USE OF
  + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  + 
  + This software  consists of voluntary contributions made  by many individuals
  + on  behalf of the Apache Software  Foundation and was  originally created by
  + Stefano Mazzocchi  <st...@apache.org>. For more  information on the Apache
  + Software Foundation, please see <http://www.apache.org/>.
  + 
  + */
   package org.apache.cocoon.generation;
   
   import org.apache.avalon.excalibur.pool.Recyclable;
  @@ -40,23 +90,23 @@
   public class LinkStatusGenerator extends ComposerGenerator implements Recyclable, Configurable {
       /** The URI of the namespace of this generator. */
       protected static final String URI =
  -        "http://apache.org/cocoon/linkstatus/2.0";
  -
  +    "http://apache.org/cocoon/linkstatus/2.0";
  +    
       /** The namespace prefix for this namespace. */
       protected static final String PREFIX = "linkstatus";
  -
  +    
       /* Node and attribute names */
       protected static final String TOP_NODE_NAME         = "linkstatus";
       protected static final String LINK_NODE_NAME         = "link";
  -
  +    
       protected static final String HREF_ATTR_NAME    = "href";
       protected static final String REFERRER_ATTR_NAME    = "referrer";
       protected static final String CONTENT_ATTR_NAME    = "content";
       protected static final String STATUS_ATTR_NAME    = "status";
  -    protected static final String MESSAGE_ATTR_NAME    = "message";     
  -
  +    protected static final String MESSAGE_ATTR_NAME    = "message";
  +    
       protected AttributesImpl attributes = new AttributesImpl();
  -
  +    
       /**
        * Config element name specifying expected link content-typ.
        * <p>
  @@ -66,7 +116,7 @@
        * @since
        */
       public final static String LINK_CONTENT_TYPE_CONFIG = "link-content-type";
  -
  +    
       /**
        * Default value of <code>link-content-type</code> configuration value.
        * <p>
  @@ -96,7 +146,7 @@
        * @since
        */
       public final static String LINK_VIEW_QUERY_DEFAULT = "cocoon-view=links";
  -
  +    
       /**
        * Config element name specifying excluding regular expression pattern.
        * <p>
  @@ -106,7 +156,7 @@
        * @since
        */
       public final static String EXCLUDE_CONFIG = "exclude";
  -
  +    
       /**
        * Config element name specifying including regular expression pattern.
        * <p>
  @@ -116,7 +166,7 @@
        * @since
        */
       public final static String INCLUDE_CONFIG = "include";
  -
  +    
       /**
        * Config element name specifying http header value for user-Agent.
        * <p>
  @@ -133,7 +183,7 @@
        * @since
        */
       public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
  -
  +    
       /**
        * Config element name specifying http header value for accept.
        * <p>
  @@ -152,43 +202,42 @@
        * @since
        */
       public final static String ACCEPT_DEFAULT = "*/*";
  -
  +    
       private String linkViewQuery = LINK_VIEW_QUERY_DEFAULT;
       private String linkContentType = LINK_CONTENT_TYPE_DEFAULT;
       private HashSet excludeCrawlingURL;
       private HashSet includeCrawlingURL;
       private String userAgent = USER_AGENT_DEFAULT;
       private String accept = ACCEPT_DEFAULT;
  -
  +    
       private HashSet crawled;
       private HashSet linksToProcess;
  -
  +    
       /**
        * Stores links to process and the referrer links
        */
  -
       private class Link {
           private URL url;
           private String referrer;
  -
  +        
           public Link( URL url, String referrer ) {
               this.url = url;
               this.referrer = referrer;
           }
  -
  +        
           public URL getURL() {
               return url;
           }
  -
  +        
           public String getReferrer() {
               return referrer;
           }
  -
  +        
           public boolean equals( Link l ) {
               return url.equals( l.getURL());
           }
       }
  -
  +    
       /**
        * Configure the crawler component.
        * <p>
  @@ -214,8 +263,8 @@
        * @since
        */
       public void configure(Configuration configuration)
  -        throws ConfigurationException {
  -
  +    throws ConfigurationException {
  +        
           Configuration[] children;
           children = configuration.getChildren(INCLUDE_CONFIG);
           if (children != null && children.length > 0) {
  @@ -229,12 +278,12 @@
                           this.includeCrawlingURL.add(new RE(tokenized_pattern));
                       }
                   } catch (RESyntaxException rese) {
  -                    getLogger().error("Cannot create includeing regular-expression for " + 
  -                                      pattern, rese);
  +                    getLogger().error("Cannot create including regular-expression for " +
  +                    pattern, rese);
                   }
               }
           }
  -
  +        
           children = configuration.getChildren(EXCLUDE_CONFIG);
           if (children != null && children.length > 0) {
               excludeCrawlingURL = new HashSet();
  @@ -247,15 +296,15 @@
                           this.excludeCrawlingURL.add(new RE(tokenized_pattern));
                       }
                   } catch (RESyntaxException rese) {
  -                    getLogger().error("Cannot create excluding regular-expression for " + 
  -                                      pattern, rese);
  +                    getLogger().error("Cannot create excluding regular-expression for " +
  +                    pattern, rese);
                   }
               }
           } else {
               excludeCrawlingURL = new HashSet();
               setDefaultExcludeFromCrawling();
           }
  -
  +        
           Configuration child;
           String value;
           child = configuration.getChild(LINK_CONTENT_TYPE_CONFIG, false);
  @@ -272,7 +321,7 @@
                   this.linkViewQuery = value.trim();
               }
           }
  -
  +        
           child = configuration.getChild(USER_AGENT_CONFIG, false);
           if (child != null) {
               value = child.getValue();
  @@ -280,7 +329,7 @@
                   this.userAgent = value;
               }
           }
  -
  +        
           child = configuration.getChild(ACCEPT_CONFIG, false);
           if (child != null) {
               value = child.getValue();
  @@ -289,20 +338,20 @@
               }
           }
       }
  -
  +    
       public void setup(SourceResolver resolver, Map objectModel, String src, Parameters par)
  -        throws ProcessingException, SAXException, IOException {
  -
  +    throws ProcessingException, SAXException, IOException {
  +        
           super.setup(resolver, objectModel, src, par);
  -
  +        
           /* Create a reusable attributes for creating nodes */
           this.attributes = new AttributesImpl();
  -
  +        
           // already done in configure...
           //excludeCrawlingURL = new HashSet();
           //this.setDefaultExcludeFromCrawling();
       }
  -
  +    
       /**
        * Generate XML data.
        *
  @@ -312,42 +361,42 @@
        *      if the requsted URI wasn't found
        */
       public void generate()
  -        throws SAXException, ProcessingException {
  +    throws SAXException, ProcessingException {
           try {
  -
  +            
               crawled = new HashSet();
               linksToProcess = new HashSet();
  -
  +            
               URL root = new URL(source);
               linksToProcess.add(new Link( root, ""));
  -
  -
  +            
  +            
               if (getLogger().isDebugEnabled()) {
                   getLogger().debug("crawl URL " + root);
               }
  -
  +            
               this.contentHandler.startDocument();
               this.contentHandler.startPrefixMapping(PREFIX,URI);
  -
  +            
               attributes.clear();
               super.contentHandler.startElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME, attributes);
  -
  +            
               while (linksToProcess.size() > 0) {
                   Iterator i = linksToProcess.iterator();
  -
  +                
                   if (i.hasNext()) {
                       // fetch a URL
                       Link link = (Link) i.next();
  -                    URL url = link.getURL();            
  -
  +                    URL url = link.getURL();
  +                    
                       // remove it from the to-do list
                       linksToProcess.remove(link);
  -
  +                    
                       String new_url_link = processURL(url, link.getReferrer());
  -
  +                    
                       // calc all links from this url
                       if (new_url_link != null) {
  -
  +                        
                           List url_links = getLinksFromConnection(new_url_link, url);
                           if (url_links != null) {
                               // add links of this url to the to-do list
  @@ -356,7 +405,7 @@
                       }
                   }
               }
  -
  +            
               super.contentHandler.endElement(URI, TOP_NODE_NAME, URI+':'+TOP_NODE_NAME);
               this.contentHandler.endPrefixMapping(PREFIX);
               this.contentHandler.endDocument();
  @@ -365,7 +414,7 @@
               throw new ResourceNotFoundException("Could not read source ", ioe);
           }
       }
  -
  +    
       /**
        * Default exclude patterns.
        * <p>
  @@ -389,19 +438,19 @@
               ".*\\.js(\\?.*)?$",
               ".*\\.css(\\?.*)?$"
           };
  -
  +        
           for (int i = 0; i < EXCLUDE_FROM_CRAWLING_DEFAULT.length; i++) {
               String pattern = EXCLUDE_FROM_CRAWLING_DEFAULT[i];
               try {
                   excludeCrawlingURL.add(new RE(pattern));
               } catch (RESyntaxException rese) {
                   getLogger().error("Cannot create excluding regular-expression for " +
  -                                  pattern, rese);
  +                pattern, rese);
               }
           }
       }
  -
  -
  +    
  +    
       /**
        * Retrieve a list of links of a url
        *
  @@ -410,31 +459,38 @@
        *   <code>http://host/foo/bar?cocoon-view=links</code>
        * @param url_of_referrer base url of which links are requested, ie of the form
        *   <code>http://host/foo/bar</code>
  -     * @return List of links from url_of_referrer, as result of requesting url 
  +     * @return List of links from url_of_referrer, as result of requesting url
        *   url_link_string
        */
       protected List getLinksFromConnection(String url_link_string, URL url_of_referrer) {
           List url_links = null;
  +        BufferedReader br = null;
           try {
               URL url_link = new URL( url_link_string );
               URLConnection conn = url_link.openConnection();
               String content_type = conn.getContentType();
  -
  +            
  +            if (content_type == null) {
  +                getLogger().warn( "No content type available for " + String.valueOf( url_link_string ) );
  +                // caller checks if null
  +                return url_links;
  +            }
  +            
               if (getLogger().isDebugEnabled()) {
                   getLogger().debug("Content-type: " + content_type);
               }
  -
  +            
               if (content_type.equals(linkContentType)) {
                   url_links = new ArrayList();
  -
  +                
                   InputStream is = conn.getInputStream();
  -                BufferedReader br = new BufferedReader(new InputStreamReader(is));
  -
  +                br = new BufferedReader(new InputStreamReader(is));
  +                
                   // content is supposed to be a list of links,
                   // relative to current URL
                   String line;
                   String referrer = url_of_referrer.toString();
  -
  +                
                   while ((line = br.readLine()) != null) {
                       URL new_url = new URL(url_link, line);
                       boolean add_url = true;
  @@ -442,22 +498,22 @@
                       if (add_url) {
                           add_url &= !url_links.contains(new_url);
                       }
  -
  +                    
                       // don't add new_url if it has been crawled already
                       if (add_url) {
                           add_url &= !crawled.contains(new_url.toString());
                       }
  -
  +                    
                       Link new_link = new Link( new_url, referrer );
                       if (add_url) {
                           add_url &= !linksToProcess.contains(new_link);
                       }
  -
  +                    
                       // don't add if is not matched by existing include definition
                       if (add_url) {
                           add_url &= isIncludedURL(new_url.toString());
                       }
  -
  +                    
                       if (add_url) {
                           if (getLogger().isDebugEnabled()) {
                               getLogger().debug("Add URL: " + new_url.toString());
  @@ -469,10 +525,18 @@
               }
           } catch (IOException ioe) {
               getLogger().warn("Problems get links of " + url_link_string, ioe);
  +        } finally {
  +            // explictly close the stream
  +            if (br != null) {
  +                try {
  +                    br.close();
  +                    br = null;
  +                } catch (IOException ignored) {}
  +            }
           }
           return url_links;
       }
  -
  +    
       /**
        * Generate xml attributes of a url, calculate url for retrieving links
        *
  @@ -482,57 +546,63 @@
        *   and not an included-url.
        */
       protected String processURL(URL url, String referrer) throws SAXException {
  -
  +        
           if (getLogger().isDebugEnabled()) {
               getLogger().debug("getLinks URL " + url);
           }
  -
  +        
           String result = null;
  -
  +        
           // don't try to investigate a url which has been crawled already
           if (crawled.contains(url.toString())) {
               return null;
           }
  -
  +        
           // mark it as crawled
           crawled.add(url.toString());
  -
  +        
           attributes.clear();
           attributes.addAttribute("", HREF_ATTR_NAME,
  -                                HREF_ATTR_NAME, "CDATA", url.toString());
  +        HREF_ATTR_NAME, "CDATA", url.toString());
           attributes.addAttribute("", REFERRER_ATTR_NAME,
  -                                REFERRER_ATTR_NAME, "CDATA", referrer);
  -
  +        REFERRER_ATTR_NAME, "CDATA", referrer);
  +        
           // Output url, referrer, content-type, status, message for traversable url's
  +        HttpURLConnection h = null;
           try {
  +            
               URLConnection links_url_connection = url.openConnection();
  -            HttpURLConnection h = (HttpURLConnection)links_url_connection;
  +            h = (HttpURLConnection)links_url_connection;
               String content_type = links_url_connection.getContentType();
  -
  +            
               attributes.addAttribute("", CONTENT_ATTR_NAME,
  -                                    CONTENT_ATTR_NAME, "CDATA",
  -                                    content_type);
  -
  +            CONTENT_ATTR_NAME, "CDATA",
  +            content_type);
  +            
               attributes.addAttribute("", MESSAGE_ATTR_NAME,
  -                                    MESSAGE_ATTR_NAME, "CDATA",
  -                                    h.getResponseMessage());
  -
  +            MESSAGE_ATTR_NAME, "CDATA",
  +            h.getResponseMessage());
  +            
               attributes.addAttribute("", STATUS_ATTR_NAME,
  -                                    STATUS_ATTR_NAME, "CDATA",
  -                                    String.valueOf(h.getResponseCode()));
  +            STATUS_ATTR_NAME, "CDATA",
  +            String.valueOf(h.getResponseCode()));
           } catch (IOException ioe) {
               attributes.addAttribute("", MESSAGE_ATTR_NAME,
  -                                        MESSAGE_ATTR_NAME, "CDATA",
  -                                        ioe.getMessage());
  +            MESSAGE_ATTR_NAME, "CDATA",
  +            ioe.getMessage());
  +        } finally {
  +            if (h != null) {
  +                h.disconnect();
  +            }
           }
  -                                
  +        
           // don't try to get links of a url which is excluded from crawling
  -        // try to get links of a url which is included for crawling 
  +        // try to get links of a url which is included for crawling
           if (!isExcludedURL(url.toString()) && isIncludedURL( url.toString() )) {
               // add prefix and query to get data from the linkserializer.
               result = url.toExternalForm()
  -                + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&") 
  -                + linkViewQuery;
  +            + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&")
  +            + linkViewQuery;
           }
           
           super.contentHandler.startElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME, attributes);
  @@ -540,7 +610,7 @@
           
           return result;
       }
  -
  +    
       /**
        * check if URL is a candidate for indexing
        *
  @@ -556,7 +626,7 @@
               }
               return false;
           }
  -
  +        
           final String s = url.toString();
           Iterator i = excludeCrawlingURL.iterator();
           while (i.hasNext()) {
  @@ -573,8 +643,8 @@
           }
           return false;
       }
  -
  -
  +    
  +    
       /**
        * check if URL is a candidate for indexing
        *
  @@ -590,7 +660,7 @@
               }
               return true;
           }
  -
  +        
           final String s = url.toString();
           Iterator i = includeCrawlingURL.iterator();
           while (i.hasNext()) {
  @@ -607,12 +677,11 @@
           }
           return false;
       }
  -
  +    
       public void recycle() {
           super.recycle();
  -
  +        
           this.attributes = null;
           //this.excludeCrawlingURL = null;
       }
   }
  -
  
  
  

----------------------------------------------------------------------
In case of troubles, e-mail:     webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org