You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by hu...@apache.org on 2002/06/30 18:36:11 UTC

cvs commit: xml-cocoon2/src/java/org/apache/cocoon/generation LinkStatusGenerator.java

huber       2002/06/30 09:36:11

  Modified:    src/java/org/apache/cocoon/generation
                        LinkStatusGenerator.java
  Log:
  Added more javadoc comments; fixed attribute generation so that the attribute
  values describe the url itself, not the url built for requesting its links
  
  Revision  Changes    Path
  1.2       +68 -78    xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java
  
  Index: LinkStatusGenerator.java
  ===================================================================
  RCS file: /home/cvs/xml-cocoon2/src/java/org/apache/cocoon/generation/LinkStatusGenerator.java,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- LinkStatusGenerator.java	14 Jun 2002 16:19:14 -0000	1.1
  +++ LinkStatusGenerator.java	30 Jun 2002 16:36:11 -0000	1.2
  @@ -35,7 +35,8 @@
    *
    * @author Michael Homeijer
    * @author Nicola Ken Barozzi (nicolaken@apache.org)
  -*/
  + * @author Bernhard Huber (huber@apache.org)
  + */
   
   public class LinkStatusGenerator extends ComposerGenerator implements Recyclable, Configurable {
       /** The URI of the namespace of this generator. */
  @@ -128,10 +129,8 @@
       public final static String USER_AGENT_CONFIG = "user-agent";
       /**
        * Default value of <code>user-agent</code> configuration value.
  -     * <p>
  -     *   Its value is @see org.apache.cocoon.Constants#COMPLETE_NAME.
  -     * </p>
        *
  +     * @see org.apache.cocoon.Constants#COMPLETE_NAME
        * @since
        */
       public final static String USER_AGENT_DEFAULT = Constants.COMPLETE_NAME;
  @@ -203,10 +202,12 @@
        *  query-string appended to each crawling request.
        * </p>
        * <pre><tt>
  -     * &lt;include&gt;.*\.html?&lt;/exclude&gt; or &lt;exclude&gt;.*\.html?, .*\.xsp&lt;/exclude&gt;
  +     * &lt;include&gt;.*\.html?&lt;/include&gt; or &lt;include&gt;.*\.html?, .*\.xsp&lt;/include&gt;
        * &lt;exclude&gt;.*\.gif&lt;/exclude&gt; or &lt;exclude&gt;.*\.gif, .*\.jpe?g&lt;/exclude&gt;
        * &lt;link-content-type&gt; application/x-cocoon-links &lt;/link-content-type&gt;
        * &lt;link-view-query&gt; ?cocoon-view=links &lt;/link-view-query&gt;
  +     * &lt;user-agent&gt; Cocoon &lt;/user-agent&gt;
  +     * &lt;accept&gt; text/xml &lt;/accept&gt;
        * </tt></pre>
        *
        * @param  configuration               XML configuration of this avalon component.
  @@ -298,8 +299,9 @@
           /* Create a reusable attributes for creating nodes */
           this.attributes = new AttributesImpl();
   
  -        excludeCrawlingURL = new HashSet();
  -        this.setDefaultExcludeFromCrawling();
  +        // already done in configure...
  +        //excludeCrawlingURL = new HashSet();
  +        //this.setDefaultExcludeFromCrawling();
       }
   
       /**
  @@ -342,12 +344,12 @@
                       // remove it from the to-do list
                       linksToProcess.remove(link);
   
  -                    URLConnection conn = processURL(url, link.getReferrer());
  +                    String new_url_link = processURL(url, link.getReferrer());
   
                       // calc all links from this url
  -                    if (conn != null) {
  +                    if (new_url_link != null) {
   
  -                        List url_links = getLinksFromConnection(conn, url);
  +                        List url_links = getLinksFromConnection(new_url_link, url);
                           if (url_links != null) {
                               // add links of this url to the to-do list
                               linksToProcess.addAll(url_links);
  @@ -401,9 +403,22 @@
       }
   
   
  -    protected List getLinksFromConnection(URLConnection conn, URL url) {
  +    /**
  +     * Retrieve a list of links of a url
  +     *
  +     * @param url_link_string url for requesting links, it is assumed that
  +     *   url_link_string queries the cocoon view links, ie of the form
  +     *   <code>http://host/foo/bar?cocoon-view=links</code>
  +     * @param url_of_referrer base url of which links are requested, ie of the form
  +     *   <code>http://host/foo/bar</code>
  +     * @return List of links from url_of_referrer, as result of requesting url 
  +     *   url_link_string
  +     */
  +    protected List getLinksFromConnection(String url_link_string, URL url_of_referrer) {
           List url_links = null;
           try {
  +            URL url_link = new URL( url_link_string );
  +            URLConnection conn = url_link.openConnection();
               String content_type = conn.getContentType();
   
               if (getLogger().isDebugEnabled()) {
  @@ -419,7 +434,7 @@
                   // content is supposed to be a list of links,
                   // relative to current URL
                   String line;
  -                String referrer = url.toString();
  +                String referrer = url_of_referrer.toString();
   
                   while ((line = br.readLine()) != null) {
                       URL new_url = new URL(url, line);
  @@ -459,15 +474,23 @@
           return url_links;
       }
   
  -    protected URLConnection processURL(URL url, String referrer) throws SAXException {
  +    /**
  +     * Generate xml attributes of a url, calculate url for retrieving links
  +     *
  +     * @param url to process
  +     * @param referrer of the url
  +     * @return String url for retrieving links, or null if url is an excluded-url,
  +     *   and not an included-url.
  +     */
  +    protected String processURL(URL url, String referrer) throws SAXException {
   
           if (getLogger().isDebugEnabled()) {
               getLogger().debug("getLinks URL " + url);
           }
   
  -        URLConnection result = null;
  +        String result = null;
   
  -        // don't try to investigate url which has been crawled already
  +        // don't try to investigate a url which has been crawled already
           if (crawled.contains(url.toString())) {
               return null;
           }
  @@ -481,71 +504,38 @@
           attributes.addAttribute("", REFERRER_ATTR_NAME,
                                   REFERRER_ATTR_NAME, "CDATA", referrer);
   
  -        // don't try to get links for url which is excluded from crawling
  -        if (isExcludedURL(url.toString())) {
  -            // Check for status and output it.
  -
  -            try {
  -                URLConnection links_url_connection = url.openConnection();
  -                HttpURLConnection h = (HttpURLConnection)links_url_connection;
  -                String content_type = links_url_connection.getContentType();
  -
  -                attributes.addAttribute("", CONTENT_ATTR_NAME,
  -                                        CONTENT_ATTR_NAME, "CDATA",
  -                                        content_type);
  -
  -                attributes.addAttribute("", MESSAGE_ATTR_NAME,
  -                                        MESSAGE_ATTR_NAME, "CDATA",
  -                                        h.getResponseMessage());
  -
  -                attributes.addAttribute("", STATUS_ATTR_NAME,
  -                                        STATUS_ATTR_NAME, "CDATA",
  -                                        String.valueOf(h.getResponseCode()));
  -
  -
  -
  -            }
  -            catch (IOException ioe)
  -                {
  -                    attributes.addAttribute("", MESSAGE_ATTR_NAME,
  -                                            MESSAGE_ATTR_NAME, "CDATA",
  -                                            ioe.getMessage());
  -                }
  -                
  -        } else {
  -
  -            // Output url, referrer, content-type, status, message for traversable url's
  -            // add prefix and query to get data from the linkserializer.        
  -            try {
  -                URL links_url = new URL(url.toExternalForm()
  -                                        + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&") 
  -                                        + linkViewQuery);
  -                URLConnection links_url_connection = links_url.openConnection();
  -                HttpURLConnection h = (HttpURLConnection)links_url_connection;
  -
  -                result = links_url_connection;
  -
  -                attributes.addAttribute("", CONTENT_ATTR_NAME,
  -                                        CONTENT_ATTR_NAME, "CDATA",
  -                                        links_url_connection.getContentType());
  -
  -                attributes.addAttribute("", MESSAGE_ATTR_NAME,
  -                                        MESSAGE_ATTR_NAME, "CDATA",
  -                                        h.getResponseMessage());
  -
  -                attributes.addAttribute("", STATUS_ATTR_NAME,
  -                                        STATUS_ATTR_NAME, "CDATA",
  -                                        String.valueOf(h.getResponseCode()));
  -            }
  -            catch(IOException ioe ) {
  -                // Output url referrer status message
  -                attributes.addAttribute("", MESSAGE_ATTR_NAME,
  +        // Output url, referrer, content-type, status, message for traversable url's
  +        try {
  +            URLConnection links_url_connection = url.openConnection();
  +            HttpURLConnection h = (HttpURLConnection)links_url_connection;
  +            String content_type = links_url_connection.getContentType();
  +
  +            attributes.addAttribute("", CONTENT_ATTR_NAME,
  +                                    CONTENT_ATTR_NAME, "CDATA",
  +                                    content_type);
  +
  +            attributes.addAttribute("", MESSAGE_ATTR_NAME,
  +                                    MESSAGE_ATTR_NAME, "CDATA",
  +                                    h.getResponseMessage());
  +
  +            attributes.addAttribute("", STATUS_ATTR_NAME,
  +                                    STATUS_ATTR_NAME, "CDATA",
  +                                    String.valueOf(h.getResponseCode()));
  +        } catch (IOException ioe) {
  +            attributes.addAttribute("", MESSAGE_ATTR_NAME,
                                           MESSAGE_ATTR_NAME, "CDATA",
                                           ioe.getMessage());
  -
  -        
  -            }
           }
  +                                
  +        // don't try to get links of a url which is excluded from crawling
  +        // try to get links of a url which is included for crawling 
  +        if (!isExcludedURL(url.toString()) && isIncludedURL( url.toString() )) {
  +            // add prefix and query to get data from the linkserializer.
  +            result = url.toExternalForm()
  +                + ((url.toExternalForm().indexOf("?") == -1) ? "?" : "&") 
  +                + linkViewQuery;
  +        }
  +        
           super.contentHandler.startElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME, attributes);
           super.contentHandler.endElement(URI, LINK_NODE_NAME, URI+':'+LINK_NODE_NAME);
           
  @@ -623,7 +613,7 @@
           super.recycle();
   
           this.attributes = null;
  -        this.excludeCrawlingURL = null;
  +        //this.excludeCrawlingURL = null;
       }
   }
   
  
  
  

----------------------------------------------------------------------
In case of trouble, e-mail:      webmaster@xml.apache.org
To unsubscribe, e-mail:          cocoon-cvs-unsubscribe@xml.apache.org
For additional commands, e-mail: cocoon-cvs-help@xml.apache.org