You are viewing a plain text version of this content. The canonical link for it is here.
Posted to cvs@cocoon.apache.org by up...@apache.org on 2003/10/06 14:40:14 UTC

cvs commit: cocoon-2.1/src/java/org/apache/cocoon/bean/helpers OutputStreamListener.java

upayavira    2003/10/06 05:40:14

  Modified:    src/java/org/apache/cocoon/bean CocoonBean.java
               src/java/org/apache/cocoon/bean/helpers
                        OutputStreamListener.java
  Log:
  Moved crawling code into a separate crawler class. This has made it possible to implement all of my proposed reporting options. The CLI now reports:
  * [no. of pages generated/no. of pages left] [new links in page/total links in page] page URI
  
  Revision  Changes    Path
  1.32      +52 -75    cocoon-2.1/src/java/org/apache/cocoon/bean/CocoonBean.java
  
  Index: CocoonBean.java
  ===================================================================
  RCS file: /home/cvs/cocoon-2.1/src/java/org/apache/cocoon/bean/CocoonBean.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- CocoonBean.java	1 Oct 2003 20:27:49 -0000	1.31
  +++ CocoonBean.java	6 Oct 2003 12:40:14 -0000	1.32
  @@ -53,6 +53,7 @@
   import org.apache.cocoon.Constants;
   import org.apache.cocoon.ResourceNotFoundException;
   import org.apache.cocoon.ProcessingException;
  +import org.apache.cocoon.bean.helpers.Crawler;
   import org.apache.cocoon.bean.helpers.DelayedOutputStream;
   import org.apache.cocoon.components.notification.SimpleNotifyingBean;
   import org.apache.cocoon.components.notification.Notifier;
  @@ -95,7 +96,6 @@
       private boolean precompileOnly = false;
       private boolean confirmExtension = true;
       private String defaultFilename = Constants.INDEX_URI;
  -    private List targets = new ArrayList();
       private boolean brokenLinkGenerate = false;
       private String brokenLinkExtension = "";
       private List excludePatterns = new ArrayList();
  @@ -103,12 +103,15 @@
       private List includeLinkExtensions = null;
       
       // Internal Objects
  -    private Map allProcessedLinks;
  -    private Map allTranslatedLinks;
       private boolean initialized;
       private List listeners = new ArrayList();
       private boolean verbose;
       SourceResolver sourceResolver;
  +    private Crawler crawler;    
  +
  +    public CocoonBean() {
  +        this.crawler = new Crawler();
  +    }
       
       //
       // INITIALISATION METHOD
  @@ -118,7 +121,7 @@
           if (this.initialized == false) {
               super.initialize();
   
  -            if (targets.size() == 0 && !precompileOnly) {
  +            if (crawler.getRemainingCount() == 0 && !precompileOnly) {
                   String error = "Please, specify at least one starting URI.";
                   log.fatalError(error);
                   throw new ProcessingException(error);
  @@ -196,7 +199,7 @@
           target.setFollowLinks(this.followLinks);
           target.setConfirmExtension(this.confirmExtension);
           target.setLogger(this.logger);
  -        targets.add(target);
  +        crawler.addTarget(target);
       }
   
       public void addTarget(String type, String sourceURI, String destURI)
  @@ -206,7 +209,7 @@
           target.setFollowLinks(this.followLinks);
           target.setConfirmExtension(this.confirmExtension);
           target.setLogger(this.logger);
  -        targets.add(target);
  +        crawler.addTarget(target);
       }
   
       public void addTarget(String sourceURI, String destURI)
  @@ -216,7 +219,7 @@
           target.setFollowLinks(this.followLinks);
           target.setConfirmExtension(this.confirmExtension);
           target.setLogger(this.logger);
  -        targets.add(target);
  +        crawler.addTarget(target);
       }
   
       public void addTargets(List uris, String destURI)
  @@ -228,7 +231,7 @@
               target.setFollowLinks(this.followLinks);
               target.setConfirmExtension(this.confirmExtension);
               target.setLogger(this.logger);
  -            targets.add(target);
  +            crawler.addTarget(target);
           }
       }
   
  @@ -254,7 +257,7 @@
           target.setFollowLinks(followLinks);
           target.setConfirmExtension(confirmExtension);
           target.setLogger(logger);
  -        targets.add(target);
  +        crawler.addTarget(target);
       }
   
       public void addExcludePattern(String pattern) {
  @@ -351,60 +354,34 @@
               this.initialize();
           }
   
  -        allProcessedLinks = new HashMap();
  -        allTranslatedLinks = new HashMap();
  -
  -        Map targetMap = new HashMap();
  -        Iterator i = targets.iterator();
  -        while (i.hasNext()) {
  -            Target target = (Target) i.next();
  -            targetMap.put(target, target);
  -        }
  -
  -        int nCount = 0;
  -        while (targetMap.size() > 0) {
  -            Target target = (Target) targetMap.keySet().iterator().next();
  -            try {
  -                if (!allProcessedLinks.containsKey(target)) {
  -                    if (precompileOnly) {
  -                        processXSP(target.getSourceURI());
  -                    } else if (this.followLinks) {
  -                        i = processTarget(target).iterator();
  -                        while (i.hasNext()) {
  -                            Target link = (Target) i.next();
  -                            targetMap.put(link, link);
  -                        }
  -                    } else {
  -                        processTarget(target);
  -                    }
  +        if (crawler.getRemainingCount()==0) {
  +            super.precompile();
  +        } else {
  +            Iterator iterator = crawler.iterator();
  +            while (iterator.hasNext()) {
  +                Target target = (Target) iterator.next();
  +                if (precompileOnly) {
  +                    processXSP(target.getSourceURI());
  +                } else {
  +                    processTarget(crawler, target);
                   }
  -            } catch (ResourceNotFoundException rnfe) {
  -                this.sendBrokenLinkWarning(target.getSourceURI(), rnfe.getMessage());
               }
  -
  -            targetMap.remove(target);
  -            nCount++;
  -
  -            if (log.isInfoEnabled()) {
  -                log.info(
  -                    "  Memory used: "
  -                        + (Runtime.getRuntime().totalMemory()
  -                            - Runtime.getRuntime().freeMemory()));
  -                log.info(
  -                    "  Processed, Translated & Left: "
  -                        + allProcessedLinks.size()
  -                        + ", "
  -                        + allTranslatedLinks.size()
  -                        + ", "
  -                        + targetMap.size());
  -            }
  -        }
  -
  -        if (nCount == 0) {
  -            super.precompile();
           }
  +        if (log.isInfoEnabled()) {
  +              log.info(
  +                  "  Memory used: "
  +                      + (Runtime.getRuntime().totalMemory()
  +                          - Runtime.getRuntime().freeMemory()));
  +              log.info(
  +                  "  Processed, Translated & Left: "
  +                      + crawler.getProcessedCount()
  +                      + ", "
  +                      + crawler.getTranslatedCount()
  +                      + ", "
  +                      + crawler.getRemainingCount());
  +          }
       }
  -
  +    
       /**
        * Processes the given Target and return all links.
        *
  @@ -439,25 +416,22 @@
        * Target objects.
        * @exception Exception if an error occurs
        */
  -    private Collection processTarget(Target target) throws Exception {
  +    private void processTarget(Crawler crawler, Target target) throws Exception {
   
           int status = 0;
           
           int linkCount = 0;
           int newLinkCount = 0;
           int pageSize = 0;
  -        
           long startTimeMillis = System.currentTimeMillis();
   
           if (target.confirmExtensions()) {
  -            if (null == allTranslatedLinks.get(target.getSourceURI())) {
  +            if (!crawler.hasTranslatedLink(target)) {
                   final String mimeType = getType(target.getDeparameterizedSourceURI(), target.getParameters());
                   target.setMimeType(mimeType);
  -                allTranslatedLinks.put(target.getSourceURI(), target.getDestinationURI());
  +                crawler.addTranslatedLink(target);
               }
           }
  -        // Store processed URI list to avoid eternal loop
  -        allProcessedLinks.put(target, target);
   
           // IS THIS STILL NEEDED?
           //if ("".equals(destinationURI)) {
  @@ -466,7 +440,6 @@
   
           // Process links
           final HashMap translatedLinks = new HashMap();
  -        final List targets = new ArrayList();
           if (target.followLinks() && target.confirmExtensions() && isCrawlablePage(target)) {
               final Iterator i =
                   this.getLinks(target.getDeparameterizedSourceURI(), target.getParameters()).iterator();
  @@ -485,18 +458,22 @@
                       continue;
                   }
   
  -                if (null == allTranslatedLinks.get(linkTarget.getSourceURI())) {
  +                if (!crawler.hasTranslatedLink(linkTarget)) {
                       try {
                           final String mimeType = 
                                   getType(linkTarget.getDeparameterizedSourceURI(), linkTarget.getParameters());
                           linkTarget.setMimeType(mimeType);
  -                        allTranslatedLinks.put(linkTarget.getSourceURI(), linkTarget.getDestinationURI());
  +                        crawler.addTranslatedLink(linkTarget);
                           log.info("  Link translated: " + linkTarget.getSourceURI());
  -                        targets.add(linkTarget);
  +                        if (crawler.addTarget(linkTarget)) {
  +                            newLinkCount++;
  +                        }
                       } catch (ProcessingException pe) {
                           this.sendBrokenLinkWarning(linkTarget.getSourceURI(), pe.getMessage());
                           if (this.brokenLinkGenerate) {
  -                           targets.add(linkTarget);
  +                           if (crawler.addTarget(linkTarget)) {
  +                               newLinkCount++;
  +                           }
                           }
                       }
                   }
  @@ -546,7 +523,9 @@
                               pageSkipped(linkTarget.getSourceURI(), "matched include/exclude rules");
                               continue;
                           }
  -                        targets.add(linkTarget);
  +                        if (crawler.addTarget(linkTarget)) {
  +                            newLinkCount++;
  +                        }
                       }
                       linkCount = gatheredLinks.size();
                   }
  @@ -573,8 +552,8 @@
                                         pageSize,
                                         linkCount,
                                         newLinkCount,
  -                                      0, //pagesRemaining,  @TODO@ Implement this
  -                                      0, //pagesComplete,   @TODO@ Implement this
  +                                      crawler.getRemainingCount(),
  +                                      crawler.getProcessedCount(),
                                         System.currentTimeMillis()- startTimeMillis);
   
                       } catch (IOException ioex) {
  @@ -588,8 +567,6 @@
               log.warn("Could not process URI: " + target.getSourceURI());
               this.sendBrokenLinkWarning(target.getSourceURI(), "URI not found");
           }
  -
  -        return targets;
       }
   
       /**
  
  
  
  1.5       +16 -5     cocoon-2.1/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java
  
  Index: OutputStreamListener.java
  ===================================================================
  RCS file: /home/cvs/cocoon-2.1/src/java/org/apache/cocoon/bean/helpers/OutputStreamListener.java,v
  retrieving revision 1.4
  retrieving revision 1.5
  diff -u -r1.4 -r1.5
  --- OutputStreamListener.java	27 Sep 2003 09:50:29 -0000	1.4
  +++ OutputStreamListener.java	6 Oct 2003 12:40:14 -0000	1.5
  @@ -75,6 +75,8 @@
       private final long startTimeMillis;    
       private String reportFile = null;
       private String reportType = "text";
  +    private long siteSize = 0L;
  +    private int sitePages = 0;
      
       public OutputStreamListener(OutputStream os) {
           writer = new PrintWriter(os);
  @@ -97,6 +99,9 @@
                                 int pagesRemaining, 
                                 int pagesComplete, 
                                 long timeTaken) {
  +        this.siteSize += pageSize;
  +        this.sitePages++;
  +        
           double time = (((double)timeTaken)/1000);
           
           String size;
  @@ -109,7 +114,8 @@
           if (linksInPage == -1) {
               this.print("* " + sourceURI);
           } else {
  -            this.print(pad(8, "* ["+linksInPage + "] ") +
  +            this.print(pad(12, "* [" + pagesComplete + "/" + pagesRemaining + "] ") + 
  +                       pad(10, "[" + newLinksInPage + "/" + linksInPage + "] ") +
                          pad(7,time + "s ") +
                          pad(7, size) + " " +
                          sourceURI);
  @@ -125,7 +131,7 @@
       }
   
       public void brokenLinkFound(String uri, String parentURI, String message, Throwable t) {
  -        this.print(pad(28,"X [0] ")+uri+"\tBROKEN: "+message);
  +        this.print(pad(42,"X [0] ")+uri+"\tBROKEN: "+message);
           brokenLinks.add(uri + "\t" + message);
           
   //            StringWriter sw = new StringWriter();
  @@ -135,14 +141,19 @@
       }
   
       public void pageSkipped(String uri, String message) {
  -        this.print(pad(23, "^ ") + uri);
  +        this.print(pad(37, "^ ") + uri);
       }
       
       public void complete() {
           outputBrokenLinks();
   
           long duration = System.currentTimeMillis() - startTimeMillis;
  -        this.print("Total time: " + (duration / 60000) + " minutes " + (duration % 60000)/1000 + " seconds");
  +        
  +        this.print("Total time: " + 
  +                   (duration / 60000) + " minutes " + 
  +                   (duration % 60000)/1000 + " seconds, " + 
  +                   " Site size: " + this.siteSize +
  +                   " Site pages: " + this.sitePages);
           this.close();
       }