You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@maven.apache.org by bw...@apache.org on 2003/12/07 00:17:44 UTC

cvs commit: maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck LinkCheckCli.java FileToCheck.java LinkCheck.java

bwalding    2003/12/06 15:17:44

  Modified:    linkcheck/src/main/org/apache/maven/linkcheck
                        FileToCheck.java LinkCheck.java
  Added:       linkcheck/src/main/org/apache/maven/linkcheck
                        LinkCheckCli.java
  Log:
  Split things out into marginally better methods.  Resolved at least one
  resource leak.
  PR: MPLINKCHECK-6
  
  Revision  Changes    Path
  1.14      +115 -72   maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java
  
  Index: FileToCheck.java
  ===================================================================
  RCS file: /home/cvs/maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- FileToCheck.java	26 Oct 2003 22:49:40 -0000	1.13
  +++ FileToCheck.java	6 Dec 2003 23:17:44 -0000	1.14
  @@ -59,12 +59,15 @@
   import java.io.ByteArrayOutputStream;
   import java.io.File;
   import java.io.FileInputStream;
  +import java.io.FileNotFoundException;
  +import java.io.InputStream;
  +import java.io.OutputStream;
   import java.io.PrintWriter;
   import java.util.ArrayList;
  -import java.util.HashMap;
   import java.util.Iterator;
   import java.util.List;
  -import java.util.Map;
  +import java.util.Set;
  +import java.util.TreeSet;
   
   import org.apache.commons.logging.Log;
   import org.apache.commons.logging.LogFactory;
  @@ -79,7 +82,6 @@
   /**
    * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
    * @version $Id$
  - *
    */
   public class FileToCheck
   {
  @@ -88,24 +90,29 @@
        */
       private static Log LOG = LogFactory.getLog(FileToCheck.class);
   
  -    private File base;
  +    private String base;
       private File fileToCheck;
       private String status = STATUS_OK;
       private String message = "";
       private int successful;
       private int unsuccessful;
  +    private List links = new ArrayList();
   
       public static final String STATUS_UNKNOWN = null;
       public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
       public static final String STATUS_OK = "OK";
   
  -    public FileToCheck(File base, File fileToCheck)
  +    public FileToCheck(File baseFile, File fileToCheck)
       {
  -        this.base = base;
  +        this.base = baseFile.getAbsolutePath();
           this.fileToCheck = fileToCheck;
  +
       }
   
  -    private List links = new ArrayList();
  +    private void addResult(LinkCheckResult lcr)
  +    {
  +        this.links.add(lcr);
  +    }
   
       public void check(LinkValidatorManager lvm) throws Exception
       {
  @@ -116,34 +123,10 @@
   
           try
           {
  -            Tidy tidy = new Tidy();
  -            Document doc = null;
  -
  +            final Set hrefs;
               try
               {
  -                FileInputStream in = new FileInputStream(fileToCheck);
  -                tidy.setMakeClean(true);
  -                tidy.setXmlTags(true);
  -                tidy.setXmlOut(true);
  -                tidy.setQuiet(true);
  -                tidy.setShowWarnings(false);
  -                ByteArrayOutputStream baos = new ByteArrayOutputStream();
  -                PrintWriter errOut = new PrintWriter(baos);
  -                tidy.setErrout(errOut);
  -                LOG.debug("Processing:" + fileToCheck);
  -                tidy.setXHTML(true);
  -                org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
  -
  -                // now read a dom4j document from
  -                // JTidy's W3C DOM object
  -
  -                DOMReader domReader = new DOMReader();
  -                doc = domReader.read(domDocument);
  -
  -                if (LOG.isDebugEnabled())
  -                {
  -                    LOG.debug(baos.toString());
  -                }
  +                hrefs = getLinks();
               }
               catch (Throwable e)
               {
  @@ -154,30 +137,11 @@
                   LinkCheckResult lcr = new LinkCheckResult();
                   lcr.setStatus("PARSE FAILURE");
                   lcr.setTarget("N/A");
  -                this.links.add(lcr);
  +                addResult(lcr);
                   return;
               }
   
  -            List xpathResults = new ArrayList();
  -
  -            xpathResults.addAll(doc.selectNodes("//a/@href"));
  -            xpathResults.addAll(doc.selectNodes("//img/@src"));
  -            //<link rel="stylesheet" href="...">
  -            xpathResults.addAll(doc.selectNodes("//link/@href"));
  -            //<script src="http://ar.atwola.com/file/adsWrapper.js">
  -            xpathResults.addAll(doc.selectNodes("//script/@src"));
  -
  -            Map uniqueLinks = new HashMap();
  -            Iterator linkIter = xpathResults.iterator();
  -            while (linkIter.hasNext())
  -            {
  -                Node node = (Node) linkIter.next();
  -                String href = node.getText();
  -                uniqueLinks.put(href, href);
  -            }
  -
  -            Iterator iter = uniqueLinks.keySet().iterator();
  -            while (iter.hasNext())
  +            for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
               {
                   String href = (String) iter.next();
   
  @@ -194,21 +158,20 @@
                       case LinkValidationResult.VALID :
                           successful++;
                           lcr.setStatus("OK");
  -                        this.links.add(lcr); //At some point we won't want to store valid links.  The tests require that we do at present
  +                        addResult(lcr); //At some point we won't want to store valid links.  The tests require that we do at present
                           break;
                       case LinkValidationResult.UNKNOWN :
                           unsuccessful++;
                           lcr.setStatus("UNKNOWN REF");
  -                        this.links.add(lcr);
  +                        addResult(lcr);
                           break;
                       case LinkValidationResult.INVALID :
                           unsuccessful++;
                           lcr.setStatus("NOT FOUND");
  -                        this.links.add(lcr);
  +                        addResult(lcr);
                           break;
                   }
   
  -                
               }
           }
           catch (Exception e)
  @@ -218,6 +181,95 @@
           }
       }
   
  +    private Set getLinks() throws FileNotFoundException
  +    {
  +        ByteArrayOutputStream baos = new ByteArrayOutputStream();
  +        PrintWriter errOut = new PrintWriter(baos);
  +        FileInputStream in = new FileInputStream(fileToCheck);
  +        try
  +        {
  +            Tidy tidy = getTidy();
  +            tidy.setErrout(errOut);
  +            LOG.debug("Processing:" + fileToCheck);
  +            org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
  +
  +            // now read a dom4j document from
  +            // JTidy's W3C DOM object
  +            final DOMReader domReader = new DOMReader();
  +            final Document doc = domReader.read(domDocument);
  +
  +            LOG.debug(baos.toString());
  +
  +            return findUniqueLinks(doc);
  +        }
  +        finally
  +        {
  +            close(in);
  +            close(baos);
  +        }
  +    }
  +
  +    private void close(InputStream is)
  +    {
  +        try
  +        {
  +            is.close();
  +        }
  +        catch (Exception e)
  +        {
  +            //Don't really care.
  +        }
  +    }
  +
  +    private void close(OutputStream os)
  +    {
  +        try
  +        {
  +            os.close();
  +        }
  +        catch (Exception e)
  +        {
  +            //Don't really care.
  +        }
  +    }
  +
  +    private Set findUniqueLinks(Document doc)
  +    {
  +        List xpathResults = new ArrayList();
  +
  +        xpathResults.addAll(doc.selectNodes("//a/@href"));
  +        xpathResults.addAll(doc.selectNodes("//img/@src"));
  +
  +        //<link rel="stylesheet" href="...">
  +        xpathResults.addAll(doc.selectNodes("//link/@href"));
  +
  +        //<script src="http://ar.atwola.com/file/adsWrapper.js">
  +        xpathResults.addAll(doc.selectNodes("//script/@src"));
  +
  +        Set results = new TreeSet();
  +        Iterator linkIter = xpathResults.iterator();
  +        while (linkIter.hasNext())
  +        {
  +            Node node = (Node) linkIter.next();
  +            String href = node.getText();
  +            results.add(href);
  +        }
  +
  +        return results;
  +    }
  +
  +    private Tidy getTidy()
  +    {
  +        Tidy tidy = new Tidy();
  +        tidy.setMakeClean(true);
  +        tidy.setXmlTags(true);
  +        tidy.setXmlOut(true);
  +        tidy.setXHTML(true);
  +        tidy.setQuiet(true);
  +        tidy.setShowWarnings(false);
  +        return tidy;
  +    }
  +
       /**
        * Returns the message.
        * @return String
  @@ -245,15 +297,6 @@
           this.message = message;
       }
   
  -    /**
  -     * Sets the status.
  -     * @param status The status to set
  -     */
  -    public void setStatus(String status)
  -    {
  -        this.status = status;
  -    }
  -
       public List getResults()
       {
           return links;
  @@ -279,10 +322,10 @@
   
       public String getName()
       {
  -        String baseName = base.getAbsolutePath();
           String fileName = fileToCheck.getAbsolutePath();
  -        if (fileName.startsWith(baseName))
  -            fileName = fileName.substring(baseName.length() + 1);
  +        if (fileName.startsWith(base)) {
  +            fileName = fileName.substring(base.length() + 1);
  +        }
   
           fileName = fileName.replace('\\', '/');
           return fileName;
  @@ -309,4 +352,4 @@
           return buf.toString();
       }
   
  -}
  +}
  \ No newline at end of file
  
  
  
  1.11      +8 -1      maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheck.java
  
  Index: LinkCheck.java
  ===================================================================
  RCS file: /home/cvs/maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheck.java,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- LinkCheck.java	13 Sep 2003 21:48:08 -0000	1.10
  +++ LinkCheck.java	6 Dec 2003 23:17:44 -0000	1.11
  @@ -205,6 +205,13 @@
                   }
                   else
                   {
  +                    if (allFiles.size() % 1000 == 0) { 
  +                        LOG.info("Found " + allFiles.size() + " files so far.");
  +                        final long MEG = 1024 * 1024;
  +                        Runtime r = Runtime.getRuntime();
  +                        LOG.info( "  Memory: " + ((r.totalMemory() - r.freeMemory()) / MEG) + "M/" + (r.totalMemory() / MEG) + "M");
  +                    }
  +                    //LOG.info(" File - " + file);
                       allFiles.add(new FileToCheck(baseDir, file));
                   }
               }
  
  
  
  1.1                  maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheckCli.java
  
  Index: LinkCheckCli.java
  ===================================================================
  package org.apache.maven.linkcheck;
  
  import java.io.File;
  
  import org.apache.maven.jelly.MavenJellyContext;
  import org.apache.maven.project.Project;
  
  /**
   * Stand-alone launcher for {@link LinkCheck}: builds a minimal
   * {@link Project} with a fresh {@link MavenJellyContext}, configures a
   * <code>LinkCheck</code> instance and executes it from <code>main</code>.
   *
   * <p>Usage: <code>LinkCheckCli [basedir]</code>. When no base directory is
   * supplied, {@link #DEFAULT_BASEDIR} is scanned.</p>
   *
   * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
   * @version $Id: LinkCheckCli.java,v 1.1 2003/12/06 23:17:44 bwalding Exp $
   */
  public class LinkCheckCli
  {
      /** Directory checked when no base directory is given on the command line. */
      private static final String DEFAULT_BASEDIR = "d:/data";

      /** File the link check report is written to. */
      private static final String OUTPUT_FILE = "target/linkcheck.xml";

      /** Location handed to {@link LinkCheck#setCache(String)}. */
      private static final String CACHE_FILE = "target/linkcheck.cache";

      /**
       * Encoding requested for the report. "ISO-8859" on its own is not a
       * registered charset name; "ISO-8859-1" (Latin-1) is.
       */
      private static final String OUTPUT_ENCODING = "ISO-8859-1";

      /**
       * Command-line entry point.
       *
       * @param args optional; <code>args[0]</code>, when present, is the base
       *             directory to check
       * @throws Exception if the link check fails
       */
      public static void main(String[] args) throws Exception
      {
          LinkCheckCli lcc = new LinkCheckCli();
          lcc.doMain(args);
      }

      /**
       * Configures and runs a single link check.
       *
       * @param args command-line arguments as passed to {@link #main(String[])}
       * @throws Exception propagated from {@link LinkCheck#doExecute()}
       */
      private void doMain(String[] args) throws Exception
      {
          // Fall back to the historical hard-coded directory so that running
          // without arguments behaves exactly as before.
          String basedir = DEFAULT_BASEDIR;
          if (args != null && args.length > 0)
          {
              basedir = args[0];
          }

          Project p = new Project();
          MavenJellyContext ctx = new MavenJellyContext();
          // NOTE(review): presumably a null proxy host means "no proxy" - confirm
          // against MavenJellyContext.
          ctx.setProxyHost(null);
          p.setContext(ctx);

          LinkCheck lc = new LinkCheck();
          lc.setBasedir(new File(basedir));
          lc.setOutput(new File(OUTPUT_FILE));
          lc.setCache(CACHE_FILE);
          lc.setOutputEncoding(OUTPUT_ENCODING);
          lc.setExclude("");
          lc.setProject(p);
          lc.doExecute();
      }
  }
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@maven.apache.org
For additional commands, e-mail: dev-help@maven.apache.org