You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@maven.apache.org by bw...@apache.org on 2003/12/07 00:17:44 UTC
cvs commit: maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck LinkCheckCli.java FileToCheck.java LinkCheck.java
bwalding 2003/12/06 15:17:44
Modified: linkcheck/src/main/org/apache/maven/linkcheck
FileToCheck.java LinkCheck.java
Added: linkcheck/src/main/org/apache/maven/linkcheck
LinkCheckCli.java
Log:
Split things out into marginally better methods. Resolved at least one
resource leak.
PR: MPLINKCHECK-6
Revision Changes Path
1.14 +115 -72 maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java
Index: FileToCheck.java
===================================================================
RCS file: /home/cvs/maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/FileToCheck.java,v
retrieving revision 1.13
retrieving revision 1.14
diff -u -r1.13 -r1.14
--- FileToCheck.java 26 Oct 2003 22:49:40 -0000 1.13
+++ FileToCheck.java 6 Dec 2003 23:17:44 -0000 1.14
@@ -59,12 +59,15 @@
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.InputStream;
+import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
-import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
-import java.util.Map;
+import java.util.Set;
+import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -79,7 +82,6 @@
/**
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
* @version $Id$
- *
*/
public class FileToCheck
{
@@ -88,24 +90,29 @@
*/
private static Log LOG = LogFactory.getLog(FileToCheck.class);
- private File base;
+ private String base;
private File fileToCheck;
private String status = STATUS_OK;
private String message = "";
private int successful;
private int unsuccessful;
+ private List links = new ArrayList();
public static final String STATUS_UNKNOWN = null;
public static final String STATUS_JTIDY_FAILURE = "Unable to tidy source";
public static final String STATUS_OK = "OK";
- public FileToCheck(File base, File fileToCheck)
+ public FileToCheck(File baseFile, File fileToCheck)
{
- this.base = base;
+ this.base = baseFile.getAbsolutePath();
this.fileToCheck = fileToCheck;
+
}
- private List links = new ArrayList();
+ private void addResult(LinkCheckResult lcr)
+ {
+ this.links.add(lcr);
+ }
public void check(LinkValidatorManager lvm) throws Exception
{
@@ -116,34 +123,10 @@
try
{
- Tidy tidy = new Tidy();
- Document doc = null;
-
+ final Set hrefs;
try
{
- FileInputStream in = new FileInputStream(fileToCheck);
- tidy.setMakeClean(true);
- tidy.setXmlTags(true);
- tidy.setXmlOut(true);
- tidy.setQuiet(true);
- tidy.setShowWarnings(false);
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintWriter errOut = new PrintWriter(baos);
- tidy.setErrout(errOut);
- LOG.debug("Processing:" + fileToCheck);
- tidy.setXHTML(true);
- org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
-
- // now read a dom4j document from
- // JTidy's W3C DOM object
-
- DOMReader domReader = new DOMReader();
- doc = domReader.read(domDocument);
-
- if (LOG.isDebugEnabled())
- {
- LOG.debug(baos.toString());
- }
+ hrefs = getLinks();
}
catch (Throwable e)
{
@@ -154,30 +137,11 @@
LinkCheckResult lcr = new LinkCheckResult();
lcr.setStatus("PARSE FAILURE");
lcr.setTarget("N/A");
- this.links.add(lcr);
+ addResult(lcr);
return;
}
- List xpathResults = new ArrayList();
-
- xpathResults.addAll(doc.selectNodes("//a/@href"));
- xpathResults.addAll(doc.selectNodes("//img/@src"));
- //<link rel="stylesheet" href="...">
- xpathResults.addAll(doc.selectNodes("//link/@href"));
- //<script src="http://ar.atwola.com/file/adsWrapper.js">
- xpathResults.addAll(doc.selectNodes("//script/@src"));
-
- Map uniqueLinks = new HashMap();
- Iterator linkIter = xpathResults.iterator();
- while (linkIter.hasNext())
- {
- Node node = (Node) linkIter.next();
- String href = node.getText();
- uniqueLinks.put(href, href);
- }
-
- Iterator iter = uniqueLinks.keySet().iterator();
- while (iter.hasNext())
+ for (Iterator iter = hrefs.iterator(); iter.hasNext(); )
{
String href = (String) iter.next();
@@ -194,21 +158,20 @@
case LinkValidationResult.VALID :
successful++;
lcr.setStatus("OK");
- this.links.add(lcr); //At some point we won't want to store valid links. The tests require that we do at present
+ addResult(lcr); //At some point we won't want to store valid links. The tests require that we do at present
break;
case LinkValidationResult.UNKNOWN :
unsuccessful++;
lcr.setStatus("UNKNOWN REF");
- this.links.add(lcr);
+ addResult(lcr);
break;
case LinkValidationResult.INVALID :
unsuccessful++;
lcr.setStatus("NOT FOUND");
- this.links.add(lcr);
+ addResult(lcr);
break;
}
-
}
}
catch (Exception e)
@@ -218,6 +181,95 @@
}
}
+ private Set getLinks() throws FileNotFoundException
+ {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ PrintWriter errOut = new PrintWriter(baos);
+ FileInputStream in = new FileInputStream(fileToCheck);
+ try
+ {
+ Tidy tidy = getTidy();
+ tidy.setErrout(errOut);
+ LOG.debug("Processing:" + fileToCheck);
+ org.w3c.dom.Document domDocument = tidy.parseDOM(in, null);
+
+ // now read a dom4j document from
+ // JTidy's W3C DOM object
+ final DOMReader domReader = new DOMReader();
+ final Document doc = domReader.read(domDocument);
+
+ LOG.debug(baos.toString());
+
+ return findUniqueLinks(doc);
+ }
+ finally
+ {
+ close(in);
+ close(baos);
+ }
+ }
+
+ private void close(InputStream is)
+ {
+ try
+ {
+ is.close();
+ }
+ catch (Exception e)
+ {
+ //Don't really care.
+ }
+ }
+
+ private void close(OutputStream os)
+ {
+ try
+ {
+ os.close();
+ }
+ catch (Exception e)
+ {
+ //Don't really care.
+ }
+ }
+
+ private Set findUniqueLinks(Document doc)
+ {
+ List xpathResults = new ArrayList();
+
+ xpathResults.addAll(doc.selectNodes("//a/@href"));
+ xpathResults.addAll(doc.selectNodes("//img/@src"));
+
+ //<link rel="stylesheet" href="...">
+ xpathResults.addAll(doc.selectNodes("//link/@href"));
+
+ //<script src="http://ar.atwola.com/file/adsWrapper.js">
+ xpathResults.addAll(doc.selectNodes("//script/@src"));
+
+ Set results = new TreeSet();
+ Iterator linkIter = xpathResults.iterator();
+ while (linkIter.hasNext())
+ {
+ Node node = (Node) linkIter.next();
+ String href = node.getText();
+ results.add(href);
+ }
+
+ return results;
+ }
+
+ private Tidy getTidy()
+ {
+ Tidy tidy = new Tidy();
+ tidy.setMakeClean(true);
+ tidy.setXmlTags(true);
+ tidy.setXmlOut(true);
+ tidy.setXHTML(true);
+ tidy.setQuiet(true);
+ tidy.setShowWarnings(false);
+ return tidy;
+ }
+
/**
* Returns the message.
* @return String
@@ -245,15 +297,6 @@
this.message = message;
}
- /**
- * Sets the status.
- * @param status The status to set
- */
- public void setStatus(String status)
- {
- this.status = status;
- }
-
public List getResults()
{
return links;
@@ -279,10 +322,10 @@
public String getName()
{
- String baseName = base.getAbsolutePath();
String fileName = fileToCheck.getAbsolutePath();
- if (fileName.startsWith(baseName))
- fileName = fileName.substring(baseName.length() + 1);
+ if (fileName.startsWith(base)) {
+ fileName = fileName.substring(base.length() + 1);
+ }
fileName = fileName.replace('\\', '/');
return fileName;
@@ -309,4 +352,4 @@
return buf.toString();
}
-}
+}
\ No newline at end of file
1.11 +8 -1 maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheck.java
Index: LinkCheck.java
===================================================================
RCS file: /home/cvs/maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheck.java,v
retrieving revision 1.10
retrieving revision 1.11
diff -u -r1.10 -r1.11
--- LinkCheck.java 13 Sep 2003 21:48:08 -0000 1.10
+++ LinkCheck.java 6 Dec 2003 23:17:44 -0000 1.11
@@ -205,6 +205,13 @@
}
else
{
+ if (allFiles.size() % 1000 == 0) {
+ LOG.info("Found " + allFiles.size() + " files so far.");
+ final long MEG = 1024 * 1024;
+ Runtime r = Runtime.getRuntime();
+ LOG.info( " Memory: " + ((r.totalMemory() - r.freeMemory()) / MEG) + "M/" + (r.totalMemory() / MEG) + "M");
+ }
+ //LOG.info(" File - " + file);
allFiles.add(new FileToCheck(baseDir, file));
}
}
1.1 maven-plugins/linkcheck/src/main/org/apache/maven/linkcheck/LinkCheckCli.java
Index: LinkCheckCli.java
===================================================================
package org.apache.maven.linkcheck;
import java.io.File;
import org.apache.maven.jelly.MavenJellyContext;
import org.apache.maven.project.Project;
/**
 * Command-line driver that configures a {@link LinkCheck} by hand and
 * executes it, without going through the plugin's Jelly script.
 *
 * <p>Usage: <code>LinkCheckCli [basedir]</code>. When no directory is
 * supplied the historical default {@link #DEFAULT_BASEDIR} is used.</p>
 *
 * @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
 * @version $Id: LinkCheckCli.java,v 1.1 2003/12/06 23:17:44 bwalding Exp $
 */
public class LinkCheckCli
{
    /** Directory handed to the link checker when none is given on the command line. */
    private static final String DEFAULT_BASEDIR = "d:/data";

    /**
     * Encoding name handed to {@link LinkCheck#setOutputEncoding}.
     * "ISO-8859" on its own is not a charset name the JVM recognises;
     * the standard name is "ISO-8859-1".
     */
    private static final String OUTPUT_ENCODING = "ISO-8859-1";

    /**
     * Program entry point; delegates to an instance so the work is not done
     * in a static context.
     *
     * @param args optional; <code>args[0]</code> is the base directory to check
     * @throws Exception whatever the link check run throws
     */
    public static void main(String args[]) throws Exception
    {
        LinkCheckCli lcc = new LinkCheckCli();
        lcc.doMain(args);
    }

    /**
     * Builds a minimal project/context pair, configures the link checker and
     * runs it.
     *
     * @param args optional; <code>args[0]</code> overrides {@link #DEFAULT_BASEDIR}
     * @throws Exception propagated from {@link LinkCheck#doExecute()}
     */
    private void doMain(String args[]) throws Exception
    {
        // Fall back to the original hard-coded directory so existing
        // no-argument invocations behave exactly as before.
        String basedir = DEFAULT_BASEDIR;
        if (args != null && args.length > 0)
        {
            basedir = args[0];
        }

        // LinkCheck is given a Project carrying a Jelly context; the proxy
        // host is explicitly set to null here.
        Project p = new Project();
        MavenJellyContext ctx = new MavenJellyContext();
        ctx.setProxyHost(null);
        p.setContext(ctx);

        LinkCheck lc = new LinkCheck();
        lc.setBasedir(new File(basedir));
        lc.setOutput(new File("target/linkcheck.xml"));
        lc.setCache("target/linkcheck.cache");
        lc.setOutputEncoding(OUTPUT_ENCODING);
        lc.setExclude("");
        lc.setProject(p);
        lc.doExecute();
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@maven.apache.org
For additional commands, e-mail: dev-help@maven.apache.org