You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by ah...@apache.org on 2006/01/15 01:36:26 UTC
svn commit: r369134 - in /maven/maven-1/plugins/trunk/linkcheck: project.xml
src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java
src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java xdocs/changes.xml
Author: aheritier
Date: Sat Jan 14 16:36:18 2006
New Revision: 369134
URL: http://svn.apache.org/viewcvs?rev=369134&view=rev
Log:
PR: MPLINKCHECK-20, MPLINKCHECK-23
Submitted by: Ignacio G. Mac Dowell
Reviewed by: aheritier
Improve performance getting rid of jtidy dependency via regexps.
StackOverflowError processing apidocs/index-all.html.
Added:
maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java
Modified:
maven/maven-1/plugins/trunk/linkcheck/project.xml
maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java
maven/maven-1/plugins/trunk/linkcheck/xdocs/changes.xml
Modified: maven/maven-1/plugins/trunk/linkcheck/project.xml
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/trunk/linkcheck/project.xml?rev=369134&r1=369133&r2=369134&view=diff
==============================================================================
--- maven/maven-1/plugins/trunk/linkcheck/project.xml (original)
+++ maven/maven-1/plugins/trunk/linkcheck/project.xml Sat Jan 14 16:36:18 2006
@@ -201,28 +201,9 @@
</properties>
</dependency>
<dependency>
- <groupId>dom4j</groupId>
- <artifactId>dom4j</artifactId>
- <version>1.4</version>
- <properties>
- <comment>This library is already loaded by maven's core. Be careful to use the same version number as in the core.</comment>
- </properties>
- </dependency>
- <dependency>
- <groupId>jtidy</groupId>
- <artifactId>jtidy</artifactId>
- <version>4aug2000r7-dev</version>
- </dependency>
- <dependency>
<groupId>maven</groupId>
<artifactId>maven</artifactId>
<version>1.0.2</version>
- </dependency>
- <dependency>
- <jar>js-1.5R4-RC3.jar</jar>
- <groupId>rhino</groupId>
- <artifactId>rhino</artifactId>
- <version>1.5R4-RC3</version>
</dependency>
</dependencies>
</project>
Modified: maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java?rev=369134&r1=369133&r2=369134&view=diff
==============================================================================
--- maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java (original)
+++ maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/FileToCheck.java Sat Jan 14 16:36:18 2006
@@ -17,29 +17,19 @@
* ====================================================================
*/
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
-import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.plugin.linkcheck.validation.LinkValidationItem;
import org.apache.maven.plugin.linkcheck.validation.LinkValidationResult;
import org.apache.maven.plugin.linkcheck.validation.LinkValidatorManager;
-import org.dom4j.Document;
-import org.dom4j.Node;
-import org.dom4j.io.DOMReader;
-import org.w3c.tidy.Tidy;
/**
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
@@ -73,30 +63,6 @@
private int unsuccessful;
- private Set getLinks()
- throws FileNotFoundException
- {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintWriter errOut = new PrintWriter( baos );
- BufferedInputStream bin = new BufferedInputStream( new FileInputStream( fileToCheck ) );
- try
- {
- Tidy tidy = getTidy();
- tidy.setErrout( errOut );
- org.w3c.dom.Document domDocument = tidy.parseDOM( bin, null );
- // now read a dom4j document from
- // JTidy's W3C DOM object
- final DOMReader domReader = new DOMReader();
- final Document doc = domReader.read( domDocument );
- return findUniqueLinks( doc );
- }
- finally
- {
- close( bin );
- close( baos );
- }
- }
-
/**
* Returns the message.
* @return String
@@ -172,7 +138,7 @@
final Set hrefs;
try
{
- hrefs = getLinks();
+ hrefs = LinkMatcher.match( fileToCheck );
}
catch ( Throwable t )
{
@@ -259,69 +225,4 @@
{
this.links.add( lcr );
}
-
- private void close( InputStream is )
- {
- try
- {
- is.close();
- }
- catch ( Exception e )
- {
- //Don't really care.
- }
- }
-
- private void close( OutputStream os )
- {
- try
- {
- os.close();
- }
- catch ( Exception e )
- {
- //Don't really care.
- }
- }
-
- private Set findUniqueLinks( Document doc )
- {
- List xpathResults = new LinkedList();
-
- xpathResults.addAll( doc.selectNodes( "//a/@href" ) );
- xpathResults.addAll( doc.selectNodes( "//img/@src" ) );
-
- xpathResults.addAll( doc.selectNodes( "//link/@href" ) );
-
- xpathResults.addAll( doc.selectNodes( "//script/@src" ) );
-
- Set results = new TreeSet();
- Iterator linkIter = xpathResults.iterator();
- Node node = null;
- String href = null;
- while ( linkIter.hasNext() )
- {
- node = (Node) linkIter.next();
- href = node.getText();
- results.add( href );
- }
- xpathResults = null;
- linkIter = null;
- node = null;
- href = null;
- return results;
- }
-
- private Tidy getTidy()
- {
- Tidy tidy = new Tidy();
- tidy.setMakeClean( true );
- tidy.setXmlTags( true );
- tidy.setXmlOut( true );
- tidy.setXHTML( true );
- tidy.setQuiet( true );
- tidy.setShowWarnings( false );
- return tidy;
- }
-
-}
\ No newline at end of file
+}
Added: maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java?rev=369134&view=auto
==============================================================================
--- maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java (added)
+++ maven/maven-1/plugins/trunk/linkcheck/src/main/org/apache/maven/plugin/linkcheck/LinkMatcher.java Sat Jan 14 16:36:18 2006
@@ -0,0 +1,122 @@
+package org.apache.maven.plugin.linkcheck;
+
+/* ====================================================================
+ * Copyright 2001-2006 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Link matcher. Reads the contents of a file and tries to match the following:
+ * <code>
+ * <a href=""....
+ * <link href=""....
+ * <img src=""....
+ * <script src=""....
+ * </code>
+ *
+ * @author <a href="mailto:mac@apache.org">Ignacio G. Mac Dowell </a>
+ */
+class LinkMatcher
+{
+
+    /**
+     * Regexp for link matching. Group 1 captures the quoted value of the
+     * <code>href</code> or <code>src</code> attribute.
+     */
+    private final static Pattern p = Pattern
+        .compile( "<(?>link|a|img|script)[^>]*?(?>href|src)\\s*?=\\s*?[\\\"'](.*?)[\\\"'][^>]*?",
+                  Pattern.CASE_INSENSITIVE );
+
+    /**
+     * Scheme prefix of script pseudo-links, which are never checked.
+     */
+    private final static String JAVASCRIPT_SCHEME = "javascript:";
+
+    /**
+     * Reads a file and returns a StringBuffer with its contents. Line
+     * terminators are dropped, so the lines are concatenated directly.
+     *
+     * TODO: Check for encoding issues
+     *
+     * TODO: Better exception handling?
+     *
+     * @param file
+     *            the file we are reading
+     * @return a StringBuffer with file's contents.
+     * @throws IOException if the file cannot be opened or read
+     */
+    private static StringBuffer fileToStringBuffer( File file )
+        throws IOException
+    {
+        BufferedReader reader = null;
+        final StringBuffer pageBuffer = new StringBuffer();
+        try
+        {
+            reader = new BufferedReader( new FileReader( file ) );
+            String line;
+            while ( ( line = reader.readLine() ) != null )
+            {
+                pageBuffer.append( line );
+            }
+        }
+        finally
+        {
+            // reader stays null if the file could not be opened; avoid an NPE.
+            if ( reader != null )
+            {
+                reader.close();
+            }
+        }
+        return pageBuffer;
+    }
+
+    /**
+     * Performs the actual matching. Empty links and <code>javascript:</code>
+     * pseudo-links are skipped.
+     *
+     * @param file
+     *            the file to check
+     * @return a new sorted set with all links to check
+     * @throws IOException if the file cannot be opened or read
+     */
+    static Set match( File file )
+        throws IOException
+    {
+        // A fresh set per call: a shared static set would be cleared under
+        // callers still holding the result of a previous call.
+        final Set links = new TreeSet();
+        final Matcher m = p.matcher( fileToStringBuffer( file ) );
+        while ( m.find() )
+        {
+            final String link = m.group( 1 ).trim();
+            if ( link.length() > 0
+                && !link.regionMatches( true, 0, JAVASCRIPT_SCHEME, 0, JAVASCRIPT_SCHEME.length() ) )
+            {
+                links.add( link );
+            }
+        }
+        return links;
+    }
+
+}
Modified: maven/maven-1/plugins/trunk/linkcheck/xdocs/changes.xml
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/trunk/linkcheck/xdocs/changes.xml?rev=369134&r1=369133&r2=369134&view=diff
==============================================================================
--- maven/maven-1/plugins/trunk/linkcheck/xdocs/changes.xml (original)
+++ maven/maven-1/plugins/trunk/linkcheck/xdocs/changes.xml Sat Jan 14 16:36:18 2006
@@ -26,6 +26,8 @@
</properties>
<body>
<release version="1.4-SNAPSHOT" date="in SVN">
+ <action dev="aheritier" type="update" issue="MPLINKCHECK-23" due-to="Ignacio G. Mac Dowell">Improve performance getting rid of jtidy dependency via regexps.</action>
+ <action dev="aheritier" type="fix" issue="MPLINKCHECK-20" due-to="Ignacio G. Mac Dowell">StackOverflowError processing apidocs/index-all.html.</action>
<action dev="aheritier" type="add">If maven is in offline mode the report doesn't test external urls. A warning is displayed in the report.</action>
<action dev="aheritier" type="update" issue="MPLINKCHECK-10">"Moved Permanently" sites are reported as a warning and not as an error.</action>
<action dev="aheritier" type="update" issue="MPLINKCHECK-24">Speed and stability enhancement [better usage of httpClient].</action>