You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by ca...@apache.org on 2005/10/28 01:04:10 UTC
svn commit: r328975 - in /maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4:
project.xml src/main/org/apache/maven/linkcheck/FileToCheck.java
src/main/org/apache/maven/linkcheck/LinkMatcher.java
src/test/org/apache/maven/linkcheck/LinkCheckTest.java
Author: carlos
Date: Thu Oct 27 16:04:06 2005
New Revision: 328975
URL: http://svn.apache.org/viewcvs?rev=328975&view=rev
Log:
Use regexps instead of xml parsing MPLINKCHECK-23
Added:
maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java (with props)
Modified:
maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/project.xml
maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/FileToCheck.java
maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/test/org/apache/maven/linkcheck/LinkCheckTest.java
Modified: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/project.xml
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/project.xml?rev=328975&r1=328974&r2=328975&view=diff
==============================================================================
--- maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/project.xml (original)
+++ maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/project.xml Thu Oct 27 16:04:06 2005
@@ -180,28 +180,9 @@
</properties>
</dependency>
<dependency>
- <groupId>dom4j</groupId>
- <artifactId>dom4j</artifactId>
- <version>1.4</version>
- <properties>
- <comment>This library is already loaded by maven's core. Be careful to use the same version number as in the core.</comment>
- </properties>
- </dependency>
- <dependency>
- <groupId>jtidy</groupId>
- <artifactId>jtidy</artifactId>
- <version>4aug2000r7-dev</version>
- </dependency>
- <dependency>
<groupId>maven</groupId>
<artifactId>maven</artifactId>
<version>1.0.2</version>
- </dependency>
- <dependency>
- <jar>js-1.5R4-RC3.jar</jar>
- <groupId>rhino</groupId>
- <artifactId>rhino</artifactId>
- <version>1.5R4-RC3</version>
</dependency>
</dependencies>
</project>
Modified: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/FileToCheck.java
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/FileToCheck.java?rev=328975&r1=328974&r2=328975&view=diff
==============================================================================
--- maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/FileToCheck.java (original)
+++ maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/FileToCheck.java Thu Oct 27 16:04:06 2005
@@ -17,29 +17,19 @@
* ====================================================================
*/
-import java.io.BufferedInputStream;
-import java.io.ByteArrayOutputStream;
import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.OutputStream;
-import java.io.PrintWriter;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
-import java.util.TreeSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.maven.linkcheck.validation.LinkValidationItem;
import org.apache.maven.linkcheck.validation.LinkValidationResult;
import org.apache.maven.linkcheck.validation.LinkValidatorManager;
-import org.dom4j.Document;
-import org.dom4j.Node;
-import org.dom4j.io.DOMReader;
-import org.w3c.tidy.Tidy;
/**
* @author <a href="mailto:bwalding@apache.org">Ben Walding</a>
@@ -88,7 +78,7 @@
final Set hrefs;
try
{
- hrefs = getLinks();
+ hrefs = LinkMatcher.match(fileToCheck);
}
catch (Throwable e)
{
@@ -143,34 +133,6 @@
}
}
- private Set getLinks() throws FileNotFoundException
- {
- ByteArrayOutputStream baos = new ByteArrayOutputStream();
- PrintWriter errOut = new PrintWriter(baos);
- BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileToCheck));
- try
- {
- Tidy tidy = getTidy();
- tidy.setErrout(errOut);
- LOG.debug("Processing:" + fileToCheck);
- org.w3c.dom.Document domDocument = tidy.parseDOM(bin, null);
-
- // now read a dom4j document from
- // JTidy's W3C DOM object
- final DOMReader domReader = new DOMReader();
- final Document doc = domReader.read(domDocument);
-
- LOG.debug(baos.toString());
-
- return findUniqueLinks(doc);
- }
- finally
- {
- close(bin);
- close(baos);
- }
- }
-
private void close(InputStream is)
{
try
@@ -179,7 +141,7 @@
}
catch (Exception e)
{
- //Don't really care.
+ // Don't really care.
}
}
@@ -193,43 +155,6 @@
{
//Don't really care.
}
- }
-
- private Set findUniqueLinks(Document doc)
- {
- List xpathResults = new LinkedList();
-
- xpathResults.addAll(doc.selectNodes("//a/@href"));
- xpathResults.addAll(doc.selectNodes("//img/@src"));
-
- //<link rel="stylesheet" href="...">
- xpathResults.addAll(doc.selectNodes("//link/@href"));
-
- //<script src="http://ar.atwola.com/file/adsWrapper.js">
- xpathResults.addAll(doc.selectNodes("//script/@src"));
-
- Set results = new TreeSet();
- Iterator linkIter = xpathResults.iterator();
- while (linkIter.hasNext())
- {
- Node node = (Node) linkIter.next();
- String href = node.getText();
- results.add(href);
- }
-
- return results;
- }
-
- private Tidy getTidy()
- {
- Tidy tidy = new Tidy();
- tidy.setMakeClean(true);
- tidy.setXmlTags(true);
- tidy.setXmlOut(true);
- tidy.setXHTML(true);
- tidy.setQuiet(true);
- tidy.setShowWarnings(false);
- return tidy;
}
/**
Added: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java?rev=328975&view=auto
==============================================================================
--- maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java (added)
+++ maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java Thu Oct 27 16:04:06 2005
@@ -0,0 +1,111 @@
+package org.apache.maven.linkcheck;
+
+/* ====================================================================
+ * Copyright 2001-2004 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.IOException;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Link matcher. Reads the contents of a file and extracts link targets with a
+ * regular expression rather than by parsing the markup. The following
+ * constructs are matched (case-insensitively):
+ * <pre>
+ * &lt;a href="..."
+ * &lt;link href="..."
+ * &lt;img src="..."
+ * &lt;script src="..."
+ * </pre>
+ *
+ * This class keeps no mutable static state; every call to
+ * {@link #match(File)} works on its own buffer and result set.
+ *
+ * @author <a href="mailto:mac@apache.org">Ignacio G. Mac Dowell </a>
+ */
+class LinkMatcher {
+
+    /**
+     * Regexp for link matching. Capturing group 1 is the text found between
+     * the quotes that follow <code>href=</code> or <code>src=</code>.
+     */
+    private final static Pattern p = Pattern
+            .compile(
+                    "<(?>link|a|img|script)[^>]*?(?>href|src)\\s*?=\\s*?[\\\"'](.*?)[\\\"'][^>]*?",
+                    Pattern.CASE_INSENSITIVE);
+
+    /**
+     * Reads a file and returns a StringBuffer with its contents.
+     *
+     * The file is read line by line and the lines are appended without any
+     * separator, so line terminators are dropped and the text of adjacent
+     * lines is joined directly.
+     *
+     * TODO: Check for encoding issues (FileReader uses the platform default
+     * encoding)
+     *
+     * TODO: Better exception handling?
+     *
+     * @param file
+     *            the file we are reading
+     * @return a StringBuffer with file's contents.
+     * @throws IOException
+     *             if the file cannot be opened or read
+     */
+    private static StringBuffer fileToStringBuffer(File file)
+            throws IOException {
+        final StringBuffer pageBuffer = new StringBuffer();
+        // Open the reader before entering the try block: if opening fails
+        // there is nothing to close, and the original exception (e.g.
+        // FileNotFoundException) reaches the caller instead of being masked
+        // by a NullPointerException thrown from the finally clause.
+        final BufferedReader reader = new BufferedReader(new FileReader(file));
+        try {
+            String line;
+            while ((line = reader.readLine()) != null) {
+                pageBuffer.append(line);
+            }
+        } finally {
+            reader.close();
+        }
+        return pageBuffer;
+    }
+
+    /**
+     * Performs the actual matching.
+     *
+     * Each matched value is trimmed; empty values and values containing the
+     * text "javascript" (in any letter case) are skipped. The returned set is
+     * newly created for every call, so it is never modified by later calls.
+     *
+     * @param file
+     *            the file to check
+     * @return a sorted set (of String) with all links to check; empty if no
+     *         link was found
+     * @throws IOException
+     *             if the file cannot be opened or read
+     */
+    static Set match(File file) throws IOException {
+        // A fresh set per call: returning a shared static instance would let
+        // a later call clear the results an earlier caller is still using.
+        final Set links = new TreeSet();
+        final Matcher m = p.matcher(fileToStringBuffer(file));
+        while (m.find()) {
+            final String link = m.group(1).trim();
+            if (link.length() < 1) {
+                continue;
+            }
+            // Fixed locale so the comparison does not depend on the default
+            // locale's case rules (e.g. the Turkish dotless i).
+            if (link.toLowerCase(java.util.Locale.ENGLISH).indexOf("javascript") != -1) {
+                continue;
+            }
+            // else if (link.toLowerCase().indexOf("mailto:") != -1) {
+            // continue;
+            // }
+            links.add(link);
+        }
+        return links;
+    }
+
+}
Propchange: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/main/org/apache/maven/linkcheck/LinkMatcher.java
------------------------------------------------------------------------------
svn:keywords = "Author Date Id Revision"
Modified: maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/test/org/apache/maven/linkcheck/LinkCheckTest.java
URL: http://svn.apache.org/viewcvs/maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/test/org/apache/maven/linkcheck/LinkCheckTest.java?rev=328975&r1=328974&r2=328975&view=diff
==============================================================================
--- maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/test/org/apache/maven/linkcheck/LinkCheckTest.java (original)
+++ maven/maven-1/plugins/branches/MAVEN_LINCHECK_1_4/src/test/org/apache/maven/linkcheck/LinkCheckTest.java Thu Oct 27 16:04:06 2005
@@ -59,7 +59,7 @@
map.put(ftc.getName(), ftc);
}
- assertEquals("files.size()", 8, lc.getFiles().size());
+ assertEquals("files.size()", 9, lc.getFiles().size());
check(map, "nolink.html", 0);
check(map, "test-resources/nolink.html", 0);
@@ -67,6 +67,7 @@
check(map, "test-resources/test1/test2.html", 0);
check(map, "test1/test1.html", 1);
check(map, "testA.html", 3);
+ check(map, "testSplit.html", 3);
/* test excludes */
String fileName = "testExcludes.html";