You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by ol...@apache.org on 2013/03/27 12:50:47 UTC
[1/4] git commit: [WAGON-388] use HtmlFileListParser based on jsoup
Updated Branches:
refs/heads/master 98a21cb3f -> 05a76e1ec
[WAGON-388] use HtmlFileListParser based on jsoup
Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/ec65719a
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/ec65719a
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/ec65719a
Branch: refs/heads/master
Commit: ec65719a32d3662ee3fa57f81d778e677d2aa082
Parents: 98a21cb
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:43:37 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:43:37 2013 +1100
----------------------------------------------------------------------
wagon-providers/wagon-http-shared/pom.xml | 17 +--
.../wagon/shared/http/HtmlFileListParser.java | 176 +++++++--------
2 files changed, 82 insertions(+), 111 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/ec65719a/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index 4a4b4ed..556a3bc 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -35,20 +35,9 @@ under the License.
<dependencies>
<dependency>
- <groupId>nekohtml</groupId>
- <artifactId>xercesMinimal</artifactId>
- <version>1.9.6.2</version>
- </dependency>
- <dependency>
- <groupId>nekohtml</groupId>
- <artifactId>nekohtml</artifactId>
- <version>1.9.6.2</version>
- <exclusions>
- <exclusion>
- <groupId>xerces</groupId>
- <artifactId>xercesImpl</artifactId>
- </exclusion>
- </exclusions>
+ <groupId>org.jsoup</groupId>
+ <artifactId>jsoup</artifactId>
+ <version>1.7.1</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/ec65719a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
index 749bd5d..7448a6d 100644
--- a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
+++ b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
@@ -19,15 +19,13 @@ package org.apache.maven.wagon.shared.http;
* under the License.
*/
+import org.apache.commons.io.IOUtils;
import org.apache.maven.wagon.TransferFailedException;
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.parser.XMLInputSource;
-import org.apache.xerces.xni.parser.XMLParserConfiguration;
import org.codehaus.plexus.util.StringUtils;
-import org.cyberneko.html.HTMLConfiguration;
-import org.cyberneko.html.filters.DefaultFilter;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
import java.io.IOException;
import java.io.InputStream;
@@ -46,9 +44,25 @@ import java.util.regex.Pattern;
*/
public class HtmlFileListParser
{
+ // Apache Fancy Index Sort Headers
+ private static final Pattern APACHE_INDEX_SKIP = Pattern.compile( "\\?[CDMNS]=.*" );
+
+ // URLs with excessive paths.
+ private static final Pattern URLS_WITH_PATHS = Pattern.compile( "/[^/]*/" );
+
+ // URLs that to a parent directory.
+ private static final Pattern URLS_TO_PARENT = Pattern.compile( "\\.\\./" );
+
+ // mailto urls
+ private static final Pattern MAILTO_URLS = Pattern.compile( "mailto:.*" );
+
+ private static final Pattern[] SKIPS =
+ new Pattern[]{ APACHE_INDEX_SKIP, URLS_WITH_PATHS, URLS_TO_PARENT, MAILTO_URLS };
+
/**
* Fetches a raw HTML from a provided InputStream, parses it, and returns the file list.
*
+ * @param stream the input stream.
* @return the file list.
* @throws TransferFailedException if there was a problem fetching the raw html.
*/
@@ -57,126 +71,94 @@ public class HtmlFileListParser
{
try
{
- // Use URI object to get benefits of proper absolute and relative path resolution for free
URI baseURI = new URI( baseurl );
+ // to make debugging easier, start with a string. This is assuming UTF-8, which might not be a safe
+ // assumption.
+ String content = IOUtils.toString( stream, "utf-8" );
+ Document doc = Jsoup.parse( content, baseurl );
+ Elements links = doc.select("a[href]");
+ Set<String> results = new HashSet<String>();
+ for ( int lx = 0; lx < links.size(); lx++ )
+ {
+ Element link = links.get( lx );
+ /*
+ * The abs:href loses directories, so we deal with absolute paths ourselves below in cleanLink
+ */
+ String target = link.attr( "href" );
+ if ( target != null )
+ {
+ String clean = cleanLink( baseURI, target );
+ if ( isAcceptableLink( clean ) )
+ {
+ results.add( clean );
+ }
+ }
- Parser handler = new Parser( baseURI );
-
- XMLParserConfiguration parser = new HTMLConfiguration();
- parser.setDocumentHandler( handler );
- parser.setFeature( "http://cyberneko.org/html/features/augmentations", true );
- parser.setProperty( "http://cyberneko.org/html/properties/names/elems", "upper" );
- parser.setProperty( "http://cyberneko.org/html/properties/names/attrs", "upper" );
- parser.parse( new XMLInputSource( null, baseurl, baseURI.toString(), stream, "UTF-8" ) );
-
- return new ArrayList<String>( handler.getLinks() );
+ }
+ return new ArrayList<String>( results );
}
catch ( URISyntaxException e )
{
- throw new TransferFailedException( "Unable to parse as URI: " + baseurl, e );
+ throw new TransferFailedException( "Unable to parse as base URI: " + baseurl, e );
}
catch ( IOException e )
{
- throw new TransferFailedException( "I/O error: " + e.getMessage(), e );
+ throw new TransferFailedException( "I/O error reading HTML listing of artifacts: " + e.getMessage(), e );
}
}
- private static class Parser
- extends DefaultFilter
+ private static String cleanLink( URI baseURI, String link )
{
- // Apache Fancy Index Sort Headers
- private static final Pattern APACHE_INDEX_SKIP = Pattern.compile( "\\?[CDMNS]=.*" );
-
- // URLs with excessive paths.
- private static final Pattern URLS_WITH_PATHS = Pattern.compile( "/[^/]*/" );
-
- // URLs that to a parent directory.
- private static final Pattern URLS_TO_PARENT = Pattern.compile( "\\.\\./" );
-
- // mailto urls
- private static final Pattern MAILTO_URLS = Pattern.compile( "mailto:.*" );
-
- private static final Pattern[] SKIPS =
- new Pattern[]{ APACHE_INDEX_SKIP, URLS_WITH_PATHS, URLS_TO_PARENT, MAILTO_URLS };
-
- private Set<String> links = new HashSet<String>();
-
- private URI baseURI;
-
- public Parser( URI baseURI )
+ if ( StringUtils.isEmpty( link ) )
{
- this.baseURI = baseURI.normalize();
+ return "";
}
- public Set<String> getLinks()
- {
- return links;
- }
+ String ret = link;
- public void startElement( QName element, XMLAttributes attrs, Augmentations augs )
+ try
{
- if ( "A".equals( element.rawname ) )
+ URI linkuri = new URI( ret );
+ if ( link.startsWith( "/" ) )
{
- String href = attrs.getValue( "HREF" );
- if ( href != null )
- {
- String link = cleanLink( baseURI, href );
- if ( isAcceptableLink( link ) )
- {
- links.add( link );
- }
- }
+ linkuri = baseURI.resolve( linkuri );
}
- }
-
- private static String cleanLink( URI baseURI, String link )
- {
- if ( StringUtils.isEmpty( link ) )
+ URI relativeURI = baseURI.relativize( linkuri ).normalize();
+ ret = relativeURI.toASCIIString();
+ if ( ret.startsWith( baseURI.getPath() ) )
{
- return "";
+ ret = ret.substring( baseURI.getPath().length() );
}
- String ret = link;
-
- try
- {
- URI linkuri = new URI( ret );
- URI relativeURI = baseURI.relativize( linkuri ).normalize();
- ret = relativeURI.toASCIIString();
- if ( ret.startsWith( baseURI.getPath() ) )
- {
- ret = ret.substring( baseURI.getPath().length() );
- }
+ ret = URLDecoder.decode( ret, "UTF-8" );
+ }
+ catch ( URISyntaxException e )
+ {
+ }
+ catch ( UnsupportedEncodingException e )
+ {
+ }
- ret = URLDecoder.decode( ret, "UTF-8" );
- }
- catch ( URISyntaxException e )
- {
- }
- catch ( UnsupportedEncodingException e )
- {
- }
+ return ret;
+ }
- return ret;
+ private static boolean isAcceptableLink( String link )
+ {
+ if ( StringUtils.isEmpty( link ) )
+ {
+ return false;
}
- private static boolean isAcceptableLink( String link )
+ for ( int i = 0; i < SKIPS.length; i++ )
{
- if ( StringUtils.isEmpty( link ) )
+ if ( SKIPS[i].matcher( link ).find() )
{
return false;
}
-
- for ( int i = 0; i < SKIPS.length; i++ )
- {
- if ( SKIPS[i].matcher( link ).find() )
- {
- return false;
- }
- }
-
- return true;
}
+
+ return true;
}
-}
+
+}
\ No newline at end of file
[2/4] git commit: use last jsoup 1.7.2
Posted by ol...@apache.org.
use last jsoup 1.7.2
Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/fdec4303
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/fdec4303
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/fdec4303
Branch: refs/heads/master
Commit: fdec430331d11602300662d7a7bf9771a25319a8
Parents: ec65719
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:46:13 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:46:13 2013 +1100
----------------------------------------------------------------------
wagon-providers/wagon-http-shared/pom.xml | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/fdec4303/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index 556a3bc..ca63a2b 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -37,7 +37,7 @@ under the License.
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
- <version>1.7.1</version>
+ <version>1.7.2</version>
</dependency>
<dependency>
<groupId>commons-io</groupId>
[4/4] git commit: use commons-lang commons-io rather than plexus-utils
Posted by ol...@apache.org.
use commons-lang commons-io rather than plexus-utils
Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/05a76e1e
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/05a76e1e
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/05a76e1e
Branch: refs/heads/master
Commit: 05a76e1ec762cec4175e59f4d15e93bb0086f235
Parents: 0268188
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:50:34 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:50:34 2013 +1100
----------------------------------------------------------------------
wagon-providers/wagon-file/pom.xml | 6 ++++++
.../maven/wagon/providers/file/FileWagon.java | 2 +-
wagon-providers/wagon-ftp/pom.xml | 4 ++++
.../apache/maven/wagon/providers/ftp/FtpWagon.java | 4 ++--
.../maven/wagon/providers/ftp/FtpWagonTest.java | 5 +++--
5 files changed, 16 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-file/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-file/pom.xml b/wagon-providers/wagon-file/pom.xml
index 33c8ad5..fcbcf6f 100644
--- a/wagon-providers/wagon-file/pom.xml
+++ b/wagon-providers/wagon-file/pom.xml
@@ -32,4 +32,10 @@ under the License.
<description>
Wagon provider that gets and puts artifacts using file system protocol
</description>
+ <dependencies>
+ <dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ </dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java b/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
index d6ce25f..edce708 100644
--- a/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
+++ b/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
@@ -30,6 +30,7 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.List;
+import org.apache.commons.lang.StringUtils;
import org.apache.maven.wagon.ConnectionException;
import org.apache.maven.wagon.InputData;
import org.apache.maven.wagon.LazyFileOutputStream;
@@ -40,7 +41,6 @@ import org.apache.maven.wagon.TransferFailedException;
import org.apache.maven.wagon.authorization.AuthorizationException;
import org.apache.maven.wagon.resource.Resource;
import org.codehaus.plexus.util.FileUtils;
-import org.codehaus.plexus.util.StringUtils;
/**
* Wagon Provider for Local File System
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/pom.xml b/wagon-providers/wagon-ftp/pom.xml
index c25a5d7..a7bac49 100644
--- a/wagon-providers/wagon-ftp/pom.xml
+++ b/wagon-providers/wagon-ftp/pom.xml
@@ -54,6 +54,10 @@ under the License.
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
</dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ </dependency>
</dependencies>
</project>
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java b/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
index 7e961f9..607c457 100644
--- a/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
+++ b/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
@@ -19,6 +19,7 @@ package org.apache.maven.wagon.providers.ftp;
* under the License.
*/
+import org.apache.commons.io.IOUtils;
import org.apache.commons.net.ProtocolCommandEvent;
import org.apache.commons.net.ProtocolCommandListener;
import org.apache.commons.net.ftp.FTP;
@@ -38,7 +39,6 @@ import org.apache.maven.wagon.authentication.AuthenticationInfo;
import org.apache.maven.wagon.authorization.AuthorizationException;
import org.apache.maven.wagon.repository.RepositoryPermissions;
import org.apache.maven.wagon.resource.Resource;
-import org.codehaus.plexus.util.IOUtil;
import java.io.File;
import java.io.FileInputStream;
@@ -664,7 +664,7 @@ public class FtpWagon
}
finally
{
- IOUtil.close( sourceFileStream );
+ IOUtils.close( sourceFileStream );
}
}
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java b/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
index fe61ac3..6c8f401 100644
--- a/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
+++ b/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
@@ -19,6 +19,7 @@ package org.apache.maven.wagon.providers.ftp;
* under the License.
*/
+import org.apache.commons.io.FileUtils;
import org.apache.ftpserver.FtpServer;
import org.apache.ftpserver.FtpServerFactory;
import org.apache.ftpserver.ftplet.Authority;
@@ -34,7 +35,7 @@ import org.apache.maven.wagon.authentication.AuthenticationException;
import org.apache.maven.wagon.authentication.AuthenticationInfo;
import org.apache.maven.wagon.repository.Repository;
import org.apache.maven.wagon.resource.Resource;
-import org.codehaus.plexus.util.FileUtils;
+
import java.io.File;
import java.util.ArrayList;
@@ -199,7 +200,7 @@ public class FtpWagonTest
{
// do the cleanup first
File destDir = new File( getRepositoryDirectory(), "dirExists" );
- FileUtils.deleteDirectory(destDir);
+ FileUtils.deleteDirectory( destDir );
destDir.mkdirs();
destDir = new File( destDir, "not_yet_existing/also_not" );
[3/4] git commit: use commons-lang rather than plexus-utils
Posted by ol...@apache.org.
use commons-lang rather than plexus-utils
Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/02681881
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/02681881
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/02681881
Branch: refs/heads/master
Commit: 02681881b235c5ff4120ffb6b17219b79266682c
Parents: fdec430
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:47:27 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:47:27 2013 +1100
----------------------------------------------------------------------
wagon-providers/wagon-http-shared/pom.xml | 4 ++++
.../wagon/shared/http/HtmlFileListParser.java | 2 +-
2 files changed, 5 insertions(+), 1 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/02681881/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index ca63a2b..133a2c9 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -40,6 +40,10 @@ under the License.
<version>1.7.2</version>
</dependency>
<dependency>
+ <groupId>commons-lang</groupId>
+ <artifactId>commons-lang</artifactId>
+ </dependency>
+ <dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/02681881/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
index 7448a6d..2008b99 100644
--- a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
+++ b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
@@ -20,8 +20,8 @@ package org.apache.maven.wagon.shared.http;
*/
import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
import org.apache.maven.wagon.TransferFailedException;
-import org.codehaus.plexus.util.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;