You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@maven.apache.org by ol...@apache.org on 2013/03/27 12:50:47 UTC

[1/4] git commit: [WAGON-388] use HtmlFileListParser based on jsoup

Updated Branches:
  refs/heads/master 98a21cb3f -> 05a76e1ec


[WAGON-388] use HtmlFileListParser based on jsoup


Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/ec65719a
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/ec65719a
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/ec65719a

Branch: refs/heads/master
Commit: ec65719a32d3662ee3fa57f81d778e677d2aa082
Parents: 98a21cb
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:43:37 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:43:37 2013 +1100

----------------------------------------------------------------------
 wagon-providers/wagon-http-shared/pom.xml          |   17 +--
 .../wagon/shared/http/HtmlFileListParser.java      |  176 +++++++--------
 2 files changed, 82 insertions(+), 111 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/ec65719a/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index 4a4b4ed..556a3bc 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -35,20 +35,9 @@ under the License.
 
   <dependencies>
     <dependency>
-      <groupId>nekohtml</groupId>
-      <artifactId>xercesMinimal</artifactId>
-      <version>1.9.6.2</version>
-    </dependency>
-    <dependency>
-      <groupId>nekohtml</groupId>
-      <artifactId>nekohtml</artifactId>
-      <version>1.9.6.2</version>
-      <exclusions>
-        <exclusion>
-          <groupId>xerces</groupId>
-          <artifactId>xercesImpl</artifactId>
-        </exclusion>
-      </exclusions>
+      <groupId>org.jsoup</groupId>
+      <artifactId>jsoup</artifactId>
+      <version>1.7.1</version>
     </dependency>
     <dependency>
       <groupId>commons-io</groupId>

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/ec65719a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
index 749bd5d..7448a6d 100644
--- a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
+++ b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
@@ -19,15 +19,13 @@ package org.apache.maven.wagon.shared.http;
  * under the License.
  */
 
+import org.apache.commons.io.IOUtils;
 import org.apache.maven.wagon.TransferFailedException;
-import org.apache.xerces.xni.Augmentations;
-import org.apache.xerces.xni.QName;
-import org.apache.xerces.xni.XMLAttributes;
-import org.apache.xerces.xni.parser.XMLInputSource;
-import org.apache.xerces.xni.parser.XMLParserConfiguration;
 import org.codehaus.plexus.util.StringUtils;
-import org.cyberneko.html.HTMLConfiguration;
-import org.cyberneko.html.filters.DefaultFilter;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
 
 import java.io.IOException;
 import java.io.InputStream;
@@ -46,9 +44,25 @@ import java.util.regex.Pattern;
  */
 public class HtmlFileListParser
 {
+    // Apache Fancy Index Sort Headers
+    private static final Pattern APACHE_INDEX_SKIP = Pattern.compile( "\\?[CDMNS]=.*" );
+
+    // URLs with excessive paths.
+    private static final Pattern URLS_WITH_PATHS = Pattern.compile( "/[^/]*/" );
+
+    // URLs that point to a parent directory.
+    private static final Pattern URLS_TO_PARENT = Pattern.compile( "\\.\\./" );
+
+    // mailto urls
+    private static final Pattern MAILTO_URLS = Pattern.compile( "mailto:.*" );
+
+    private static final Pattern[] SKIPS =
+        new Pattern[]{ APACHE_INDEX_SKIP, URLS_WITH_PATHS, URLS_TO_PARENT, MAILTO_URLS };
+
     /**
      * Fetches a raw HTML from a provided InputStream, parses it, and returns the file list.
      *
+     * @param stream the input stream.
      * @return the file list.
      * @throws TransferFailedException if there was a problem fetching the raw html.
      */
@@ -57,126 +71,94 @@ public class HtmlFileListParser
     {
         try
         {
-            // Use URI object to get benefits of proper absolute and relative path resolution for free
             URI baseURI = new URI( baseurl );
+            // to make debugging easier, start with a string. This is assuming UTF-8, which might not be a safe
+            // assumption.
+            String content = IOUtils.toString( stream, "utf-8" );
+            Document doc = Jsoup.parse( content, baseurl );
+            Elements links = doc.select("a[href]");
+            Set<String> results = new HashSet<String>();
+            for ( int lx = 0; lx < links.size(); lx++ )
+            {
+                Element link = links.get( lx );
+                /*
+                 * The abs:href loses directories, so we deal with absolute paths ourselves below in cleanLink
+                 */
+                String target = link.attr( "href" );
+                if ( target != null )
+                {
+                    String clean = cleanLink( baseURI, target );
+                    if ( isAcceptableLink( clean ) )
+                    {
+                        results.add( clean );
+                    }
+                }
 
-            Parser handler = new Parser( baseURI );
-
-            XMLParserConfiguration parser = new HTMLConfiguration();
-            parser.setDocumentHandler( handler );
-            parser.setFeature( "http://cyberneko.org/html/features/augmentations", true );
-            parser.setProperty( "http://cyberneko.org/html/properties/names/elems", "upper" );
-            parser.setProperty( "http://cyberneko.org/html/properties/names/attrs", "upper" );
-            parser.parse( new XMLInputSource( null, baseurl, baseURI.toString(), stream, "UTF-8" ) );
-
-            return new ArrayList<String>( handler.getLinks() );
+            }
 
+            return new ArrayList<String>( results );
         }
         catch ( URISyntaxException e )
         {
-            throw new TransferFailedException( "Unable to parse as URI: " + baseurl, e );
+            throw new TransferFailedException( "Unable to parse as base URI: " + baseurl, e );
         }
         catch ( IOException e )
         {
-            throw new TransferFailedException( "I/O error: " + e.getMessage(), e );
+            throw new TransferFailedException( "I/O error reading HTML listing of artifacts: " + e.getMessage(), e );
         }
     }
 
-    private static class Parser
-        extends DefaultFilter
+    private static String cleanLink( URI baseURI, String link )
     {
-        // Apache Fancy Index Sort Headers
-        private static final Pattern APACHE_INDEX_SKIP = Pattern.compile( "\\?[CDMNS]=.*" );
-
-        // URLs with excessive paths.
-        private static final Pattern URLS_WITH_PATHS = Pattern.compile( "/[^/]*/" );
-
-        // URLs that to a parent directory.
-        private static final Pattern URLS_TO_PARENT = Pattern.compile( "\\.\\./" );
-
-        // mailto urls
-        private static final Pattern MAILTO_URLS = Pattern.compile( "mailto:.*" );
-
-        private static final Pattern[] SKIPS =
-            new Pattern[]{ APACHE_INDEX_SKIP, URLS_WITH_PATHS, URLS_TO_PARENT, MAILTO_URLS };
-
-        private Set<String> links = new HashSet<String>();
-
-        private URI baseURI;
-
-        public Parser( URI baseURI )
+        if ( StringUtils.isEmpty( link ) )
         {
-            this.baseURI = baseURI.normalize();
+            return "";
         }
 
-        public Set<String> getLinks()
-        {
-            return links;
-        }
+        String ret = link;
 
-        public void startElement( QName element, XMLAttributes attrs, Augmentations augs )
+        try
         {
-            if ( "A".equals( element.rawname ) )
+            URI linkuri = new URI( ret );
+            if ( link.startsWith( "/" ) )
             {
-                String href = attrs.getValue( "HREF" );
-                if ( href != null )
-                {
-                    String link = cleanLink( baseURI, href );
-                    if ( isAcceptableLink( link ) )
-                    {
-                        links.add( link );
-                    }
-                }
+                linkuri = baseURI.resolve( linkuri );
             }
-        }
-
-        private static String cleanLink( URI baseURI, String link )
-        {
-            if ( StringUtils.isEmpty( link ) )
+            URI relativeURI = baseURI.relativize( linkuri ).normalize();
+            ret = relativeURI.toASCIIString();
+            if ( ret.startsWith( baseURI.getPath() ) )
             {
-                return "";
+                ret = ret.substring( baseURI.getPath().length() );
             }
 
-            String ret = link;
-
-            try
-            {
-                URI linkuri = new URI( ret );
-                URI relativeURI = baseURI.relativize( linkuri ).normalize();
-                ret = relativeURI.toASCIIString();
-                if ( ret.startsWith( baseURI.getPath() ) )
-                {
-                    ret = ret.substring( baseURI.getPath().length() );
-                }
+            ret = URLDecoder.decode( ret, "UTF-8" );
+        }
+        catch ( URISyntaxException e )
+        {
+        }
+        catch ( UnsupportedEncodingException e )
+        {
+        }
 
-                ret = URLDecoder.decode( ret, "UTF-8" );
-            }
-            catch ( URISyntaxException e )
-            {
-            }
-            catch ( UnsupportedEncodingException e )
-            {
-            }
+        return ret;
+    }
 
-            return ret;
+    private static boolean isAcceptableLink( String link )
+    {
+        if ( StringUtils.isEmpty( link ) )
+        {
+            return false;
         }
 
-        private static boolean isAcceptableLink( String link )
+        for ( int i = 0; i < SKIPS.length; i++ )
         {
-            if ( StringUtils.isEmpty( link ) )
+            if ( SKIPS[i].matcher( link ).find() )
             {
                 return false;
             }
-
-            for ( int i = 0; i < SKIPS.length; i++ )
-            {
-                if ( SKIPS[i].matcher( link ).find() )
-                {
-                    return false;
-                }
-            }
-
-            return true;
         }
+
+        return true;
     }
-}
+
+}
\ No newline at end of file


[2/4] git commit: use last jsoup 1.7.2

Posted by ol...@apache.org.
use last jsoup 1.7.2


Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/fdec4303
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/fdec4303
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/fdec4303

Branch: refs/heads/master
Commit: fdec430331d11602300662d7a7bf9771a25319a8
Parents: ec65719
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:46:13 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:46:13 2013 +1100

----------------------------------------------------------------------
 wagon-providers/wagon-http-shared/pom.xml |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/fdec4303/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index 556a3bc..ca63a2b 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -37,7 +37,7 @@ under the License.
     <dependency>
       <groupId>org.jsoup</groupId>
       <artifactId>jsoup</artifactId>
-      <version>1.7.1</version>
+      <version>1.7.2</version>
     </dependency>
     <dependency>
       <groupId>commons-io</groupId>


[4/4] git commit: use commons-lang commons-io rather than plexus-utils

Posted by ol...@apache.org.
use commons-lang commons-io rather than plexus-utils


Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/05a76e1e
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/05a76e1e
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/05a76e1e

Branch: refs/heads/master
Commit: 05a76e1ec762cec4175e59f4d15e93bb0086f235
Parents: 0268188
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:50:34 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:50:34 2013 +1100

----------------------------------------------------------------------
 wagon-providers/wagon-file/pom.xml                 |    6 ++++++
 .../maven/wagon/providers/file/FileWagon.java      |    2 +-
 wagon-providers/wagon-ftp/pom.xml                  |    4 ++++
 .../apache/maven/wagon/providers/ftp/FtpWagon.java |    4 ++--
 .../maven/wagon/providers/ftp/FtpWagonTest.java    |    5 +++--
 5 files changed, 16 insertions(+), 5 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-file/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-file/pom.xml b/wagon-providers/wagon-file/pom.xml
index 33c8ad5..fcbcf6f 100644
--- a/wagon-providers/wagon-file/pom.xml
+++ b/wagon-providers/wagon-file/pom.xml
@@ -32,4 +32,10 @@ under the License.
   <description>
     Wagon provider that gets and puts artifacts using file system protocol
   </description>
+  <dependencies>
+    <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+  </dependencies>
 </project>

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java b/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
index d6ce25f..edce708 100644
--- a/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
+++ b/wagon-providers/wagon-file/src/main/java/org/apache/maven/wagon/providers/file/FileWagon.java
@@ -30,6 +30,7 @@ import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.commons.lang.StringUtils;
 import org.apache.maven.wagon.ConnectionException;
 import org.apache.maven.wagon.InputData;
 import org.apache.maven.wagon.LazyFileOutputStream;
@@ -40,7 +41,6 @@ import org.apache.maven.wagon.TransferFailedException;
 import org.apache.maven.wagon.authorization.AuthorizationException;
 import org.apache.maven.wagon.resource.Resource;
 import org.codehaus.plexus.util.FileUtils;
-import org.codehaus.plexus.util.StringUtils;
 
 /**
  * Wagon Provider for Local File System

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/pom.xml b/wagon-providers/wagon-ftp/pom.xml
index c25a5d7..a7bac49 100644
--- a/wagon-providers/wagon-ftp/pom.xml
+++ b/wagon-providers/wagon-ftp/pom.xml
@@ -54,6 +54,10 @@ under the License.
       <groupId>org.slf4j</groupId>
       <artifactId>slf4j-simple</artifactId>
     </dependency>
+    <dependency>
+      <groupId>commons-io</groupId>
+      <artifactId>commons-io</artifactId>
+    </dependency>
   </dependencies>
 
 </project>

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java b/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
index 7e961f9..607c457 100644
--- a/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
+++ b/wagon-providers/wagon-ftp/src/main/java/org/apache/maven/wagon/providers/ftp/FtpWagon.java
@@ -19,6 +19,7 @@ package org.apache.maven.wagon.providers.ftp;
  * under the License.
  */
 
+import org.apache.commons.io.IOUtils;
 import org.apache.commons.net.ProtocolCommandEvent;
 import org.apache.commons.net.ProtocolCommandListener;
 import org.apache.commons.net.ftp.FTP;
@@ -38,7 +39,6 @@ import org.apache.maven.wagon.authentication.AuthenticationInfo;
 import org.apache.maven.wagon.authorization.AuthorizationException;
 import org.apache.maven.wagon.repository.RepositoryPermissions;
 import org.apache.maven.wagon.resource.Resource;
-import org.codehaus.plexus.util.IOUtil;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -664,7 +664,7 @@ public class FtpWagon
             }
             finally
             {
-                IOUtil.close( sourceFileStream );
+                IOUtils.close( sourceFileStream );
             }
 
         }

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/05a76e1e/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java b/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
index fe61ac3..6c8f401 100644
--- a/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
+++ b/wagon-providers/wagon-ftp/src/test/java/org/apache/maven/wagon/providers/ftp/FtpWagonTest.java
@@ -19,6 +19,7 @@ package org.apache.maven.wagon.providers.ftp;
  * under the License.
  */
 
+import org.apache.commons.io.FileUtils;
 import org.apache.ftpserver.FtpServer;
 import org.apache.ftpserver.FtpServerFactory;
 import org.apache.ftpserver.ftplet.Authority;
@@ -34,7 +35,7 @@ import org.apache.maven.wagon.authentication.AuthenticationException;
 import org.apache.maven.wagon.authentication.AuthenticationInfo;
 import org.apache.maven.wagon.repository.Repository;
 import org.apache.maven.wagon.resource.Resource;
-import org.codehaus.plexus.util.FileUtils;
+
 
 import java.io.File;
 import java.util.ArrayList;
@@ -199,7 +200,7 @@ public class FtpWagonTest
         {
             // do the cleanup first
             File destDir = new File( getRepositoryDirectory(), "dirExists" );
-            FileUtils.deleteDirectory(destDir);
+            FileUtils.deleteDirectory( destDir );
             destDir.mkdirs();
             destDir = new File( destDir, "not_yet_existing/also_not" );
 


[3/4] git commit: use commons-lang rather than plexus-utils

Posted by ol...@apache.org.
use commons-lang rather than plexus-utils


Project: http://git-wip-us.apache.org/repos/asf/maven-wagon/repo
Commit: http://git-wip-us.apache.org/repos/asf/maven-wagon/commit/02681881
Tree: http://git-wip-us.apache.org/repos/asf/maven-wagon/tree/02681881
Diff: http://git-wip-us.apache.org/repos/asf/maven-wagon/diff/02681881

Branch: refs/heads/master
Commit: 02681881b235c5ff4120ffb6b17219b79266682c
Parents: fdec430
Author: olivier lamy <ol...@apache.org>
Authored: Wed Mar 27 22:47:27 2013 +1100
Committer: olivier lamy <ol...@apache.org>
Committed: Wed Mar 27 22:47:27 2013 +1100

----------------------------------------------------------------------
 wagon-providers/wagon-http-shared/pom.xml          |    4 ++++
 .../wagon/shared/http/HtmlFileListParser.java      |    2 +-
 2 files changed, 5 insertions(+), 1 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/02681881/wagon-providers/wagon-http-shared/pom.xml
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/pom.xml b/wagon-providers/wagon-http-shared/pom.xml
index ca63a2b..133a2c9 100644
--- a/wagon-providers/wagon-http-shared/pom.xml
+++ b/wagon-providers/wagon-http-shared/pom.xml
@@ -40,6 +40,10 @@ under the License.
       <version>1.7.2</version>
     </dependency>
     <dependency>
+      <groupId>commons-lang</groupId>
+      <artifactId>commons-lang</artifactId>
+    </dependency>
+    <dependency>
       <groupId>commons-io</groupId>
       <artifactId>commons-io</artifactId>
     </dependency>

http://git-wip-us.apache.org/repos/asf/maven-wagon/blob/02681881/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
----------------------------------------------------------------------
diff --git a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
index 7448a6d..2008b99 100644
--- a/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
+++ b/wagon-providers/wagon-http-shared/src/main/java/org/apache/maven/wagon/shared/http/HtmlFileListParser.java
@@ -20,8 +20,8 @@ package org.apache.maven.wagon.shared.http;
  */
 
 import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang.StringUtils;
 import org.apache.maven.wagon.TransferFailedException;
-import org.codehaus.plexus.util.StringUtils;
 import org.jsoup.Jsoup;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;