You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tomee.apache.org by db...@apache.org on 2011/09/28 00:36:06 UTC

svn commit: r1176650 - /openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java

Author: dblevins
Date: Tue Sep 27 22:36:05 2011
New Revision: 1176650

URL: http://svn.apache.org/viewvc?rev=1176650&view=rev
Log:
collects a full list of binaries in sorted order

Modified:
    openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java

Modified: openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java
URL: http://svn.apache.org/viewvc/openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java?rev=1176650&r1=1176649&r2=1176650&view=diff
==============================================================================
--- openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java (original)
+++ openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java Tue Sep 27 22:36:05 2011
@@ -22,8 +22,9 @@ import org.apache.http.impl.client.Defau
 import org.codehaus.swizzle.stream.StreamLexer;
 
 import java.io.IOException;
+import java.io.InputStream;
 import java.net.URI;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
 import java.util.Set;
 
 /**
@@ -55,26 +56,48 @@ public class Main {
     }
 
     private Set<URI> crawl(URI index) throws IOException {
-        final Set<URI> resources = new HashSet<URI>();
+        final Set<URI> resources = new LinkedHashSet<URI>();
 
-        HttpGet request = new HttpGet(index);
+        final HttpGet request = new HttpGet(index);
         request.setHeader("User-Agent", "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13");
-        HttpResponse response = client.execute(request);
-        StreamLexer lexer = new StreamLexer(response.getEntity().getContent());
+        final HttpResponse response = client.execute(request);
+
+        final InputStream content = response.getEntity().getContent();
+        final StreamLexer lexer = new StreamLexer(content);
+
+        final Set<URI> crawl = new LinkedHashSet<URI>();
 
         //<a href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
-        while (lexer.readAndMark("<a ","/a>")) {
-            final String link = lexer.peek("href=\"", "\"");
-            final String name = lexer.peek(">", "<");
-
-            final URI uri = index.resolve(link);
-
-            if (name.endsWith("/")) {
-                resources.addAll(crawl(uri));
-            } else {
+        while (lexer.readAndMark("<a ", "/a>")) {
+
+            try {
+                final String link = lexer.peek("href=\"", "\"");
+                final String name = lexer.peek(">", "<");
+
+                final URI uri = index.resolve(link);
+
+                if (name.equals("../")) continue;
+                if (link.equals("../")) continue;
+
+                if (name.endsWith("/")) {
+                    crawl.add(uri);
+                    continue;
+                }
+
+                if (!uri.getPath().matches(".*(jar|zip|tar.gz)")) continue;
+
                 resources.add(uri);
+
+            } finally {
+                lexer.unmark();
             }
         }
+
+        content.close();
+
+        for (URI uri : crawl) {
+            resources.addAll(crawl(uri));
+        }
         return resources;
     }