You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tomee.apache.org by db...@apache.org on 2011/09/28 00:36:06 UTC
svn commit: r1176650 - /openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java
Author: dblevins
Date: Tue Sep 27 22:36:05 2011
New Revision: 1176650
URL: http://svn.apache.org/viewvc?rev=1176650&view=rev
Log:
collects a full list of binaries in sorted order
Modified:
openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java
Modified: openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java
URL: http://svn.apache.org/viewvc/openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java?rev=1176650&r1=1176649&r2=1176650&view=diff
==============================================================================
--- openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java (original)
+++ openejb/trunk/sandbox/legal/src/main/java/org/apache/openejb/tools/legal/Main.java Tue Sep 27 22:36:05 2011
@@ -22,8 +22,9 @@ import org.apache.http.impl.client.Defau
import org.codehaus.swizzle.stream.StreamLexer;
import java.io.IOException;
+import java.io.InputStream;
import java.net.URI;
-import java.util.HashSet;
+import java.util.LinkedHashSet;
import java.util.Set;
/**
@@ -55,26 +56,48 @@ public class Main {
}
private Set<URI> crawl(URI index) throws IOException {
- final Set<URI> resources = new HashSet<URI>();
+ final Set<URI> resources = new LinkedHashSet<URI>();
- HttpGet request = new HttpGet(index);
+ final HttpGet request = new HttpGet(index);
request.setHeader("User-Agent", "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13");
- HttpResponse response = client.execute(request);
- StreamLexer lexer = new StreamLexer(response.getEntity().getContent());
+ final HttpResponse response = client.execute(request);
+
+ final InputStream content = response.getEntity().getContent();
+ final StreamLexer lexer = new StreamLexer(content);
+
+ final Set<URI> crawl = new LinkedHashSet<URI>();
//<a href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
- while (lexer.readAndMark("<a ","/a>")) {
- final String link = lexer.peek("href=\"", "\"");
- final String name = lexer.peek(">", "<");
-
- final URI uri = index.resolve(link);
-
- if (name.endsWith("/")) {
- resources.addAll(crawl(uri));
- } else {
+ while (lexer.readAndMark("<a ", "/a>")) {
+
+ try {
+ final String link = lexer.peek("href=\"", "\"");
+ final String name = lexer.peek(">", "<");
+
+ final URI uri = index.resolve(link);
+
+ if (name.equals("../")) continue;
+ if (link.equals("../")) continue;
+
+ if (name.endsWith("/")) {
+ crawl.add(uri);
+ continue;
+ }
+
+ if (!uri.getPath().matches(".*(jar|zip|tar.gz)")) continue;
+
resources.add(uri);
+
+ } finally {
+ lexer.unmark();
}
}
+
+ content.close();
+
+ for (URI uri : crawl) {
+ resources.addAll(crawl(uri));
+ }
return resources;
}