You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@creadur.apache.org by po...@apache.org on 2015/11/17 19:48:57 UTC
svn commit: r1714849 - in /creadur/tentacles/trunk: RELEASE_NOTES.txt
pom.xml src/main/java/org/apache/creadur/tentacles/IOSystem.java
src/main/java/org/apache/creadur/tentacles/NexusClient.java
Author: pottlinger
Date: Tue Nov 17 18:48:57 2015
New Revision: 1714849
URL: http://svn.apache.org/viewvc?rev=1714849&view=rev
Log:
TENTACLES-9: Add patch to have a retry strategy during crawl phase (patch)
Modified:
creadur/tentacles/trunk/RELEASE_NOTES.txt
creadur/tentacles/trunk/pom.xml
creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java
creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java
Modified: creadur/tentacles/trunk/RELEASE_NOTES.txt
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/RELEASE_NOTES.txt?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/RELEASE_NOTES.txt (original)
+++ creadur/tentacles/trunk/RELEASE_NOTES.txt Tue Nov 17 18:48:57 2015
@@ -11,3 +11,4 @@ Tentacles 0.1 SNAPSHOT
* [TENTACLES-3] - provide help text if runtime parameters are missing
* [TENTACLES-2] - use proper escaping in Velocity template files.
* [TENTACLES-1] - allow filtering of directories in LicenseFilter
+ * [TENTACLES-9] - add retry during crawl (thanks to Andy Gumbrecht)
Modified: creadur/tentacles/trunk/pom.xml
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/pom.xml?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/pom.xml (original)
+++ creadur/tentacles/trunk/pom.xml Tue Nov 17 18:48:57 2015
@@ -20,7 +20,7 @@
<parent>
<groupId>org.apache</groupId>
<artifactId>apache</artifactId>
- <version>14</version>
+ <version>17</version>
</parent>
<groupId>org.apache.creadur.tentacles</groupId>
<artifactId>apache-tentacles</artifactId>
@@ -74,7 +74,7 @@
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<javaVersion>1.6</javaVersion>
- <httpClientVersion>4.3.5</httpClientVersion>
+ <httpClientVersion>4.3.6</httpClientVersion>
<apacheRatVersion>0.11</apacheRatVersion>
</properties>
<issueManagement>
Modified: creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java (original)
+++ creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/IOSystem.java Tue Nov 17 18:48:57 2015
@@ -16,26 +16,12 @@
*/
package org.apache.creadur.tentacles;
-import java.io.BufferedInputStream;
-import java.io.BufferedOutputStream;
-import java.io.BufferedWriter;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.Closeable;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.FileOutputStream;
-import java.io.FileWriter;
-import java.io.Flushable;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.OutputStream;
+import org.apache.log4j.Logger;
+
+import java.io.*;
import java.net.URL;
import java.util.zip.ZipInputStream;
-import org.apache.log4j.Logger;
-
/**
* @version $Rev$ $Date$
*/
@@ -117,12 +103,12 @@ public class IOSystem {
((Flushable) closeable).flush();
}
} catch (final IOException e) {
- LOG.error("Error when trying to flush before closing " + closeable, e);
+ LOG.trace("Error when trying to flush before closing " + closeable, e);
}
try {
closeable.close();
} catch (final IOException e) {
- LOG.error("Error when trying to close " + closeable, e);
+ LOG.trace("Error when trying to close " + closeable, e);
}
}
Modified: creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java
URL: http://svn.apache.org/viewvc/creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java?rev=1714849&r1=1714848&r2=1714849&view=diff
==============================================================================
--- creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java (original)
+++ creadur/tentacles/trunk/src/main/java/org/apache/creadur/tentacles/NexusClient.java Tue Nov 17 18:48:57 2015
@@ -16,152 +16,171 @@
*/
package org.apache.creadur.tentacles;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.net.URI;
-import java.util.LinkedHashSet;
-import java.util.Set;
-
import org.apache.http.Header;
import org.apache.http.HttpHeaders;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpHead;
+import org.apache.http.client.methods.HttpUriRequest;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.log4j.Logger;
import org.codehaus.swizzle.stream.StreamLexer;
-public class NexusClient {
-
- private static final Logger log = Logger.getLogger(NexusClient.class);
- private static final String SLASH = "/";
- private static final String ONE_UP = "../";
- private static final String USER_AGENT_CONTENTS = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13";
-
- private final CloseableHttpClient client;
- private final FileSystem fileSystem;
- private final IOSystem ioSystem;
-
- public NexusClient(final Platform platform) {
-
- System.setProperty("http.keepAlive", "false");
- System.setProperty("http.maxConnections", "50");
-
- this.client = HttpClientBuilder.create().disableContentCompression()
- .build();
- this.fileSystem = platform.getFileSystem();
- this.ioSystem = platform.getIoSystem();
- }
-
- public File download(final URI uri, final File file) throws IOException {
- if (file.exists()) {
-
- final long length = getContentLength(uri);
-
- if (file.length() == length) {
- log.info("Exists " + uri);
- return file;
- } else {
- log.info("Incomplete " + uri);
- }
- }
-
- log.info("Download " + uri);
-
- final CloseableHttpResponse response = get(uri);
-
- InputStream content = null;
- try {
- content = response.getEntity().getContent();
-
- this.fileSystem.mkparent(file);
-
- this.ioSystem.copy(content, file);
- } finally {
- if (content != null) {
- content.close();
- }
-
- response.close();
- }
-
- return file;
- }
-
- private Long getContentLength(final URI uri) throws IOException {
- final CloseableHttpResponse head = head(uri);
- final Header[] headers = head.getHeaders(HttpHeaders.CONTENT_LENGTH);
-
- if (headers != null && headers.length >= 1) {
- return Long.valueOf(headers[0].getValue());
- }
-
- head.close();
-
- return Long.valueOf(-1);
- }
-
- private CloseableHttpResponse get(final URI uri) throws IOException {
- final HttpGet request = new HttpGet(uri);
- request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
- return this.client.execute(request);
- }
-
- private CloseableHttpResponse head(final URI uri) throws IOException {
- final HttpHead request = new HttpHead(uri);
- request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
- return this.client.execute(request);
- }
-
- public Set<URI> crawl(final URI index) throws IOException {
- log.info("Crawl " + index);
- final Set<URI> resources = new LinkedHashSet<URI>();
-
- final CloseableHttpResponse response = get(index);
-
- final InputStream content = response.getEntity().getContent();
- final StreamLexer lexer = new StreamLexer(content);
-
- final Set<URI> crawl = new LinkedHashSet<URI>();
-
- // <a
- // href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
- while (lexer.readAndMark("<a ", "/a>")) {
-
- try {
- final String link = lexer.peek("href=\"", "\"");
- final String name = lexer.peek(">", "<");
-
- final URI uri = index.resolve(link);
-
- if (name.equals(ONE_UP)) {
- continue;
- }
- if (link.equals(ONE_UP)) {
- continue;
- }
-
- if (name.endsWith(SLASH)) {
- crawl.add(uri);
- continue;
- }
-
- resources.add(uri);
-
- } finally {
- lexer.unmark();
- }
- }
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URI;
+import java.util.LinkedHashSet;
+import java.util.Set;
- content.close();
- response.close();
+public class NexusClient {
- for (final URI uri : crawl) {
- resources.addAll(crawl(uri));
- }
+ private static final Logger log = Logger.getLogger(NexusClient.class);
+ private static final String SLASH = "/";
+ private static final String ONE_UP = "../";
+ private static final String USER_AGENT_CONTENTS = "Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.2.13) Gecko/20101206 Ubuntu/10.10 (maverick) Firefox/3.6.13";
+
+ private final CloseableHttpClient client;
+ private final FileSystem fileSystem;
+ private final IOSystem ioSystem;
+ private final int retries;
+
+ public NexusClient(final Platform platform) {
+
+ System.setProperty("http.keepAlive", "false");
+ System.setProperty("http.maxConnections", "50");
+
+ this.retries = Integer.parseInt(System.getProperty("NexusClient.retries", "5"));
+
+ this.client = HttpClientBuilder.create().disableContentCompression()
+ .build();
+ this.fileSystem = platform.getFileSystem();
+ this.ioSystem = platform.getIoSystem();
+ }
+
+ public File download(final URI uri, final File file) throws IOException {
+ if (file.exists()) {
+
+ final long length = getContentLength(uri);
+
+ if (file.length() == length) {
+ log.info("Exists " + uri);
+ return file;
+ } else {
+ log.info("Incomplete " + uri);
+ }
+ }
+
+ log.info("Download " + uri);
+
+ final CloseableHttpResponse response = get(uri);
+
+ InputStream content = null;
+ try {
+ content = response.getEntity().getContent();
+
+ this.fileSystem.mkparent(file);
+
+ this.ioSystem.copy(content, file);
+ } finally {
+ if (content != null) {
+ content.close();
+ }
+
+ response.close();
+ }
+
+ return file;
+ }
+
+ private Long getContentLength(final URI uri) throws IOException {
+ final CloseableHttpResponse head = head(uri);
+ final Header[] headers = head.getHeaders(HttpHeaders.CONTENT_LENGTH);
+
+ if (headers != null && headers.length >= 1) {
+ return Long.valueOf(headers[0].getValue());
+ }
+
+ head.close();
+
+ return (long) -1;
+ }
+
+ private CloseableHttpResponse get(final URI uri) throws IOException {
+ return get(new HttpGet(uri), this.retries);
+ }
+
+ private CloseableHttpResponse head(final URI uri) throws IOException {
+ return get(new HttpHead(uri), this.retries);
+ }
+
+ private CloseableHttpResponse get(final HttpUriRequest request, int tries) throws IOException {
+ try {
+ request.setHeader(HttpHeaders.USER_AGENT, USER_AGENT_CONTENTS);
+ return this.client.execute(request);
+ } catch (final IOException e) {
+ if (tries > 0) {
+ try {
+ Thread.sleep(250);
+ } catch (final InterruptedException ie) {
+ Thread.interrupted();
+ throw new IOException("Interrupted", ie);
+ }
+ return get(request, tries--);
+ } else {
+ throw e;
+ }
+ }
+ }
+
+ public Set<URI> crawl(final URI index) throws IOException {
+ log.info("Crawl " + index);
+ final Set<URI> resources = new LinkedHashSet<URI>();
+
+ final CloseableHttpResponse response = get(index);
+
+ final InputStream content = response.getEntity().getContent();
+ final StreamLexer lexer = new StreamLexer(content);
+
+ final Set<URI> crawl = new LinkedHashSet<URI>();
+
+ // <a
+ // href="https://repository.apache.org/content/repositories/orgapacheopenejb-094/archetype-catalog.xml">archetype-catalog.xml</a>
+ while (lexer.readAndMark("<a ", "/a>")) {
+
+ try {
+ final String link = lexer.peek("href=\"", "\"");
+ final String name = lexer.peek(">", "<");
+
+ final URI uri = index.resolve(link);
+
+ if (name.equals(ONE_UP)) {
+ continue;
+ }
+ if (link.equals(ONE_UP)) {
+ continue;
+ }
+
+ if (name.endsWith(SLASH)) {
+ crawl.add(uri);
+ continue;
+ }
+
+ resources.add(uri);
+
+ } finally {
+ lexer.unmark();
+ }
+ }
+
+ content.close();
+ response.close();
+
+ for (final URI uri : crawl) {
+ resources.addAll(crawl(uri));
+ }
- return resources;
- }
+ return resources;
+ }
}
\ No newline at end of file