You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@manifoldcf.apache.org by kw...@apache.org on 2015/01/30 03:27:53 UTC
svn commit: r1655914 - in /manifoldcf/trunk: CHANGES.txt
connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Author: kwright
Date: Fri Jan 30 02:27:53 2015
New Revision: 1655914
URL: http://svn.apache.org/r1655914
Log:
Second fix for CONNECTORS-1154.
Modified:
manifoldcf/trunk/CHANGES.txt
manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
Modified: manifoldcf/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/CHANGES.txt?rev=1655914&r1=1655913&r2=1655914&view=diff
==============================================================================
--- manifoldcf/trunk/CHANGES.txt (original)
+++ manifoldcf/trunk/CHANGES.txt Fri Jan 30 02:27:53 2015
@@ -3,6 +3,11 @@ $Id$
======================= 2.1-dev =====================
+CONNECTORS-1154: Changes to the flow through the web connector's
+deflate/unzip logic, designed to prevent gzip headers from being read
+if the stream's contents are not read.
+(Li Minhui, Karl Wright)
+
CONNECTORS-1153: Broken authority name comparison is preventing
incremental when no authority is used.
(Aeham Abushwashi, Karl Wright)
Modified: manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java
URL: http://svn.apache.org/viewvc/manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java?rev=1655914&r1=1655913&r2=1655914&view=diff
==============================================================================
--- manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java (original)
+++ manifoldcf/trunk/connectors/webcrawler/connector/src/main/java/org/apache/manifoldcf/crawler/connectors/webcrawler/ThrottledFetcher.java Fri Jan 30 02:27:53 2015
@@ -904,7 +904,12 @@ public class ThrottledFetcher
throw new ManifoldCFException("Attempt to get an input stream when no method thread");
try
{
- return methodThread.getSafeInputStream();
+ InputStream bodyStream = methodThread.getSafeInputStream();
+ if (methodThread.isGZipStream())
+ bodyStream = new GZIPInputStream(bodyStream);
+ else if (methodThread.isDeflateStream())
+ bodyStream = new DeflateInputStream(bodyStream);
+ return bodyStream;
}
catch (InterruptedException e)
{
@@ -1374,6 +1379,8 @@ public class ThrottledFetcher
protected boolean streamCreated = false;
protected Throwable streamException = null;
protected boolean abortThread = false;
+ protected boolean gzip = false;
+ protected boolean deflate = false;
protected Throwable shutdownException = null;
@@ -1458,8 +1465,6 @@ public class ThrottledFetcher
{
try
{
- boolean gzip = false;
- boolean deflate = false;
Header ceheader = response.getEntity().getContentEncoding();
if (ceheader != null)
{
@@ -1484,10 +1489,6 @@ public class ThrottledFetcher
if (bodyStream != null)
{
bodyStream = new ThrottledInputstream(fetchThrottler.createFetchStream(),theConnection,bodyStream);
- if (gzip)
- bodyStream = new GZIPInputStream(bodyStream);
- else if (deflate)
- bodyStream = new DeflateInputStream(bodyStream);
threadStream = new XThreadInputStream(bodyStream);
}
streamCreated = true;
@@ -1645,6 +1646,46 @@ public class ThrottledFetcher
}
}
}
+
+ public boolean isGZipStream()
+ throws InterruptedException, IOException, HttpException
+ {
+ // Must wait until stream is created, or until we note an exception was thrown.
+ while (true)
+ {
+ synchronized (this)
+ {
+ if (responseException != null)
+ throw new IllegalStateException("Check for response before getting stream");
+ if (cookieException != null)
+ throw new IllegalStateException("Check for cookies before getting stream");
+ checkException(streamException);
+ if (streamCreated)
+ return gzip;
+ wait();
+ }
+ }
+ }
+
+ public boolean isDeflateStream()
+ throws InterruptedException, IOException, HttpException
+ {
+ // Must wait until stream is created, or until we note an exception was thrown.
+ while (true)
+ {
+ synchronized (this)
+ {
+ if (responseException != null)
+ throw new IllegalStateException("Check for response before getting stream");
+ if (cookieException != null)
+ throw new IllegalStateException("Check for cookies before getting stream");
+ checkException(streamException);
+ if (streamCreated)
+ return deflate;
+ wait();
+ }
+ }
+ }
public InputStream getSafeInputStream()
throws InterruptedException, IOException, HttpException