You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@jena.apache.org by rv...@apache.org on 2015/11/06 22:00:30 UTC

jena git commit: Fix for JENA-1063

Repository: jena
Updated Branches:
  refs/heads/master be12606ca -> e059b78a0


Fix for JENA-1063

This commit changes the behaviour of QueryEngineHTTP to avoid an
undesireable behaviour from Apache HTTP Client which we use for our HTTP
communications.

HTTP Client tries to re-use connections by default
which means that it must finish consuming responses before it can close
the InputStream associated with a specific HTTP response.  However this
can cause a hang and leave both the client and remote server stuck doing
unecessary work since most of the time we are going to clean up the HTTP
Client anyway so leaving the connections available for re-use makes no
sense for us.

The change is essentially to check whether we are going to clean up the
HTTP Client and if so do that first so that when we clean up the
associated InputStream the underlying connection is already closed and
it can't and won't waste time trying to consume the remaining response.


Project: http://git-wip-us.apache.org/repos/asf/jena/repo
Commit: http://git-wip-us.apache.org/repos/asf/jena/commit/e059b78a
Tree: http://git-wip-us.apache.org/repos/asf/jena/tree/e059b78a
Diff: http://git-wip-us.apache.org/repos/asf/jena/diff/e059b78a

Branch: refs/heads/master
Commit: e059b78a0dbc0851ead9411767053f077c0fff23
Parents: be12606
Author: Rob Vesse <rv...@apache.org>
Authored: Fri Nov 6 12:56:37 2015 -0800
Committer: Rob Vesse <rv...@apache.org>
Committed: Fri Nov 6 12:56:37 2015 -0800

----------------------------------------------------------------------
 .../sparql/engine/http/QueryEngineHTTP.java     | 46 ++++++++++++++++----
 1 file changed, 37 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/jena/blob/e059b78a/jena-arq/src/main/java/org/apache/jena/sparql/engine/http/QueryEngineHTTP.java
----------------------------------------------------------------------
diff --git a/jena-arq/src/main/java/org/apache/jena/sparql/engine/http/QueryEngineHTTP.java b/jena-arq/src/main/java/org/apache/jena/sparql/engine/http/QueryEngineHTTP.java
index 46d7f7b..8615844 100644
--- a/jena-arq/src/main/java/org/apache/jena/sparql/engine/http/QueryEngineHTTP.java
+++ b/jena-arq/src/main/java/org/apache/jena/sparql/engine/http/QueryEngineHTTP.java
@@ -27,6 +27,7 @@ import java.util.Map ;
 import java.util.concurrent.TimeUnit ;
 
 import org.apache.http.client.HttpClient ;
+import org.apache.jena.atlas.RuntimeIOException;
 import org.apache.jena.atlas.io.IO ;
 import org.apache.jena.atlas.lib.Pair ;
 import org.apache.jena.atlas.web.auth.HttpAuthenticator ;
@@ -694,15 +695,17 @@ public class QueryEngineHTTP implements QueryExecution {
     @Override
     public void close() {
         closed = true ;
-        if (retainedConnection != null) {
-            try {
-                retainedConnection.close();
-            } catch (java.io.IOException e) {
-                log.warn("Failed to close connection", e);
-            } finally {
-                retainedConnection = null;
-            }
-        }
+
+        // JENA-1063
+        // If we are going to shut down the HTTP client do this first as otherwise
+        // HTTP Client will by default try to re-use the connection and it will
+        // consume any outstanding response data in order to do this which can cause 
+        // the close() on the InputStream to hang for an extremely long time
+        // This also causes resources to continue to be consumed on the server regardless
+        // of the fact that the client has called our close() method and so clearly
+        // does not care about any remaining response
+        // i.e. if we don't do this we are badly behaved towards both the caller and 
+        // the remote server we're interacting with
         if (retainedClient != null) {
             try {
                 retainedClient.getConnectionManager().shutdown();
@@ -712,6 +715,31 @@ public class QueryEngineHTTP implements QueryExecution {
                 retainedClient = null;
             }
         }
+        
+        if (retainedConnection != null) {
+            try {
+                // JENA-1063 - WARNING
+                // This call may take a long time if the response has not been consumed
+                // as HTTP client will consume the remaining response so it can re-use the
+                // connection
+                // If we're closing when we're not at the end of the stream then issue a
+                // warning to the logs
+                if (retainedConnection.read() != -1)
+                    log.warn("HTTP response not fully consumed, if HTTP Client is reusing connections (its default behaviour) then it will consume the remaining response data which may take a long time and cause this application to become unresponsive");
+                
+                retainedConnection.close();
+            } catch (RuntimeIOException e) {
+                // If we are closing early and the underlying stream is chunk encoded
+                // the close() can result in a IOException.  Unfortunately our TypedInputStream
+                // catches and re-wraps that and we want to suppress it when we are cleaning up
+                // and so we catch the wrapped exception and log it instead
+                log.warn("Failed to close connection", e);
+            } catch (java.io.IOException e) {
+                log.warn("Failed to close connection", e);
+            } finally {
+                retainedConnection = null;
+            }
+        }
     }
 
     @Override