You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by ti...@apache.org on 2018/06/13 15:51:43 UTC

[drill] 01/02: DRILL-6477: Drillbit crashes with OOME (Heap) for a large WebUI query

This is an automated email from the ASF dual-hosted git repository.

timothyfarkas pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 7be1e01a71245f7234fe347decf9efa4acc36e52
Author: Kunal Khatua <ku...@apache.org>
AuthorDate: Thu Jun 7 13:32:00 2018 -0700

    DRILL-6477: Drillbit crashes with OOME (Heap) for a large WebUI query
    
    For queries that are submitted through the WebUI and retrieve a large result set, the (foreman) Drillbit often hangs or crashes because it runs out of heap memory.
    
    This is because the Web client translates the result set into a massive object on the heap and tries to send that back to the browser. If memory is insufficient, the JVM ends up spending its time actively trying to perform GC.
    
    The workaround is to have the active webConnection of the query periodically time out so that the consumed heap space can be checked. A level of 0.85 (i.e. 85%) is set as the default threshold; when heap usage crosses it, a query submitted through the REST API is marked as failed.
    In addition, a user exception is thrown, indicating the cause of the query failure, along with alternative suggestions for re-executing the query.
    
    closes #1309
---
 .../java/org/apache/drill/exec/ExecConstants.java  |  1 -
 .../drill/exec/server/rest/QueryWrapper.java       | 48 +++++++++++++++++++++-
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
index 1070d76..776c469 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java
@@ -204,7 +204,6 @@ public final class ExecConstants {
   public static final String SERVICE_KEYTAB_LOCATION = SERVICE_LOGIN_PREFIX + ".keytab";
   public static final String KERBEROS_NAME_MAPPING = SERVICE_LOGIN_PREFIX + ".auth_to_local";
 
-
   public static final String USER_SSL_ENABLED = "drill.exec.security.user.encryption.ssl.enabled";
   public static final String BIT_ENCRYPTION_SASL_ENABLED = "drill.exec.security.bit.encryption.sasl.enabled";
   public static final String BIT_ENCRYPTION_SASL_MAX_WRAPPED_SIZE = "drill.exec.security.bit.encryption.sasl.max_wrapped_size";
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/QueryWrapper.java b/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/QueryWrapper.java
index 911ac0f..cf74937 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/QueryWrapper.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/server/rest/QueryWrapper.java
@@ -20,7 +20,10 @@ package org.apache.drill.exec.server.rest;
 import com.fasterxml.jackson.annotation.JsonCreator;
 import com.fasterxml.jackson.annotation.JsonProperty;
 import com.google.common.collect.Maps;
+
+import org.apache.drill.common.exceptions.UserException;
 import org.apache.drill.exec.proto.UserBitShared.QueryId;
+import org.apache.drill.exec.proto.UserBitShared.QueryResult.QueryState;
 import org.apache.drill.exec.proto.UserBitShared.QueryType;
 import org.apache.drill.exec.proto.UserProtos.RunQuery;
 import org.apache.drill.exec.proto.helper.QueryIdHelper;
@@ -28,18 +31,26 @@ import org.apache.drill.exec.proto.UserProtos.QueryResultsMode;
 import org.apache.drill.exec.work.WorkManager;
 
 import javax.xml.bind.annotation.XmlRootElement;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
 import java.util.Collection;
 import java.util.List;
 import java.util.Map;
+import java.util.concurrent.TimeUnit;
 
 @XmlRootElement
 public class QueryWrapper {
   private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(QueryWrapper.class);
+  // Heap usage threshold/trigger to provide resiliency on web server for queries submitted via HTTP
+  private static final double HEAP_MEMORY_FAILURE_THRESHOLD = 0.85;
 
   private final String query;
 
   private final String queryType;
 
+  private static MemoryMXBean memMXBean = ManagementFactory.getMemoryMXBean();
+
   @JsonCreator
   public QueryWrapper(@JsonProperty("query") String query, @JsonProperty("queryType") String queryType) {
     this.query = query;
@@ -59,7 +70,6 @@ public class QueryWrapper {
   }
 
   public QueryResult run(final WorkManager workManager, final WebUserConnection webUserConnection) throws Exception {
-
     final RunQuery runQuery = RunQuery.newBuilder().setType(getType())
         .setPlan(getQuery())
         .setResultsMode(QueryResultsMode.STREAM_FULL)
@@ -68,8 +78,37 @@ public class QueryWrapper {
     // Submit user query to Drillbit work queue.
     final QueryId queryId = workManager.getUserWorker().submitWork(webUserConnection, runQuery);
 
+    boolean isComplete = false;
+    boolean nearlyOutOfHeapSpace = false;
+    float usagePercent = getHeapUsage();
+
     // Wait until the query execution is complete or there is error submitting the query
-    webUserConnection.await();
+    logger.debug("Wait until the query execution is complete or there is error submitting the query");
+    do {
+      try {
+        isComplete = webUserConnection.await(TimeUnit.SECONDS.toMillis(1)); /*periodically timeout to check heap*/
+      } catch (Exception e) { }
+
+      usagePercent = getHeapUsage();
+      if (usagePercent >  HEAP_MEMORY_FAILURE_THRESHOLD) {
+        nearlyOutOfHeapSpace = true;
+      }
+    } while (!isComplete && !nearlyOutOfHeapSpace);
+
+    //Fail if nearly out of heap space
+    if (nearlyOutOfHeapSpace) {
+      workManager.getBee().getForemanForQueryId(queryId)
+        .addToEventQueue(QueryState.FAILED,
+            UserException.resourceError(
+                new Throwable(
+                    "There is not enough heap memory to run this query using the web interface. "
+                    + "Please try a query with fewer columns or with a filter or limit condition to limit the data returned. "
+                    + "You can also try an ODBC/JDBC client. "
+                    )
+                )
+              .build(logger)
+            );
+    }
 
     if (logger.isTraceEnabled()) {
       logger.trace("Query {} is completed ", queryId);
@@ -83,6 +122,11 @@ public class QueryWrapper {
     return new QueryResult(queryId, webUserConnection.columns, webUserConnection.results);
   }
 
+  //Detect possible excess heap
+  private float getHeapUsage() {
+    return (float) memMXBean.getHeapMemoryUsage().getUsed() / memMXBean.getHeapMemoryUsage().getMax();
+  }
+
   public static class QueryResult {
     private final String queryId;
     public final Collection<String> columns;

-- 
To stop receiving notification emails like this one, please contact
timothyfarkas@apache.org.