You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by am...@apache.org on 2017/05/20 14:26:02 UTC

[3/6] drill git commit: DRILL-5496: Fix for failed Hive connection

DRILL-5496: Fix for failed Hive connection

If the Hive server restarts, Drill either hangs or continually reports
errors when retrieving schemas. The problem is that the Hive plugin
tries to handle connection failures, but does not do so correctly for
the secure connection case. The problem is complex, see DRILL-5496 for
details.

This is a workaround: we discard the entire Hive schema cache when we
encounter an unhandled connection exception, then we rebuild a new one.

This is not a proper fix; for that we'd have to restructure the code.

This will, however, solve the immediate problem until we do the needed
restructuring.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/aa39c66d
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/aa39c66d
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/aa39c66d

Branch: refs/heads/master
Commit: aa39c66d92ec03c90f3715df7cc4fde49d76a334
Parents: 9972669
Author: Paul Rogers <pr...@maprtech.com>
Authored: Fri May 12 11:01:54 2017 -0700
Committer: Aman Sinha <as...@maprtech.com>
Committed: Fri May 19 09:53:18 2017 -0700

----------------------------------------------------------------------
 contrib/storage-hive/core/pom.xml               |  6 ++
 .../store/hive/DrillHiveMetaStoreClient.java    | 16 ++++-
 .../exec/store/hive/HiveStoragePlugin.java      | 68 +++++++++++++++++++-
 .../store/hive/schema/HiveSchemaFactory.java    | 23 +++++--
 contrib/storage-hive/hive-exec-shade/pom.xml    |  1 -
 .../apache/drill/exec/store/SchemaFactory.java  |  4 +-
 6 files changed, 104 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/contrib/storage-hive/core/pom.xml
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/pom.xml b/contrib/storage-hive/core/pom.xml
index adae9a1..2b41fad 100644
--- a/contrib/storage-hive/core/pom.xml
+++ b/contrib/storage-hive/core/pom.xml
@@ -57,6 +57,12 @@
     <dependency>
       <groupId>org.apache.hive</groupId>
       <artifactId>hive-hbase-handler</artifactId>
+      <exclusions>
+        <exclusion>
+          <groupId>org.apache.hive</groupId>
+          <artifactId>hive-exec</artifactId>
+        </exclusion>
+      </exclusions>
     </dependency>
     <dependency>
       <groupId>org.apache.hbase</groupId>

http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
index 92efdc7..53b966b 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/DrillHiveMetaStoreClient.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -214,7 +214,19 @@ public abstract class DrillHiveMetaStoreClient extends HiveMetaStoreClient {
       } catch (Exception ex) {
         logger.warn("Failure while attempting to close existing hive metastore connection. May leak connection.", ex);
       }
-      mClient.reconnect();
+
+      // Attempt to reconnect. If this is a secure connection, this will fail due
+      // to the invalidation of the security token. In that case, throw the original
+      // exception and let a higher level clean up. Ideally we'd get a new token
+      // here, but doing so requires the use of a different connection, and that
+      // one has also become invalid. This code needs a rework; this is just a
+      // work-around.
+
+      try {
+        mClient.reconnect();
+      } catch (Exception e1) {
+        throw e;
+      }
       return mClient.getAllDatabases();
     }
   }

http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java
index f99a934..8a842b2 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/HiveStoragePlugin.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -29,6 +29,7 @@ import org.apache.calcite.schema.Schema.TableType;
 import org.apache.calcite.schema.SchemaPlus;
 
 import org.apache.drill.common.JSONOptions;
+import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.common.expression.SchemaPath;
 import org.apache.drill.exec.ExecConstants;
@@ -45,13 +46,16 @@ import com.fasterxml.jackson.core.type.TypeReference;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
+import org.apache.hadoop.hive.metastore.api.MetaException;
+import org.apache.thrift.TException;
+import org.apache.thrift.transport.TTransportException;
 
 public class HiveStoragePlugin extends AbstractStoragePlugin {
 
   static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(HiveStoragePlugin.class);
 
   private final HiveStoragePluginConfig config;
-  private final HiveSchemaFactory schemaFactory;
+  private HiveSchemaFactory schemaFactory;
   private final DrillbitContext context;
   private final String name;
   private final HiveConf hiveConf;
@@ -95,9 +99,67 @@ public class HiveStoragePlugin extends AbstractStoragePlugin {
     }
   }
 
+  // Forced to synchronize this method to allow error recovery
+  // in the multi-threaded case. Can remove synchronized only
+  // by restructuring connections and cache to allow better
+  // recovery from failed secure connections.
+
   @Override
-  public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
+  public synchronized void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
+    try {
+      schemaFactory.registerSchemas(schemaConfig, parent);
+      return;
+
+    // Hack. We may need to retry the connection. But, we can't because
+    // the retry logic is implemented in the very connection we need to
+    // discard and rebuild. To work around, we discard the entire schema
+    // factory, and all its invalid connections. Very crude, but the
+    // easiest short-term solution until we refactor the code to do the
+    // job properly. See DRILL-5510.
+
+    } catch (Throwable e) {
+      // Unwrap exception
+      Throwable ex = e;
+      for (;;) {
+        // Case for failing on an invalid cached connection
+        if (ex instanceof MetaException ||
+            // Case for a timed-out impersonated connection, and
+            // an invalid non-secure connection used to get security
+            // tokens.
+            ex instanceof TTransportException) {
+          break;
+        }
+
+        // All other exceptions are not handled, just pass along up
+        // the stack.
+
+        if (ex.getCause() == null  ||  ex.getCause() == ex) {
+          logger.error("Hive metastore register schemas failed", e);
+          throw new DrillRuntimeException("Unknown Hive error", e);
+        }
+        ex = ex.getCause();
+      }
+    }
+
+    // Build a new factory which will cause an all new set of
+    // Hive metastore connections to be created.
+
+    try {
+      schemaFactory.close();
+    } catch (Throwable t) {
+      // Ignore, we're in a bad state.
+      logger.warn("Schema factory forced close failed, error ignored", t);
+    }
+    try {
+      schemaFactory = new HiveSchemaFactory(this, name, hiveConf);
+    } catch (ExecutionSetupException e) {
+      throw new DrillRuntimeException(e);
+    }
+
+    // Try the schemas again. If this fails, just give up.
+
     schemaFactory.registerSchemas(schemaConfig, parent);
+    logger.debug("Successfully recovered from a Hive metastore connection failure.");
   }
 
   @Override

http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
index 9cfaeae..d3115b8 100644
--- a/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
+++ b/contrib/storage-hive/core/src/main/java/org/apache/drill/exec/store/hive/schema/HiveSchemaFactory.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -29,7 +29,7 @@ import com.google.common.cache.LoadingCache;
 import com.google.common.cache.RemovalListener;
 import com.google.common.cache.RemovalNotification;
 import org.apache.calcite.schema.SchemaPlus;
-
+import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.exceptions.ExecutionSetupException;
 import org.apache.drill.exec.ExecConstants;
 import org.apache.drill.exec.planner.logical.DrillTable;
@@ -105,6 +105,20 @@ public class HiveSchemaFactory implements SchemaFactory {
     return isDrillImpersonationEnabled && isHS2DoAsSet;
   }
 
+  /**
+   * Close this schema factory in preparation for retrying. Attempt to close
+   * connections, but just ignore any errors.
+   */
+
+  public void close() {
+    try {
+      processUserMetastoreClient.close();
+    } catch (Exception e) { }
+    try {
+      metaStoreClientLoadingCache.invalidateAll();
+    } catch (Exception e) { }
+  }
+
   @Override
   public void registerSchemas(SchemaConfig schemaConfig, SchemaPlus parent) throws IOException {
     DrillHiveMetaStoreClient mClientForSchemaTree = processUserMetastoreClient;
@@ -146,9 +160,8 @@ public class HiveSchemaFactory implements SchemaFactory {
           this.defaultSchema = schema;
         }
         return schema;
-      } catch (final TException e) {
-        logger.warn("Failure while attempting to access HiveDatabase '{}'.", name, e.getCause());
-        return null;
+      } catch (TException e) {
+        throw new DrillRuntimeException(e);
       }
     }
 

http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/contrib/storage-hive/hive-exec-shade/pom.xml
----------------------------------------------------------------------
diff --git a/contrib/storage-hive/hive-exec-shade/pom.xml b/contrib/storage-hive/hive-exec-shade/pom.xml
index 6495739..bec13fd 100644
--- a/contrib/storage-hive/hive-exec-shade/pom.xml
+++ b/contrib/storage-hive/hive-exec-shade/pom.xml
@@ -105,7 +105,6 @@
       <plugin>
         <groupId>org.apache.maven.plugins</groupId>
         <artifactId>maven-dependency-plugin</artifactId>
-        <version>2.8</version>
         <executions>
           <execution>
             <id>unpack</id>

http://git-wip-us.apache.org/repos/asf/drill/blob/aa39c66d/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaFactory.java
----------------------------------------------------------------------
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaFactory.java b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaFactory.java
index b28c3c2..2027527 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaFactory.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/store/SchemaFactory.java
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
@@ -19,8 +19,6 @@ package org.apache.drill.exec.store;
 
 import org.apache.calcite.schema.SchemaPlus;
 
-import org.apache.drill.exec.ops.QueryContext;
-
 import java.io.IOException;
 
 /**