You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sentry.apache.org by ha...@apache.org on 2017/02/07 22:51:35 UTC

sentry git commit: SENTRY-1605: SENTRY-1508 need to be fixed because of Kerberos initialization issue (Vadim Spector, Reviewed by: Vamsee Yarlagadda and Hao Hao)

Repository: sentry
Updated Branches:
  refs/heads/master 17bf43f38 -> c4afd37b4


SENTRY-1605: SENTRY-1508 need to be fixed because of Kerberos initialization issue (Vadim Spector, Reviewed by: Vamsee Yarlagadda and Hao Hao)

Change-Id: Ib0ae80e75256f0d3156ef6c60384bf129b553206


Project: http://git-wip-us.apache.org/repos/asf/sentry/repo
Commit: http://git-wip-us.apache.org/repos/asf/sentry/commit/c4afd37b
Tree: http://git-wip-us.apache.org/repos/asf/sentry/tree/c4afd37b
Diff: http://git-wip-us.apache.org/repos/asf/sentry/diff/c4afd37b

Branch: refs/heads/master
Commit: c4afd37b4a31e04c815896f8e3dc7c0c881b2808
Parents: 17bf43f
Author: hahao <ha...@cloudera.com>
Authored: Tue Feb 7 14:50:49 2017 -0800
Committer: hahao <ha...@cloudera.com>
Committed: Tue Feb 7 14:50:49 2017 -0800

----------------------------------------------------------------------
 .../org/apache/sentry/hdfs/MetastorePlugin.java | 122 ++++++++++++++-----
 1 file changed, 90 insertions(+), 32 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/sentry/blob/c4afd37b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/MetastorePlugin.java
----------------------------------------------------------------------
diff --git a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/MetastorePlugin.java b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/MetastorePlugin.java
index f6661fd..7a21871 100644
--- a/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/MetastorePlugin.java
+++ b/sentry-hdfs/sentry-hdfs-service/src/main/java/org/apache/sentry/hdfs/MetastorePlugin.java
@@ -57,7 +57,7 @@ import com.google.common.collect.Lists;
  *  <li> At the construction time:
  *   <ul>
  *    <li> Initializes local HMS cache with HMS paths information.
- *    <li> Sends initial HMS paths information to the Sentry daemon.
+ *    <li> Starts housekeeping thread to run periodically SyncTask - see below what it does.
  *   </ul>
  *  </li>
  *  <li> Upon receiving path update notification from the hosting client code, via addPath(),
@@ -65,6 +65,8 @@ import com.google.common.collect.Lists;
  *   <ul>
  *    <li> Updates local HMS cache accordingly.
  *    <li> Sends partial update with the assigned sequence number to the Sentry daemon.
+ *    <li> Special case - skip sending partial update to Sentry if the initial full update from
+ *         housekeeping thread is still in progress (firstSync == false). Still update the local cache.
  *    <li> Maintains the latest Sentry partial update sequence number, incrementing it by 1 on each update.
  *   </ul>
  *  </li>
@@ -73,6 +75,9 @@ import com.google.common.collect.Lists;
  *    <li> Contacts the Sentry daemon to ask for the sequence number of the latest received update.
  *    <li> If the sequence number returned by the Sentry daemon does not match the sequence number of the
  *         latest update sent from MetastorePlugin, send the full HMS paths image to the Sentry daemon.
+ *    <li> After the very first successful full HMS paths update to Sentry, set firstSync to true.
+ *         to signal the rest of the code that from this moment all partial updates should be sent to Sentry,
+ *         in addition to updating the local cache.
  *   </ul>
  *  </li>
  * </ul>
@@ -87,16 +92,15 @@ import com.google.common.collect.Lists;
  *
  * <p>
  * <li>MetastorePlugin is always created, even though ininitialization may fail.<br>
- * MetastorePlugin initialization (object construction) may fail for two reasons:
+ * MetastorePlugin initialization (object construction) failure reasons:
  * <ul>
  *  <li> HMS cache cannot be initialized, usually due to some invalid HMS path entries.
- *  <li> Initial cache cannot be sent to Sentry, e.g. due to the communication problems.
  * </ul>
  *
  * <p>
- * In either case, MetastorePlugin is still constructed, in consideration with the design of
- * the existing client code. However, such an instance is marked as invalid; all update APIs
- * throw IllegalStateException with the appropriate error message and root cause exception.
+ * MetastorePlugin is still constructed, in consideration with the design of the existing client code.
+ * However, such an instance is marked as invalid; all update APIs throw IllegalStateException with
+ * the appropriate error message and root cause exception.
  * <br>TODO: failing to construct MetastorePlugin on initialization failure would be much cleaner,
  *       but it has to be done in coordination with the HMS client code.
  *
@@ -113,7 +117,8 @@ import com.google.common.collect.Lists;
  * Update sequence number is created at first step, and then it travels as part of the update information,
  * to the Sentry daemon on the second step. Therefore, the sequence of both steps must be
  * atomic, to guarantee that updates arrive to the Sentry daemon in the right order,
- * with sequential update number. This is achieved by using notificationLock. The same lock is used
+ * with sequential update number. This is achieved by using notificationLock over two operations:
+ * assigning sequence number to the update, and sending this update to Sentry. The same lock is used
  * inside the SyncTask during full Sentry update, when the local and Sentry-side update sequence
  * numbers are out of sync.
  *
@@ -155,6 +160,13 @@ public class MetastorePlugin extends SentryMetastoreListenerPlugin {
   // Has to match the value of seqNum
   // This code ensures that access to lastSentSeqNum is protected by notificationLock
   protected long lastSentSeqNum;
+  /*
+   * The firstSync value, set initially to false, is set to true by SyncTask from
+   * housekeeping thread, immediately after pushing the first successsful full update to Sentry.
+   * Prior to firstSync == true, all partial updates happen only in the local cache.
+   * This code ensures that access to firstSync is protected by notificationLock
+   */
+  protected boolean firstSync;
 
   // pathUpdateLock guards access to UpdateableAuthzPaths which is not thread-safe
   private final ReentrantReadWriteLock pathUpdateLock = new ReentrantReadWriteLock();
@@ -170,9 +182,25 @@ public class MetastorePlugin extends SentryMetastoreListenerPlugin {
   private static volatile ScheduledExecutorService lastThreadPool = null;
 
   /*
-   * This task is scheduled to run periodically, to make sure Sentry has all updates
-   * -- only if MetastorePlugin has been successfully initialized.
+   * This task is scheduled to run periodically, to make sure Sentry (SentryPlugin
+   * class to be exact) has all the latest HMS path updates.
+   *
+   * It compares the last sequence number of a partial update sent to Sentry
+   * against the sequence number of last update as reported by Sentry.
+   * If the two are different, full update is triggered.
+   *
+   * The initial full update must reach Sentry before any partial updates.
+   * Since any full update would also include the all the subsequent partial
+   * updates as well anyway, sending partial updates before the first full
+   * update would be redundant. It would also require more care on SentryPlugin
+   * side to handle updates with potentially duplicate sequence numbers.
+   *
+   * The firstSync variable initialized to false in the constructor,
+   * will be set to true in the run() method on the first successfull full
+   * update. Prior to that event, all partial updates will only be commited
+   * to the local memory cache.
    */
+
   class SyncTask implements Runnable {
     @Override
     public void run() {
@@ -183,10 +211,44 @@ public class MetastorePlugin extends SentryMetastoreListenerPlugin {
       try {
         long lastSeenBySentry = getLastSeenHMSPathSeqNum();
         long lastSent = lastSentSeqNum;
-        if (lastSeenBySentry != lastSent) {
-          LOGGER.warn("#### Sentry not in sync with HMS [" + lastSeenBySentry + ", "
+        /*
+         * Most common branch, after the first full update (firstSync == true).
+         * Do full update only if out-of-sync is detected.
+         */ 
+        if (firstSync) {
+          // Out-of-sync! Normally should not happen, so worth logging as a warning.
+          if (lastSeenBySentry != lastSent) {
+            LOGGER.warn("#### Sentry not in sync with HMS [" + lastSeenBySentry + ", "
               + lastSent + "]");
+            notifySentryFullUpdate(lastSent);
+          } // else we are ok, which is common, so we don't want to log anything
+        /*
+         * Less common branch - only before the first successful full update.
+         * Do full update regardless of the sequence numbers on both sides.
+         * Don't want to depend on how specifically SentryPlugin and MetastorePlugin
+         * logic initialize their sequence numbers to guarantee that they are
+         * initially different. First full update is always mandatory.
+         *
+         * TODO: if at least MetastorePlugin implements persistent incrementing
+         * sequence numbers, this logic can be optimized to avoid the initial fill
+         * update. It can optimize the situation when MetastorePlugin is restarted
+         * while Sentry keeps running and allready has the latest updates.
+         */
+        } else { // firstSync == false
+          // still print both sequence numbers out of curiosity, to see how they get initialized
+          LOGGER.info("#### Trying to send first full update to Sentry [" + lastSeenBySentry + ", "
+            + lastSent + "]");
           notifySentryFullUpdate(lastSent);
+          LOGGER.info("#### First successful full update with Sentry");
+          /*
+           * If initial full update succeeded, set firstSync to true to never do unconditional
+           * full update ever again. Also, firstSync == true signals to the rest of the code
+           * that from this point on partial updates will be sent to Sentry, not only update
+           * the local cache.
+           * If notifySentryFullUpdate() fails, firstSync remains false, so another full
+           * update will be attempted.
+           */
+          firstSync = true;
         }
       } catch (Exception ignore) {
         // all methods inside try {} log errors anyway
@@ -287,33 +349,21 @@ public class MetastorePlugin extends SentryMetastoreListenerPlugin {
       return;
     }
 
-    /* Initialization Step #2: push initial HMS state to Sentry.
-     * Synchronization by notificationLock is for visibility of changes to sentryClient.
+    /* Initialization Step #2: set the state to perform sync with Sentry.
+     * Kerberos may be initialized only AFTER constructing MetastorePlugin,
+     * so the initial full update would be impossible at this point.
+     * We rely on SyncTask to eventually establish connection and
+     * push the first full update to Sentry, signaled by setting firstSync to true.
      */
     notificationLock.lock();
     try {
       this.lastSentSeqNum = seqNum.get();
-      notifySentryFullUpdate(lastSentSeqNum);
-      LOGGER.info("#### Metastore Plugin Sentry full initial update complete");
-    } catch (Throwable e) {
-      tmpInitError = e;
-      tmpInitErrorMsg = SENTRY_INIT_UPDATE_FAILURE_MSG;
-      LOGGER.error("#### " + tmpInitErrorMsg, e);
+      this.firstSync = false;
     } finally {
       notificationLock.unlock();
     }
-
-    this.initError = tmpInitError;
-    this.initErrorMsg = tmpInitErrorMsg;
-
-    /* If sending HMS state to Sentry failed, further initialization shall be skipped.
-     * MetastorePlugin is considered non-operational, and all of its public APIs
-     * shall be throwing an exception.
-     */
-    if (this.initError != null) {
-      this.threadPool = null;
-      return;
-    }
+    this.initError = null;
+    this.initErrorMsg = null;
 
     /* Initialization Step #3: schedulle SyncTask to run periodically, to make
      * sure Sentry has the current HMS state.
@@ -556,7 +606,15 @@ public class MetastorePlugin extends SentryMetastoreListenerPlugin {
    */
   protected void notifySentry(PathsUpdate update) {
     try {
-      notifySentry_NoSeqNumIncr(update);
+      /* Send updates to Sentry only after the first full update from SyncTask
+       * Note, that full updates from SyncTask happen by calling notifySentry_NoSeqNumIncr()
+       * directly, via notifySentryFullUpdate(), so firstSync value is not consulted.
+       */
+      if (firstSync) {
+        notifySentry_NoSeqNumIncr(update);
+      } else {
+        LOGGER.warn("#### Caching partial Sentry update " + update.getSeqNum() + "; initial full update still in progress");
+      }
     } finally {
       lastSentSeqNum = update.getSeqNum();
       LOGGER.debug("#### HMS Path Last update sent : ["+ lastSentSeqNum + "]");