You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by mb...@apache.org on 2018/08/14 20:29:08 UTC

asterixdb git commit: [NO ISSUE][ING] Halt on active suspend or resume failures

Repository: asterixdb
Updated Branches:
  refs/heads/master 1619b8881 -> 1e6489077


[NO ISSUE][ING] Halt on active suspend or resume failures

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
- Failures during active handler suspend or resume leave the
  system in an inconsistent state.
- When that happens, we halt the process and rely on recovery to
  return to a consistent state.

Change-Id: I00d31f704f2fa22a5e14c711b6771345ca7d000a
Reviewed-on: https://asterix-gerrit.ics.uci.edu/2889
Reviewed-by: Michael Blow <mb...@apache.org>
Tested-by: Michael Blow <mb...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/asterixdb/repo
Commit: http://git-wip-us.apache.org/repos/asf/asterixdb/commit/1e648907
Tree: http://git-wip-us.apache.org/repos/asf/asterixdb/tree/1e648907
Diff: http://git-wip-us.apache.org/repos/asf/asterixdb/diff/1e648907

Branch: refs/heads/master
Commit: 1e648907751c8bd722b6e9eccc1cd21e11c1d1b2
Parents: 1619b88
Author: Abdullah Alamoudi <ba...@gmail.com>
Authored: Mon Aug 13 11:14:32 2018 -0700
Committer: Michael Blow <mb...@apache.org>
Committed: Tue Aug 14 13:28:50 2018 -0700

----------------------------------------------------------------------
 .../app/active/ActiveNotificationHandler.java   | 67 ++++++++++++--------
 .../java/org/apache/hyracks/util/ExitUtil.java  |  2 +
 2 files changed, 41 insertions(+), 28 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/asterixdb/blob/1e648907/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveNotificationHandler.java
----------------------------------------------------------------------
diff --git a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveNotificationHandler.java b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveNotificationHandler.java
index 5faa980..6eba4ea 100644
--- a/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveNotificationHandler.java
+++ b/asterixdb/asterix-app/src/main/java/org/apache/asterix/app/active/ActiveNotificationHandler.java
@@ -44,6 +44,7 @@ import org.apache.hyracks.api.job.JobId;
 import org.apache.hyracks.api.job.JobSpecification;
 import org.apache.hyracks.api.job.JobStatus;
 import org.apache.hyracks.api.util.SingleThreadEventProcessor;
+import org.apache.hyracks.util.ExitUtil;
 import org.apache.logging.log4j.Level;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
@@ -259,41 +260,51 @@ public class ActiveNotificationHandler extends SingleThreadEventProcessor<Active
             LOGGER.log(level, "Suspending active events handler");
             suspended = true;
         }
-        IMetadataLockManager lockManager = mdProvider.getApplicationContext().getMetadataLockManager();
-        Collection<IActiveEntityEventsListener> registeredListeners = entityEventListeners.values();
-        for (IActiveEntityEventsListener listener : registeredListeners) {
-            // write lock the listener
-            // exclusive lock all the datasets
-            String dataverseName = listener.getEntityId().getDataverse();
-            String entityName = listener.getEntityId().getEntityName();
-            if (LOGGER.isEnabled(level)) {
-                LOGGER.log(level, "Suspending " + listener.getEntityId());
-            }
-            LOGGER.log(level, "Acquiring locks");
-            lockManager.acquireActiveEntityWriteLock(mdProvider.getLocks(), dataverseName + '.' + entityName);
-            List<Dataset> datasets = ((ActiveEntityEventsListener) listener).getDatasets();
-            for (Dataset dataset : datasets) {
-                lockManager.acquireDatasetExclusiveModificationLock(mdProvider.getLocks(),
-                        DatasetUtil.getFullyQualifiedName(dataset));
-            }
-            LOGGER.log(level, "locks acquired");
-            ((ActiveEntityEventsListener) listener).suspend(mdProvider);
-            if (LOGGER.isEnabled(level)) {
-                LOGGER.log(level, listener.getEntityId() + " suspended");
+        try {
+            IMetadataLockManager lockManager = mdProvider.getApplicationContext().getMetadataLockManager();
+            Collection<IActiveEntityEventsListener> registeredListeners = entityEventListeners.values();
+            for (IActiveEntityEventsListener listener : registeredListeners) {
+                // write lock the listener
+                // exclusive lock all the datasets
+                String dataverseName = listener.getEntityId().getDataverse();
+                String entityName = listener.getEntityId().getEntityName();
+                if (LOGGER.isEnabled(level)) {
+                    LOGGER.log(level, "Suspending " + listener.getEntityId());
+                }
+                LOGGER.log(level, "Acquiring locks");
+                lockManager.acquireActiveEntityWriteLock(mdProvider.getLocks(), dataverseName + '.' + entityName);
+                List<Dataset> datasets = ((ActiveEntityEventsListener) listener).getDatasets();
+                for (Dataset dataset : datasets) {
+                    lockManager.acquireDatasetExclusiveModificationLock(mdProvider.getLocks(),
+                            DatasetUtil.getFullyQualifiedName(dataset));
+                }
+                LOGGER.log(level, "locks acquired");
+                ((ActiveEntityEventsListener) listener).suspend(mdProvider);
+                if (LOGGER.isEnabled(level)) {
+                    LOGGER.log(level, listener.getEntityId() + " suspended");
+                }
             }
+        } catch (Throwable th) {
+            LOGGER.error("Suspend active failed", th);
+            ExitUtil.halt(ExitUtil.EC_ACTIVE_SUSPEND_FAILURE);
         }
     }
 
     public void resume(MetadataProvider mdProvider) throws HyracksDataException {
         LOGGER.log(level, "Resuming active events handler");
-        for (IActiveEntityEventsListener listener : entityEventListeners.values()) {
-            if (LOGGER.isEnabled(level)) {
-                LOGGER.log(level, "Resuming " + listener.getEntityId());
-            }
-            ((ActiveEntityEventsListener) listener).resume(mdProvider);
-            if (LOGGER.isEnabled(level)) {
-                LOGGER.log(level, listener.getEntityId() + " resumed");
+        try {
+            for (IActiveEntityEventsListener listener : entityEventListeners.values()) {
+                if (LOGGER.isEnabled(level)) {
+                    LOGGER.log(level, "Resuming " + listener.getEntityId());
+                }
+                ((ActiveEntityEventsListener) listener).resume(mdProvider);
+                if (LOGGER.isEnabled(level)) {
+                    LOGGER.log(level, listener.getEntityId() + " resumed");
+                }
             }
+        } catch (Throwable th) {
+            LOGGER.error("Resume active failed", th);
+            ExitUtil.halt(ExitUtil.EC_ACTIVE_RESUME_FAILURE);
         }
         synchronized (this) {
             suspended = false;

http://git-wip-us.apache.org/repos/asf/asterixdb/blob/1e648907/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
----------------------------------------------------------------------
diff --git a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
index 9604c30..f9d9b1b 100644
--- a/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
+++ b/hyracks-fullstack/hyracks/hyracks-util/src/main/java/org/apache/hyracks/util/ExitUtil.java
@@ -48,6 +48,8 @@ public class ExitUtil {
     public static final int EC_TXN_LOG_FLUSHER_FAILURE = 14;
     public static final int EC_NODE_REGISTRATION_FAILURE = 15;
     public static final int EC_NETWORK_FAILURE = 16;
+    public static final int EC_ACTIVE_SUSPEND_FAILURE = 17;
+    public static final int EC_ACTIVE_RESUME_FAILURE = 18;
     public static final int EC_FAILED_TO_CANCEL_ACTIVE_START_STOP = 22;
     public static final int EC_IMMEDIATE_HALT = 33;
     public static final int EC_HALT_ABNORMAL_RESERVED_44 = 44;