You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@asterixdb.apache.org by im...@apache.org on 2019/04/10 15:58:29 UTC

[asterixdb] branch master updated: [ASTERIXDB-2388] Add nodeIDs to cluster state query failure exception

This is an automated email from the ASF dual-hosted git repository.

imaxon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/asterixdb.git


The following commit(s) were added to refs/heads/master by this push:
     new a0e77ab  [ASTERIXDB-2388] Add nodeIDs to cluster state query failure exception
a0e77ab is described below

commit a0e77abc4965d087068d099e0d928dcd05e6c67f
Author: Ian Maxon <im...@apache.org>
AuthorDate: Tue Apr 9 13:19:13 2019 -0700

    [ASTERIXDB-2388] Add nodeIDs to cluster state query failure exception
    
    Change-Id: I896fd0482e1db8a04dece058aa0975d3d961e731
    Reviewed-on: https://asterix-gerrit.ics.uci.edu/3317
    Tested-by: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Integration-Tests: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Contrib: Jenkins <je...@fulliautomatix.ics.uci.edu>
    Reviewed-by: Murtadha Hubail <mh...@apache.org>
---
 .../asterix/translator/AbstractLangTranslator.java | 31 ++++++++++++++--------
 .../asterix/common/exceptions/ErrorCode.java       | 17 +++++++-----
 .../src/main/resources/asx_errormsg/en.properties  |  1 +
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
index 0eb8e0a..967b3ad 100644
--- a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
+++ b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/translator/AbstractLangTranslator.java
@@ -18,15 +18,20 @@
  */
 package org.apache.asterix.translator;
 
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Set;
 import java.util.concurrent.TimeUnit;
 
 import org.apache.asterix.common.api.IClusterManagementWork.ClusterState;
+import org.apache.asterix.common.cluster.ClusterPartition;
 import org.apache.asterix.common.cluster.IClusterStateManager;
 import org.apache.asterix.common.cluster.IGlobalRecoveryManager;
 import org.apache.asterix.common.dataflow.ICcApplicationContext;
 import org.apache.asterix.common.exceptions.AsterixException;
+import org.apache.asterix.common.exceptions.ErrorCode;
 import org.apache.asterix.lang.common.base.Statement;
 import org.apache.asterix.lang.common.statement.DatasetDecl;
 import org.apache.asterix.lang.common.statement.DataverseDropStatement;
@@ -68,21 +73,25 @@ public abstract class AbstractLangTranslator {
                 }
                 Thread.currentThread().interrupt();
             }
-            if (!clusterStateManager.getState().equals(ClusterState.ACTIVE)) {
-                throw new AsterixException("Cluster is in " + ClusterState.UNUSABLE + " state."
-                        + "\n One or more Node Controllers have left or haven't joined yet.\n");
-            } else {
-                if (LOGGER.isInfoEnabled()) {
-                    LOGGER.info("Cluster is now " + ClusterState.ACTIVE);
+            synchronized (clusterStateManager) {
+                if (!clusterStateManager.getState().equals(ClusterState.ACTIVE)) {
+                    ClusterPartition[] configuredPartitions = clusterStateManager.getClusterPartitons();
+                    Set<String> inactiveNodes = new HashSet<>();
+                    for (ClusterPartition cp : configuredPartitions) {
+                        if (!cp.isActive()) {
+                            inactiveNodes.add(cp.getNodeId());
+                        }
+                    }
+                    throw AsterixException.create(ErrorCode.CLUSTER_STATE_UNUSABLE,
+                            Arrays.toString(inactiveNodes.toArray()));
+                } else {
+                    if (LOGGER.isInfoEnabled()) {
+                        LOGGER.info("Cluster is now " + ClusterState.ACTIVE);
+                    }
                 }
             }
         }
 
-        if (clusterStateManager.getState().equals(ClusterState.UNUSABLE)) {
-            throw new AsterixException("Cluster is in " + ClusterState.UNUSABLE + " state."
-                    + "\n One or more Node Controllers have left.\n");
-        }
-
         if (!globalRecoveryManager.isRecoveryCompleted()) {
             int maxWaitCycles = appCtx.getExternalProperties().getMaxWaitClusterActive();
             int waitCycleCount = 0;
diff --git a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
index 459773b..c9f1c48 100644
--- a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
+++ b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/exceptions/ErrorCode.java
@@ -33,13 +33,6 @@ public class ErrorCode {
     private static final String RESOURCE_PATH = "asx_errormsg/en.properties";
     public static final String ASTERIX = "ASX";
 
-    // Extension errors
-    public static final int EXTENSION_ID_CONFLICT = 4001;
-    public static final int EXTENSION_COMPONENT_CONFLICT = 4002;
-    public static final int UNSUPPORTED_MESSAGE_TYPE = 4003;
-    public static final int INVALID_CONFIGURATION = 4004;
-    public static final int UNSUPPORTED_REPLICATION_STRATEGY = 4005;
-
     // Runtime errors
     public static final int CASTING_FIELD = 1;
     public static final int TYPE_MISMATCH_FUNCTION = 2;
@@ -302,6 +295,16 @@ public class ErrorCode {
     // Lifecycle management errors
     public static final int DUPLICATE_PARTITION_ID = 4000;
 
+    // Extension errors
+    public static final int EXTENSION_ID_CONFLICT = 4001;
+    public static final int EXTENSION_COMPONENT_CONFLICT = 4002;
+    public static final int UNSUPPORTED_MESSAGE_TYPE = 4003;
+    public static final int INVALID_CONFIGURATION = 4004;
+    public static final int UNSUPPORTED_REPLICATION_STRATEGY = 4005;
+
+    // Lifecycle management errors (continued; codes 4001-4005 are used by extension errors)
+    public static final int CLUSTER_STATE_UNUSABLE = 4006;
+
     private ErrorCode() {
     }
 
diff --git a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
index 94d6942..ddaf271 100644
--- a/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
+++ b/asterixdb/asterix-common/src/main/resources/asx_errormsg/en.properties
@@ -291,3 +291,4 @@
 
 # Lifecycle management errors
 4000 = Partition id %1$s for node %2$s already in use by node %3$s
+4006 = Not all node controllers required for request execution have joined the cluster. Nodes %1$s appear missing, double check the logs on these machines and the cluster configuration