You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@ignite.apache.org by GitBox <gi...@apache.org> on 2022/05/19 14:57:15 UTC

[GitHub] [ignite-3] rpuch commented on a diff in pull request #795: IGNITE-16717 Implement node validation

rpuch commented on code in PR #795:
URL: https://github.com/apache/ignite-3/pull/795#discussion_r876906720


##########
modules/cluster-management/src/integrationTest/java/org/apache/ignite/internal/cluster/management/raft/ItCmgRaftServiceTest.java:
##########
@@ -240,18 +270,128 @@ void testClusterState() {
         assertThat(node1.raftService.readClusterState(), willCompleteSuccessfully());
         assertThat(node2.raftService.readClusterState(), willCompleteSuccessfully());
 
-        ClusterState state = new ClusterState(List.of("foo"), List.of("bar"));
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
 
         assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
 
-        state = new ClusterState(List.of("baz"), List.of("quux"));
+        state = new ClusterState(
+                List.of("baz"),
+                List.of("quux"),
+                IgniteProductVersion.fromString("3.3.3"),
+                new ClusterTag("new cluster")
+        );
 
         assertThat(node2.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
     }
+
+    /**
+     * Test validation of the Cluster Tag.
+     */
+    @Test
+    void testClusterTagValidation() {
+        Node node1 = cluster.get(0);
+        Node node2 = cluster.get(1);
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
+
+        assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());

Review Comment:
   I suggest extracting this to a method like `writeClusterState(node1, state)` so that less gory details distract the reader from the test logic.



##########
modules/cluster-management/src/integrationTest/java/org/apache/ignite/internal/cluster/management/raft/ItCmgRaftServiceTest.java:
##########
@@ -240,18 +270,128 @@ void testClusterState() {
         assertThat(node1.raftService.readClusterState(), willCompleteSuccessfully());
         assertThat(node2.raftService.readClusterState(), willCompleteSuccessfully());
 
-        ClusterState state = new ClusterState(List.of("foo"), List.of("bar"));
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
 
         assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
 
-        state = new ClusterState(List.of("baz"), List.of("quux"));
+        state = new ClusterState(
+                List.of("baz"),
+                List.of("quux"),
+                IgniteProductVersion.fromString("3.3.3"),
+                new ClusterTag("new cluster")
+        );
 
         assertThat(node2.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
     }
+
+    /**
+     * Test validation of the Cluster Tag.
+     */
+    @Test
+    void testClusterTagValidation() {
+        Node node1 = cluster.get(0);
+        Node node2 = cluster.get(1);
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
+
+        assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());
+
+        // empty tag
+        assertThat(node2.raftService.startJoinCluster(null), willCompleteSuccessfully());
+
+        // correct tag
+        assertThat(node2.raftService.startJoinCluster(state.clusterTag()), willCompleteSuccessfully());
+
+        // incorrect tag
+        assertThrowsWithCause(
+                () -> node2.raftService.startJoinCluster(new ClusterTag("invalid")).get(10, TimeUnit.SECONDS),
+                IgniteInternalException.class,
+                "Join request denied, reason: Cluster tags do not match"
+        );
+    }
+
+    /**
+     * Test validation of Ignite Product Version upon join.
+     */
+    @Test
+    void testIgniteVersionValidation() {
+        CmgRaftService raftService = cluster.get(0).raftService;
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.fromString("1.2.3"),
+                new ClusterTag("cluster")
+        );
+
+        assertThat(raftService.writeClusterState(state), willCompleteSuccessfully());
+
+        assertThrowsWithCause(
+                () -> raftService.startJoinCluster(null).get(10, TimeUnit.SECONDS),
+                IgniteInternalException.class,
+                String.format(
+                        "Join request denied, reason: Ignite versions do not match. Version: %s, version stored in CMG: 1.2.3",
+                        IgniteProductVersion.CURRENT_VERSION
+                )
+        );
+    }
+
+    /**
+     * Test validation token logic.
+     */
+    @Test
+    void testValidationToken() {
+        CmgRaftService raftService = cluster.get(0).raftService;
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
+
+        assertThat(raftService.writeClusterState(state), willCompleteSuccessfully());
+
+        CompletableFuture<UUID> joinFuture = raftService.startJoinCluster(null);
+
+        assertThat(joinFuture, willCompleteSuccessfully());
+
+        // incorrect token
+        assertThrowsWithCause(
+                () -> raftService.completeJoinCluster(UUID.randomUUID()).get(10, TimeUnit.SECONDS),
+                IgniteInternalException.class,
+                "JoinReady request denied, reason: Incorrect validation token"
+        );
+
+        UUID token = joinFuture.join();
+
+        // correct token
+        assertThat(raftService.completeJoinCluster(token), willCompleteSuccessfully());
+
+        // token should be invalidated after a successful join
+        assertThrowsWithCause(
+                () -> raftService.completeJoinCluster(token).get(10, TimeUnit.SECONDS),
+                IgniteInternalException.class,
+                String.format(
+                        "JoinReady request denied, reason: Node \"%s\" has not yet passed the validation step",

Review Comment:
   The test demonstrates that when we try to reuse the token, we get a message saying that the first join step was not executed, this seems confusing. I suggest extending the message like '... has not yet passed the validation step, or the join has already been completed'



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/CmgRaftGroupListener.java:
##########
@@ -73,15 +81,42 @@ public void onWrite(Iterator<CommandClosure<WriteCommand>> iterator) {
 
             if (command instanceof WriteStateCommand) {
                 storage.putClusterState(((WriteStateCommand) command).clusterState());
+
+                clo.result(null);
             } else if (command instanceof JoinRequestCommand) {
-                // TODO: perform validation https://issues.apache.org/jira/browse/IGNITE-16717
+                Serializable response = validationTokenManager.validateNode((JoinRequestCommand) command);
+
+                clo.result(response);
             } else if (command instanceof JoinReadyCommand) {
-                storage.putLogicalTopologyNode(((JoinReadyCommand) command).node());
+                Serializable response = validationTokenManager.completeValidation((JoinReadyCommand) command);
+
+                // Non-null response means that the node has not passed the validation step.
+                if (response == null) {

Review Comment:
   Keeping 'non-null result means validation problem' causes a code that is less obvious. I suggest to add a simple class `ValidationResult` with just one field called `violation` (of type `Serializable`) and two methods: `isValid()` (equivalent to `violation == null`) and `violation()` (returning the `violation` value). In such case, the `if` would look like `if (result.isValid())`, which seems more straightforward (and the comment would not be needed).



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/LocalStateStorage.java:
##########
@@ -30,7 +30,26 @@
  * <p>This state has nothing to do with local Raft state and is only used on node startup, when Raft groups have not yet been started.
  */
 class LocalStateStorage {
-    private static final ByteArray CMG_NODES_VAULT_KEY = ByteArray.fromString("cmg_nodes");
+    private static final ByteArray CMG_STATE_VAULT_KEY = ByteArray.fromString("cmg_state");
+
+    static class LocalState implements Serializable {
+        private final Set<String> cmgNodeNames;
+
+        private final ClusterTag clusterTag;
+
+        LocalState(Set<String> cmgNodeNames, ClusterTag clusterTag) {
+            this.cmgNodeNames = cmgNodeNames;

Review Comment:
   My favorite: a defensive copy? :)



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/CmgRaftService.java:
##########
@@ -88,31 +106,40 @@ public CompletableFuture<Void> writeClusterState(ClusterState clusterState) {
     /**
      * Sends a {@link JoinRequestCommand}, starting the validation procedure.
      *

Review Comment:
   Let's add a `@param` to explain the parameter



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/validation/ValidationInfo.java:
##########
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.cluster.management.validation;
+
+import org.apache.ignite.internal.cluster.management.ClusterTag;
+import org.apache.ignite.internal.properties.IgniteProductVersion;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Class containing information necessary for performing validation of a node.
+ */
+public class ValidationInfo {

Review Comment:
   How about a different name, like `NodeValidatedAttributes` (meaning, attributes under validation)? `Info` suffix always looks suspicious because it does not tell anything by itself, it does not add any information (no pun intended) about the purpose of the type.



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/commands/JoinRequestCommand.java:
##########
@@ -17,22 +17,34 @@
 
 package org.apache.ignite.internal.cluster.management.raft.commands;
 
+import org.apache.ignite.internal.cluster.management.ClusterTag;
+import org.apache.ignite.internal.properties.IgniteProductVersion;
 import org.apache.ignite.network.ClusterNode;
 import org.apache.ignite.raft.client.WriteCommand;
+import org.jetbrains.annotations.Nullable;
 
 /**
  * Command sent by a node that intends to join a cluster. This command will trigger node validation.
  */
 public class JoinRequestCommand implements WriteCommand {
     private final ClusterNode node;
 
+    private final IgniteProductVersion igniteVersion;
+
+    @Nullable
+    private final ClusterTag clusterTag;

Review Comment:
   Could you please document what it means that we have a `clusterTag` in a request and when we don't have it?



##########
modules/cluster-management/src/test/java/org/apache/ignite/internal/cluster/management/ClusterInitializerTest.java:
##########
@@ -154,6 +166,39 @@ void testInitCancel() {
         verify(messagingService, never()).send(eq(metastorageNode), any(CancelInitMessage.class));
     }
 
+    /**
+     * Tests a situation when the init command fails non critically, so that initialization is not cancelled.

Review Comment:
   Should there be a hyphen after 'non' (as 'non' seems to always be a part of a word, but not a proper word itself)?



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/validation/NodeValidator.java:
##########
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.cluster.management.validation;
+
+import org.apache.ignite.internal.cluster.management.ClusterState;
+import org.apache.ignite.internal.cluster.management.ClusterTag;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Class that encapsulates common validation logic.
+ */
+public class NodeValidator {
+    /**
+     * Validates a given node (represented by {@code nodeInfo}) against the CMG state.
+     *
+     * @param cmgState CMG state.
+     * @param nodeInfo Node state.
+     * @return {@code null} in case of successful validation or a {@link ValidationError} containing some error information.
+     */
+    @Nullable
+    public static ValidationError validateNode(ClusterState cmgState, ValidationInfo nodeInfo) {

Review Comment:
   Would it make sense to swap argument positions? We are validating the node, so `nodeInfo` is probably more important, while `cmgState` plays the role of a context.



##########
modules/core/src/main/java/org/apache/ignite/internal/properties/IgniteProductVersion.java:
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.properties;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Class representing an Ignite version.
+ */
+public class IgniteProductVersion implements Serializable {
+    private static final Pattern VERSION_PATTERN =
+            Pattern.compile("(?<major>\\d+)\\.(?<minor>\\d+)\\.(?<maintenance>\\d+)(?<snapshot>-SNAPSHOT)?");
+
+    /**
+     * Version of the current node.
+     */
+    public static final IgniteProductVersion CURRENT_VERSION = fromString(IgniteProperties.get(IgniteProperties.VERSION));
+
+    /** Major version number. */
+    private final byte major;
+
+    /** Minor version number. */
+    private final byte minor;
+
+    /** Maintenance version number. */
+    private final byte maintenance;
+
+    /** Flag indicating if this is a snapshot release. */
+    private final boolean isSnapshot;
+
+    private IgniteProductVersion(byte major, byte minor, byte maintenance, boolean isSnapshot) {
+        this.major = major;
+        this.minor = minor;
+        this.maintenance = maintenance;
+        this.isSnapshot = isSnapshot;
+    }
+
+    /**
+     * Parses Ignite version in the {@code "X.X.X-SNAPSHOT"} format.

Review Comment:
   This makes me think that '-SNAPSHOT' is mandatory. Probably it makes sense to mark the fragment as optional or specify both patterns (with and without '-SNAPSHOT').



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/CmgRaftService.java:
##########
@@ -153,11 +180,30 @@ public Set<String> nodeNames() {
                 .collect(Collectors.toSet());
     }
 
+    /**
+     * Converts a {@link Peer} into a {@link ClusterNode}.
+     *
+     * <p>This method tries to resolve the given {@code peer} multiple times, because it might be offline temporarily.
+     */
     private ClusterNode resolvePeer(Peer peer) {
-        ClusterNode node = clusterService.topologyService().getByAddress(peer.address());
+        NetworkAddress addr = peer.address();
+
+        for (int i = 0; i < MAX_RESOLVE_ATTEMPTS; ++i) {

Review Comment:
   Could you please put `++` on the right of `i`? Otherwise, it makes me wonder whether this even matters. Let's strive for less confusion :)



##########
modules/cluster-management/src/integrationTest/java/org/apache/ignite/internal/cluster/management/raft/ItCmgRaftServiceTest.java:
##########
@@ -240,18 +270,128 @@ void testClusterState() {
         assertThat(node1.raftService.readClusterState(), willCompleteSuccessfully());
         assertThat(node2.raftService.readClusterState(), willCompleteSuccessfully());
 
-        ClusterState state = new ClusterState(List.of("foo"), List.of("bar"));
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
 
         assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
 
-        state = new ClusterState(List.of("baz"), List.of("quux"));
+        state = new ClusterState(
+                List.of("baz"),
+                List.of("quux"),
+                IgniteProductVersion.fromString("3.3.3"),
+                new ClusterTag("new cluster")
+        );
 
         assertThat(node2.raftService.writeClusterState(state), willCompleteSuccessfully());
 
         assertThat(node1.raftService.readClusterState(), willBe(state));
         assertThat(node2.raftService.readClusterState(), willBe(state));
     }
+
+    /**
+     * Test validation of the Cluster Tag.
+     */
+    @Test
+    void testClusterTagValidation() {
+        Node node1 = cluster.get(0);
+        Node node2 = cluster.get(1);
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.CURRENT_VERSION,
+                new ClusterTag("cluster")
+        );
+
+        assertThat(node1.raftService.writeClusterState(state), willCompleteSuccessfully());
+
+        // empty tag
+        assertThat(node2.raftService.startJoinCluster(null), willCompleteSuccessfully());
+
+        // correct tag
+        assertThat(node2.raftService.startJoinCluster(state.clusterTag()), willCompleteSuccessfully());
+
+        // incorrect tag
+        assertThrowsWithCause(
+                () -> node2.raftService.startJoinCluster(new ClusterTag("invalid")).get(10, TimeUnit.SECONDS),
+                IgniteInternalException.class,
+                "Join request denied, reason: Cluster tags do not match"
+        );
+    }
+
+    /**
+     * Test validation of Ignite Product Version upon join.
+     */
+    @Test
+    void testIgniteVersionValidation() {
+        CmgRaftService raftService = cluster.get(0).raftService;
+
+        ClusterState state = new ClusterState(
+                List.of("foo"),
+                List.of("bar"),
+                IgniteProductVersion.fromString("1.2.3"),

Review Comment:
   I suggest to use here a version that Ignite will never have, like 999.999.999. Otherwise, if (when?) we change Ignite version to exactly 1.2.3 (and decide to stop putting '-SNAPSHOT' there, if we decide so), the test will suddenly start failing.



##########
modules/core/src/main/java/org/apache/ignite/internal/properties/IgniteProductVersion.java:
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.properties;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Class representing an Ignite version.
+ */
+public class IgniteProductVersion implements Serializable {
+    private static final Pattern VERSION_PATTERN =
+            Pattern.compile("(?<major>\\d+)\\.(?<minor>\\d+)\\.(?<maintenance>\\d+)(?<snapshot>-SNAPSHOT)?");
+
+    /**
+     * Version of the current node.
+     */
+    public static final IgniteProductVersion CURRENT_VERSION = fromString(IgniteProperties.get(IgniteProperties.VERSION));
+
+    /** Major version number. */
+    private final byte major;
+
+    /** Minor version number. */
+    private final byte minor;

Review Comment:
   Why are we restricting version components with `byte`? Is it imposed by the semver spec? If not, let's make it `int` just in case.



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/raft/RaftStorageManager.java:
##########
@@ -35,15 +37,14 @@
  * A wrapper around a {@link ClusterStateStorage} which provides convenient methods.
  */
 class RaftStorageManager {
-    /**
-     * Storage key for the CMG state.
-     */
-    private static final byte[] CMG_STATE_KEY = "cmg_state".getBytes(StandardCharsets.UTF_8);
+    /** Storage key for the CMG state. */
+    private static final byte[] CMG_STATE_KEY = "cmg_state".getBytes(UTF_8);
 
-    /**
-     * Prefix for the keys for logical topology nodes.
-     */
-    private static final byte[] LOGICAL_TOPOLOGY_PREFIX = "logical_".getBytes(StandardCharsets.UTF_8);
+    /** Prefix for the keys for logical topology nodes. */
+    private static final byte[] LOGICAL_TOPOLOGY_PREFIX = "logical_".getBytes(UTF_8);
+
+    /** Prefix for validation tokens. */
+    private static final byte[] VALIDATION_TOKEN_PREFIX = "validation_".getBytes(UTF_8);

Review Comment:
   `validation_token_`?



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/ClusterManagementGroupManager.java:
##########
@@ -198,14 +218,14 @@ public void start() {
     /**
      * Extracts the local state (if any) and starts the CMG.
      *
-     * @return Future that resolves into the CMG Raft service or {@code null} if the local state is empty.
+     * @return Future, that resolves into the CMG Raft service, or {@code null} if the local state is empty.

Review Comment:
   Would they really put that comma after 'Future' in English?



##########
modules/core/src/main/java/org/apache/ignite/internal/properties/IgniteProductVersion.java:
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.internal.properties;
+
+import java.io.Serializable;
+import java.util.Objects;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Class representing an Ignite version.
+ */
+public class IgniteProductVersion implements Serializable {
+    private static final Pattern VERSION_PATTERN =
+            Pattern.compile("(?<major>\\d+)\\.(?<minor>\\d+)\\.(?<maintenance>\\d+)(?<snapshot>-SNAPSHOT)?");
+
+    /**
+     * Version of the current node.
+     */
+    public static final IgniteProductVersion CURRENT_VERSION = fromString(IgniteProperties.get(IgniteProperties.VERSION));
+
+    /** Major version number. */
+    private final byte major;
+
+    /** Minor version number. */
+    private final byte minor;
+
+    /** Maintenance version number. */
+    private final byte maintenance;
+
+    /** Flag indicating if this is a snapshot release. */
+    private final boolean isSnapshot;
+
+    private IgniteProductVersion(byte major, byte minor, byte maintenance, boolean isSnapshot) {
+        this.major = major;
+        this.minor = minor;
+        this.maintenance = maintenance;
+        this.isSnapshot = isSnapshot;
+    }
+
+    /**
+     * Parses Ignite version in the {@code "X.X.X-SNAPSHOT"} format.
+     *
+     * @param versionStr String representation of an Ignite version.
+     * @return Parsed Ignite version.
+     * @throws IllegalArgumentException If the given string is empty or does not match the required format.
+     */
+    public static IgniteProductVersion fromString(String versionStr) {
+        if (versionStr.isBlank()) {

Review Comment:
   Should we also check for `null`?



##########
modules/cluster-management/src/main/java/org/apache/ignite/internal/cluster/management/ClusterManagementGroupManager.java:
##########
@@ -214,27 +234,61 @@ private CompletableFuture<CmgRaftService> recoverLocalState() {
             throw new IgniteInternalException("Error while retrieving local CMG state", e);
         }
 
-        if (cmgNodes.isEmpty()) {
+        if (localState == null) {
             return null;
         }
 
         log.info("Local CMG state recovered, starting the CMG");
 
-        return startCmgRaftService(cmgNodes)
+        return startCmgRaftService(localState.cmgNodeNames())
                 .thenCompose(service -> service.isCurrentNodeLeader()
                         .thenCompose(isLeader -> {
-                            if (isLeader) {
-                                return service.readClusterState()
-                                        // Raft state might not have been initialized in case of leader failure during cluster init
-                                        // TODO: properly handle this case, see https://issues.apache.org/jira/browse/IGNITE-16819
-                                        .thenCompose(state -> state == null ? completedFuture(null) : onLeaderElected(service, state));
-                            } else {
+                            if (!isLeader) {
                                 return completedFuture(null);
                             }
+
+                            return service.readClusterState()
+                                    .thenCompose(state -> {
+                                        if (state == null) {
+                                            // Raft state might not have been initialized in case of leader failure during cluster init
+                                            // TODO: properly handle this case, see https://issues.apache.org/jira/browse/IGNITE-16819
+
+                                            return completedFuture(null);
+                                        } else {
+                                            // CMG leader must validate itself on start
+                                            validateSelf(state, localState);
+
+                                            return onLeaderElected(service, state);
+                                        }
+                                    });
                         })
                         .thenApply(v -> service));
     }
 
+    /**
+     * Validates this node against the CMG state.
+     *
+     * @param state CMG state.
+     * @param localState Local node state.
+     * @throws IgniteInternalException If the node does not pass the validation.
+     */
+    private static void validateSelf(ClusterState state, LocalState localState) {
+        if (!state.cmgNodes().equals(localState.cmgNodeNames())) {
+            throw new IgniteInternalException(String.format(
+                    "Local state and CMG state differs. CMG nodes: %s, nodes stored in CMG: %s",
+                    localState.cmgNodeNames(), state.cmgNodes()
+            ));
+        }
+
+        var validationInfo = new ValidationInfo(IgniteProductVersion.CURRENT_VERSION, localState.clusterTag());
+
+        ValidationError validationError = NodeValidator.validateNode(state, validationInfo);
+
+        if (validationError != null) {
+            throw new IgniteInternalException(validationError.rejectReason());

Review Comment:
   Should a specific exception class be introduced for self-validation failures?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: notifications-unsubscribe@ignite.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org