You are viewing a plain text version of this content. The canonical link for it is here.
Posted to notifications@ignite.apache.org by GitBox <gi...@apache.org> on 2020/10/09 07:25:33 UTC

[GitHub] [ignite] ptupitsyn commented on a change in pull request #8325: IGNITE-13366 Maintenance Mode for corrupted PDS case

ptupitsyn commented on a change in pull request #8325:
URL: https://github.com/apache/ignite/pull/8325#discussion_r502230136



##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceRegistry.java
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * {@link MaintenanceRegistry} is a service local to each Ignite node
+ * that allows to request performing maintenance actions on that particular node.
+ *
+ * <p>
+ *     When a node gets into a situation when some specific actions are required
+ *     it enters the special mode called maintenance mode.
+ *     In maintenance mode it doesn't join to the rest of the cluster but still allows to connect to it
+ *     with control.{sh|bat} script or via JMX interface and perform needed actions.
+ * </p>
+ *
+ * <p>
+ *     Implementing new task for maintenance mode requires several pieces of code.
+ *
+ *     <ul>
+ *         <li>
+ *             First, component requiring Maintenance Mode should be able to register new {@link MaintenanceTask}
+ *             with {@link MaintenanceRegistry#registerMaintenanceTask(MaintenanceTask)} method.
+ *
+ *             Registration could happen automatically (e.g. if component detects some emergency situation
+ *             that requires user intervention)
+ *             or by user request (e.g. for a planned maintenance that requires
+ *             detaching node from the rest of the cluster).
+ *         </li>
+ *         <li>
+ *             Component responsible for handling this {@link MaintenanceTask}
+ *             on startup checks if the task is registered (thus it should go to Maintenance Mode).
+ *             If task is found component provides to {@link MaintenanceRegistry} its own implementation
+ *             of {@link MaintenanceWorkflowCallback} interface
+ *             via method {@link MaintenanceRegistry#registerWorkflowCallback(UUID, MaintenanceWorkflowCallback)}.
+ *         </li>
+ *         <li>
+ *             {@link MaintenanceWorkflowCallback} should provide {@link MaintenanceRegistry} with
+ *             {@link MaintenanceAction}s that are able to resolve maintenance task,
+ *             get information about it and so on.
+ *             Logic of these actions is completely up to the component providing it
+ *             and depends only on particular maintenance task.
+ *         </li>
+ *         <li>
+ *             When maintenance task is fixed, it should be removed from {@link MaintenanceRegistry}
+ *             with call {@link MaintenanceRegistry#unregisterMaintenanceTask(UUID)}.
+ *         </li>
+ *     </ul>
+ * </p>
+ */
+@IgniteExperimental
+public interface MaintenanceRegistry {
+    /**
+     * @return {@code True} if any maintenance task was found.
+     */
+    public boolean isMaintenanceMode();
+
+    /**
+     * @param task {@link MaintenanceTask} object with maintenance information that needs
+     *                                     to be stored to maintenance registry.
+     *
+     * @throws IgniteCheckedException If handling or storing maintenance task failed.
+     *
+     * @return Previously registered {@link MaintenanceTask} with the same ID
+     * or null if no tasks were registered for this ID.
+     */
+    public @Nullable MaintenanceTask registerMaintenanceTask(MaintenanceTask task) throws IgniteCheckedException;
+
+    /**
+     * Deletes {@link MaintenanceTask} of given ID from maintenance registry.
+     *
+     * @param mntcId
+     */
+    public void unregisterMaintenanceTask(UUID mntcId);

Review comment:
       `maintenanceId`

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceTask.java
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.UUID;
+
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Represents request to handle maintenance situation stored on disk.
+ *
+ * Maintenance request can be created programmatically
+ * with {@link MaintenanceRegistry#registerMaintenanceTask(MaintenanceTask)} public API call.
+ *
+ * Record contains unique ID of maintenance situation (e.g. situation of PDS corruption or defragmentation),
+ * description of task and optional parameters.
+ *
+ * When task is created node should be restarted to enter maintenance mode.
+ * In that mode node can start actions needed to resolve maintenance situation or wait for user to trigger them.
+ *
+ * Components that may need to perform maintenance actions as part of their recovery workflow should check
+ * maintenance status on startup and supply {@link MaintenanceWorkflowCallback} implementation to
+ * {@link MaintenanceRegistry#registerWorkflowCallback(UUID, MaintenanceWorkflowCallback)} to allow Maintenance Registry
+ * to find maintenance actions and start them automatically or by user request.
+ *
+ * Matching between {@link MaintenanceTask} and {@link MaintenanceWorkflowCallback} is performed based on
+ * unique ID of maintenance situation.
+ */
+@IgniteExperimental
+public class MaintenanceTask {
+    /** */
+    private final UUID id;
+
+    /** */
+    private final String description;
+
+    /** */
+    private final String params;
+
+    /**
+     * @param id Mandatory unique ID of maintenance task.
+     * @param description Mandatory description of maintenance situation.
+     * @param params Optional parameters that may be needed to perform maintenance actions.
+     */
+    public MaintenanceTask(UUID id, String description, String params) {
+        this.id = id;
+        this.description = description;
+        this.params = params;
+    }
+
+    /** */

Review comment:
       Empty javadoc here and below

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceRegistry.java
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * {@link MaintenanceRegistry} is a service local to each Ignite node
+ * that allows to request performing maintenance actions on that particular node.
+ *
+ * <p>
+ *     When a node gets into a situation when some specific actions are required
+ *     it enters the special mode called maintenance mode.
+ *     In maintenance mode it doesn't join to the rest of the cluster but still allows to connect to it
+ *     with control.{sh|bat} script or via JMX interface and perform needed actions.
+ * </p>
+ *
+ * <p>
+ *     Implementing new task for maintenance mode requires several pieces of code.
+ *
+ *     <ul>
+ *         <li>
+ *             First, component requiring Maintenance Mode should be able to register new {@link MaintenanceTask}
+ *             with {@link MaintenanceRegistry#registerMaintenanceTask(MaintenanceTask)} method.
+ *
+ *             Registration could happen automatically (e.g. if component detects some emergency situation
+ *             that requires user intervention)
+ *             or by user request (e.g. for a planned maintenance that requires
+ *             detaching node from the rest of the cluster).
+ *         </li>
+ *         <li>
+ *             Component responsible for handling this {@link MaintenanceTask}
+ *             on startup checks if the task is registered (thus it should go to Maintenance Mode).
+ *             If task is found component provides to {@link MaintenanceRegistry} its own implementation
+ *             of {@link MaintenanceWorkflowCallback} interface
+ *             via method {@link MaintenanceRegistry#registerWorkflowCallback(UUID, MaintenanceWorkflowCallback)}.
+ *         </li>
+ *         <li>
+ *             {@link MaintenanceWorkflowCallback} should provide {@link MaintenanceRegistry} with
+ *             {@link MaintenanceAction}s that are able to resolve maintenance task,
+ *             get information about it and so on.
+ *             Logic of these actions is completely up to the component providing it
+ *             and depends only on particular maintenance task.
+ *         </li>
+ *         <li>
+ *             When maintenance task is fixed, it should be removed from {@link MaintenanceRegistry}
+ *             with call {@link MaintenanceRegistry#unregisterMaintenanceTask(UUID)}.
+ *         </li>
+ *     </ul>
+ * </p>
+ */
+@IgniteExperimental
+public interface MaintenanceRegistry {
+    /**
+     * @return {@code True} if any maintenance task was found.
+     */
+    public boolean isMaintenanceMode();
+
+    /**
+     * @param task {@link MaintenanceTask} object with maintenance information that needs
+     *                                     to be stored to maintenance registry.
+     *
+     * @throws IgniteCheckedException If handling or storing maintenance task failed.
+     *
+     * @return Previously registered {@link MaintenanceTask} with the same ID
+     * or null if no tasks were registered for this ID.
+     */
+    public @Nullable MaintenanceTask registerMaintenanceTask(MaintenanceTask task) throws IgniteCheckedException;
+
+    /**
+     * Deletes {@link MaintenanceTask} of given ID from maintenance registry.
+     *
+     * @param mntcId
+     */
+    public void unregisterMaintenanceTask(UUID mntcId);
+
+    /**
+     * Returns active {@link MaintenanceTask} by its ID.
+     * There are active tasks only when node entered Maintenance Mode.
+     *
+     * {@link MaintenanceTask} becomes active when node enters Maintenance Mode and doesn't resolve the task
+     * during maintenance prepare phase.
+     *
+     * @return {@link MaintenanceTask} object for given maintenance ID or null if no maintenance task was found.
+     */
+    @Nullable public MaintenanceTask activeMaintenanceTask(UUID maitenanceId);
+
+    /**
+     * @param id UUID of {@link MaintenanceTask} this callback is registered for.

Review comment:
       Missing description

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceTask.java
##########
@@ -0,0 +1,82 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.UUID;
+
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Represents request to handle maintenance situation stored on disk.

Review comment:
       Not clear - what is stored on disk? Do we need to mention that at all?

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceWorkflowCallback.java
##########
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * Abstraction to decouple interaction between {@link MaintenanceRegistry}
+ * and components that may require maintenance.
+ *
+ * If a component may cause node to enter maintenance mode, it should register this callback
+ * in {@link MaintenanceRegistry} using method {@link MaintenanceRegistry#registerWorkflowCallback(UUID, MaintenanceWorkflowCallback)}
+ *
+ * {@link MaintenanceRegistry} during its workflow will collect necessary information about maintenance for components
+ * without knowing implementation details of the components.
+ */
+@IgniteExperimental
+public interface MaintenanceWorkflowCallback {
+    /**
+     * Called by {@link MaintenanceRegistry} and enables it to check if maintenance is still needed
+     * for component that provided this callback.
+     *
+     * User may fix maintenance situation by hand when node was down thus before going to maintenance mode
+     * we should be able to check if it is still necessary.
+     *
+     * @return {@code True} if maintenance is still needed for the component.
+     */
+    public boolean proceedWithMaintenance();

Review comment:
       `proceedWithMaintenance` means "start maintenance right now". But we want to check whether maintenance should be started, so a better name would be `shouldProceedWithMaintenance`

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/MaintenanceRegistry.java
##########
@@ -0,0 +1,132 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.ignite.maintenance;
+
+import java.util.List;
+import java.util.UUID;
+
+import org.apache.ignite.IgniteCheckedException;
+import org.apache.ignite.lang.IgniteExperimental;
+import org.jetbrains.annotations.NotNull;
+import org.jetbrains.annotations.Nullable;
+
+/**
+ * {@link MaintenanceRegistry} is a service local to each Ignite node
+ * that allows to request performing maintenance actions on that particular node.
+ *
+ * <p>
+ *     When a node gets into a situation when some specific actions are required
+ *     it enters the special mode called maintenance mode.
+ *     In maintenance mode it doesn't join to the rest of the cluster but still allows to connect to it
+ *     with control.{sh|bat} script or via JMX interface and perform needed actions.
+ * </p>
+ *
+ * <p>
+ *     Implementing new task for maintenance mode requires several pieces of code.
+ *
+ *     <ul>
+ *         <li>
+ *             First, component requiring Maintenance Mode should be able to register new {@link MaintenanceTask}
+ *             with {@link MaintenanceRegistry#registerMaintenanceTask(MaintenanceTask)} method.
+ *
+ *             Registration could happen automatically (e.g. if component detects some emergency situation
+ *             that requires user intervention)
+ *             or by user request (e.g. for a planned maintenance that requires
+ *             detaching node from the rest of the cluster).
+ *         </li>
+ *         <li>
+ *             Component responsible for handling this {@link MaintenanceTask}
+ *             on startup checks if the task is registered (thus it should go to Maintenance Mode).
+ *             If task is found component provides to {@link MaintenanceRegistry} its own implementation
+ *             of {@link MaintenanceWorkflowCallback} interface
+ *             via method {@link MaintenanceRegistry#registerWorkflowCallback(UUID, MaintenanceWorkflowCallback)}.
+ *         </li>
+ *         <li>
+ *             {@link MaintenanceWorkflowCallback} should provide {@link MaintenanceRegistry} with
+ *             {@link MaintenanceAction}s that are able to resolve maintenance task,
+ *             get information about it and so on.
+ *             Logic of these actions is completely up to the component providing it
+ *             and depends only on particular maintenance task.
+ *         </li>
+ *         <li>
+ *             When maintenance task is fixed, it should be removed from {@link MaintenanceRegistry}
+ *             with call {@link MaintenanceRegistry#unregisterMaintenanceTask(UUID)}.
+ *         </li>
+ *     </ul>
+ * </p>
+ */
+@IgniteExperimental
+public interface MaintenanceRegistry {
+    /**
+     * @return {@code True} if any maintenance task was found.
+     */
+    public boolean isMaintenanceMode();
+
+    /**
+     * @param task {@link MaintenanceTask} object with maintenance information that needs
+     *                                     to be stored to maintenance registry.
+     *
+     * @throws IgniteCheckedException If handling or storing maintenance task failed.
+     *
+     * @return Previously registered {@link MaintenanceTask} with the same ID
+     * or null if no tasks were registered for this ID.
+     */
+    public @Nullable MaintenanceTask registerMaintenanceTask(MaintenanceTask task) throws IgniteCheckedException;
+
+    /**
+     * Deletes {@link MaintenanceTask} of given ID from maintenance registry.
+     *
+     * @param mntcId
+     */
+    public void unregisterMaintenanceTask(UUID mntcId);
+
+    /**
+     * Returns active {@link MaintenanceTask} by its ID.
+     * There are active tasks only when node entered Maintenance Mode.
+     *
+     * {@link MaintenanceTask} becomes active when node enters Maintenance Mode and doesn't resolve the task
+     * during maintenance prepare phase.
+     *
+     * @return {@link MaintenanceTask} object for given maintenance ID or null if no maintenance task was found.
+     */
+    @Nullable public MaintenanceTask activeMaintenanceTask(UUID maitenanceId);
+
+    /**
+     * @param id UUID of {@link MaintenanceTask} this callback is registered for.
+     * @param cb {@link MaintenanceWorkflowCallback} interface used by MaintenanceRegistry to execute
+     *                                              maintenance steps by workflow.
+     */
+    public void registerWorkflowCallback(@NotNull UUID id, @NotNull MaintenanceWorkflowCallback cb);
+
+    /**

Review comment:
       Empty javadoc

##########
File path: modules/core/src/main/java/org/apache/ignite/maintenance/package-info.java
##########
@@ -0,0 +1,22 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * <!-- Package description. -->

Review comment:
       Please remove the placeholder




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org