You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by dc...@apache.org on 2020/09/25 04:19:42 UTC

[cassandra] branch cassandra-3.0 updated: Add flag to ignore unreplicated keyspaces during repair

This is an automated email from the ASF dual-hosted git repository.

dcapwell pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
     new 8a4c1d7  Add flag to ignore unreplicated keyspaces during repair
8a4c1d7 is described below

commit 8a4c1d7a485bbd2c12864d27e2705133ebbaabc0
Author: Marcus Eriksson <ma...@apache.org>
AuthorDate: Thu Sep 24 19:31:48 2020 -0700

    Add flag to ignore unreplicated keyspaces during repair
    
    patch by Marcus Eriksson; reviewed by Blake Eggleston, David Capwell for CASSANDRA-15160
---
 CHANGES.txt                                        |   1 +
 .../apache/cassandra/repair/RepairRunnable.java    |  29 ++++
 .../cassandra/repair/messages/RepairOption.java    |  13 +-
 .../apache/cassandra/service/StorageService.java   |   4 +-
 .../apache/cassandra/tools/nodetool/Repair.java    |   5 +
 .../distributed/test/RepairOperationalTest.java    | 163 +++++++++++++++++++++
 6 files changed, 211 insertions(+), 4 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index f3df5d0..5f326ce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,7 @@
 3.0.23:
  * Avoid failing compactions with very large partitions (CASSANDRA-15164)
  * Use IF NOT EXISTS for index and UDT create statements in snapshot schema files (CASSANDRA-13935)
+ * Add flag to ignore unreplicated keyspaces during repair (CASSANDRA-15160)
 
 3.0.22:
  * Fix gossip shutdown order (CASSANDRA-15816)
diff --git a/src/java/org/apache/cassandra/repair/RepairRunnable.java b/src/java/org/apache/cassandra/repair/RepairRunnable.java
index 774409f..a7c17c9 100644
--- a/src/java/org/apache/cassandra/repair/RepairRunnable.java
+++ b/src/java/org/apache/cassandra/repair/RepairRunnable.java
@@ -172,6 +172,21 @@ public class RepairRunnable extends WrappedRunnable implements ProgressEventNoti
                                                                               options.getDataCenters(),
                                                                               options.getHosts());
 
+                if (neighbors.isEmpty())
+                {
+                    if (options.ignoreUnreplicatedKeyspaces())
+                    {
+                        logger.info("Found no neighbors for range {} for {} - ignoring since repairing with --ignore-unreplicated-keyspaces", range, keyspace);
+                        continue;
+                    }
+                    else
+                    {
+                        String errorMessage = String.format("Nothing to repair for %s in %s - aborting", range, keyspace);
+                        logger.error("Repair {}",  errorMessage);
+                        fireErrorAndComplete(tag, progress.get(), totalProgress, errorMessage);
+                        return;
+                    }
+                }
                 addRangeToNeighbors(commonRanges, range, neighbors);
                 allNeighbors.addAll(neighbors);
             }
@@ -185,6 +200,20 @@ public class RepairRunnable extends WrappedRunnable implements ProgressEventNoti
             return;
         }
 
+        if (options.ignoreUnreplicatedKeyspaces() && allNeighbors.isEmpty())
+        {
+            String ignoreUnreplicatedMessage = String.format("Nothing to repair for %s in %s - unreplicated keyspace is ignored since repair was called with --ignore-unreplicated-keyspaces",
+                                                             options.getRanges(),
+                                                             keyspace);
+
+            logger.info("Repair {}", ignoreUnreplicatedMessage);
+            fireProgressEvent(tag, new ProgressEvent(ProgressEventType.COMPLETE,
+                                                     progress.get(),
+                                                     totalProgress,
+                                                     ignoreUnreplicatedMessage));
+            return;
+        }
+
         // Validate columnfamilies
         List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>();
         try
diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java
index 9d60ad7..5d56d3a 100644
--- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java
+++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java
@@ -45,6 +45,7 @@ public class RepairOption
     public static final String DATACENTERS_KEY = "dataCenters";
     public static final String HOSTS_KEY = "hosts";
     public static final String TRACE_KEY = "trace";
+    public static final String IGNORE_UNREPLICATED_KS = "ignoreUnreplicatedKeyspaces";
 
     // we don't want to push nodes too much for repair
     public static final int MAX_JOB_THREADS = 4;
@@ -129,6 +130,7 @@ public class RepairOption
         boolean primaryRange = Boolean.parseBoolean(options.get(PRIMARY_RANGE_KEY));
         boolean incremental = Boolean.parseBoolean(options.get(INCREMENTAL_KEY));
         boolean trace = Boolean.parseBoolean(options.get(TRACE_KEY));
+        boolean ignoreUnreplicatedKeyspaces = Boolean.parseBoolean(options.get(IGNORE_UNREPLICATED_KS));
 
         int jobThreads = 1;
         if (options.containsKey(JOB_THREADS_KEY))
@@ -166,7 +168,7 @@ public class RepairOption
             }
         }
 
-        RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty());
+        RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty(), ignoreUnreplicatedKeyspaces);
 
         // data centers
         String dataCentersStr = options.get(DATACENTERS_KEY);
@@ -226,13 +228,14 @@ public class RepairOption
     private final boolean trace;
     private final int jobThreads;
     private final boolean isSubrangeRepair;
+    private final boolean ignoreUnreplicatedKeyspaces;
 
     private final Collection<String> columnFamilies = new HashSet<>();
     private final Collection<String> dataCenters = new HashSet<>();
     private final Collection<String> hosts = new HashSet<>();
     private final Collection<Range<Token>> ranges = new HashSet<>();
 
-    public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection<Range<Token>> ranges, boolean isSubrangeRepair)
+    public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection<Range<Token>> ranges, boolean isSubrangeRepair, boolean ignoreUnreplicatedKeyspaces)
     {
         if (FBUtilities.isWindows() &&
             (DatabaseDescriptor.getDiskAccessMode() != Config.DiskAccessMode.standard || DatabaseDescriptor.getIndexAccessMode() != Config.DiskAccessMode.standard) &&
@@ -250,6 +253,7 @@ public class RepairOption
         this.jobThreads = jobThreads;
         this.ranges.addAll(ranges);
         this.isSubrangeRepair = isSubrangeRepair;
+        this.ignoreUnreplicatedKeyspaces = ignoreUnreplicatedKeyspaces;
     }
 
     public RepairParallelism getParallelism()
@@ -311,6 +315,10 @@ public class RepairOption
         return dataCenters.size() == 1 && dataCenters.contains(DatabaseDescriptor.getLocalDataCenter());
     }
 
+    public boolean ignoreUnreplicatedKeyspaces()
+    {
+        return ignoreUnreplicatedKeyspaces;
+    }
     @Override
     public String toString()
     {
@@ -323,6 +331,7 @@ public class RepairOption
                        ", dataCenters: " + dataCenters +
                        ", hosts: " + hosts +
                        ", # of ranges: " + ranges.size() +
+                       ", ignore unreplicated keyspaces: "+ ignoreUnreplicatedKeyspaces +
                        ')';
     }
 }
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 0aba23c..69e337e 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -3167,7 +3167,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
             parallelism = RepairParallelism.PARALLEL;
         }
 
-        RepairOption options = new RepairOption(parallelism, primaryRange, !fullRepair, false, 1, Collections.<Range<Token>>emptyList(), false);
+        RepairOption options = new RepairOption(parallelism, primaryRange, !fullRepair, false, 1, Collections.<Range<Token>>emptyList(), false, false);
         if (dataCenters != null)
         {
             options.getDataCenters().addAll(dataCenters);
@@ -3259,7 +3259,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
                         "The repair will occur but without anti-compaction.");
         Collection<Range<Token>> repairingRange = createRepairRangeFrom(beginToken, endToken);
 
-        RepairOption options = new RepairOption(parallelism, false, !fullRepair, false, 1, repairingRange, true);
+        RepairOption options = new RepairOption(parallelism, false, !fullRepair, false, 1, repairingRange, true, false);
         if (dataCenters != null)
         {
             options.getDataCenters().addAll(dataCenters);
diff --git a/src/java/org/apache/cassandra/tools/nodetool/Repair.java b/src/java/org/apache/cassandra/tools/nodetool/Repair.java
index 02bfc5b..928cae8 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/Repair.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/Repair.java
@@ -81,6 +81,9 @@ public class Repair extends NodeToolCmd
     @Option(title = "trace_repair", name = {"-tr", "--trace"}, description = "Use -tr to trace the repair. Traces are logged to system_traces.events.")
     private boolean trace = false;
 
+    @Option(title = "ignore_unreplicated_keyspaces", name = {"-iuk","--ignore-unreplicated-keyspaces"}, description = "Use --ignore-unreplicated-keyspaces to ignore keyspaces which are not replicated, otherwise the repair will fail")
+    private boolean ignoreUnreplicatedKeyspaces = false;
+
     @Override
     public void execute(NodeProbe probe)
     {
@@ -108,6 +111,8 @@ public class Repair extends NodeToolCmd
             options.put(RepairOption.JOB_THREADS_KEY, Integer.toString(numJobThreads));
             options.put(RepairOption.TRACE_KEY, Boolean.toString(trace));
             options.put(RepairOption.COLUMNFAMILIES_KEY, StringUtils.join(cfnames, ","));
+            options.put(RepairOption.IGNORE_UNREPLICATED_KS, Boolean.toString(ignoreUnreplicatedKeyspaces));
+
             if (!startToken.isEmpty() || !endToken.isEmpty())
             {
                 options.put(RepairOption.RANGES_KEY, startToken + ":" + endToken);
diff --git a/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java b/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java
new file mode 100644
index 0000000..2f85227
--- /dev/null
+++ b/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.api.ConsistencyLevel;
+import org.apache.cassandra.distributed.api.IInvokableInstance;
+
+import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
+import static org.apache.cassandra.distributed.api.Feature.NETWORK;
+import static org.junit.Assert.assertEquals;
+
+public class RepairOperationalTest extends TestBaseImpl
+{
+    @Test
+    public void repairUnreplicatedKStest() throws IOException
+    {
+        try (Cluster cluster = init(Cluster.build(4)
+                                          .withDCs(2)
+                                          .withConfig(config -> config.with(GOSSIP).with(NETWORK))
+                                          .start()))
+        {
+            cluster.schemaChange("alter keyspace "+KEYSPACE+" with replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+            cluster.schemaChange("create table "+KEYSPACE+".tbl (id int primary key, i int)");
+            for (int i = 0; i < 10; i++)
+                cluster.coordinator(1).execute("insert into "+KEYSPACE+".tbl (id, i) values (?, ?)", ConsistencyLevel.ALL, i, i);
+            cluster.forEach(i -> i.flush(KEYSPACE));
+
+            cluster.get(3).nodetoolResult("repair", "-full", KEYSPACE , "tbl", "-st", "0", "-et", "1000")
+                   .asserts()
+                   .failure()
+                   .errorContains("Nothing to repair for (0,1000] in distributed_test_keyspace - aborting");
+            cluster.get(3).nodetoolResult("repair", "-full", KEYSPACE , "tbl", "-st", "0", "-et", "1000", "--ignore-unreplicated-keyspaces")
+                   .asserts()
+                   .success()
+                   .notificationContains("unreplicated keyspace is ignored since repair was called with --ignore-unreplicated-keyspaces");
+
+        }
+    }
+
+    @Test
+    public void dcFilterOnEmptyDC() throws IOException
+    {
+        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+        {
+            // 1-2 : datacenter1
+            // 3-4 : datacenter2
+            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+            for (int i = 0; i < 10; i++)
+                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+            cluster.forEach(i -> i.flush(KEYSPACE));
+
+            // choose a node in the DC that doesn't have any replicas ('datacenter2':0 in the keyspace definition above)
+            IInvokableInstance node = cluster.get(3);
+            assertEquals("datacenter2", node.config().localDatacenter());
+            // NOTE(review): the assertion below expects success; presumably this was the failure seen before the fix: "the local data center must be part of the repair"
+            node.nodetoolResult("repair", "-full",
+                                "-dc", "datacenter1", "-dc", "datacenter2",
+                                "--ignore-unreplicated-keyspaces",
+                                "-st", "0", "-et", "1000",
+                                KEYSPACE, "tbl")
+                .asserts().success();
+        }
+    }
+
+    @Test
+    public void hostFilterDifferentDC() throws IOException
+    {
+        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+        {
+            // 1-2 : datacenter1
+            // 3-4 : datacenter2
+            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+            for (int i = 0; i < 10; i++)
+                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+            cluster.forEach(i -> i.flush(KEYSPACE));
+
+            // choose a node in the DC that doesn't have any replicas ('datacenter2':0 in the keyspace definition above)
+            IInvokableInstance node = cluster.get(3);
+            assertEquals("datacenter2", node.config().localDatacenter());
+            // NOTE(review): the assertion below expects success; presumably this was the failure seen before the fix: "Specified hosts [127.0.0.3, 127.0.0.1] do not share range (0,1000] needed for repair. Either restrict repair ranges with -st/-et options, or specify one of the neighbors that share this range with this node: [].. Check the logs on the repair participants for further details"
+            node.nodetoolResult("repair", "-full",
+                                "-hosts", cluster.get(1).broadcastAddress().getAddress().getHostAddress(),
+                                "-hosts", node.broadcastAddress().getAddress().getHostAddress(),
+                                "--ignore-unreplicated-keyspaces",
+                                "-st", "0", "-et", "1000",
+                                KEYSPACE, "tbl")
+                .asserts().success();
+        }
+    }
+
+    @Test
+    public void emptyDC() throws IOException
+    {
+        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+        {
+            // 1-2 : datacenter1
+            // 3-4 : datacenter2
+            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+            for (int i = 0; i < 10; i++)
+                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+            cluster.forEach(i -> i.flush(KEYSPACE));
+
+            // choose a node in the DC that doesn't have any replicas ('datacenter2':0 in the keyspace definition above)
+            IInvokableInstance node = cluster.get(3);
+            assertEquals("datacenter2", node.config().localDatacenter());
+            // NOTE(review): the assertion below expects success; presumably this was the failure seen before the fix: [2020-09-10 11:30:04,139] Repair command #1 failed with error Nothing to repair for (0,1000] in distributed_test_keyspace - aborting. Check the logs on the repair participants for further details
+            node.nodetoolResult("repair", "-full",
+                                "--ignore-unreplicated-keyspaces",
+                                "-st", "0", "-et", "1000",
+                                KEYSPACE, "tbl")
+                .asserts().success();
+        }
+    }
+
+    @Test
+    public void mainDC() throws IOException
+    {
+        try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+        {
+            // 1-2 : datacenter1
+            // 3-4 : datacenter2
+            cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+            cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+            for (int i = 0; i < 10; i++)
+                cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+            cluster.forEach(i -> i.flush(KEYSPACE));
+
+            // choose a node in the DC that holds the replicas ('datacenter1':2 in the keyspace definition above)
+            IInvokableInstance node = cluster.get(1);
+            assertEquals("datacenter1", node.config().localDatacenter());
+            node.nodetoolResult("repair", "-full",
+                                "--ignore-unreplicated-keyspaces",
+                                "-st", "0", "-et", "1000",
+                                KEYSPACE, "tbl")
+                .asserts().success();
+        }
+    }
+
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org