You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by dc...@apache.org on 2020/09/25 04:19:42 UTC
[cassandra] branch cassandra-3.0 updated: Add flag to ignore
unreplicated keyspaces during repair
This is an automated email from the ASF dual-hosted git repository.
dcapwell pushed a commit to branch cassandra-3.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git
The following commit(s) were added to refs/heads/cassandra-3.0 by this push:
new 8a4c1d7 Add flag to ignore unreplicated keyspaces during repair
8a4c1d7 is described below
commit 8a4c1d7a485bbd2c12864d27e2705133ebbaabc0
Author: Marcus Eriksson <ma...@apache.org>
AuthorDate: Thu Sep 24 19:31:48 2020 -0700
Add flag to ignore unreplicated keyspaces during repair
patch by Marcus Eriksson; reviewed by Blake Eggleston, David Capwell for CASSANDRA-15160
---
CHANGES.txt | 1 +
.../apache/cassandra/repair/RepairRunnable.java | 29 ++++
.../cassandra/repair/messages/RepairOption.java | 13 +-
.../apache/cassandra/service/StorageService.java | 4 +-
.../apache/cassandra/tools/nodetool/Repair.java | 5 +
.../distributed/test/RepairOperationalTest.java | 163 +++++++++++++++++++++
6 files changed, 211 insertions(+), 4 deletions(-)
diff --git a/CHANGES.txt b/CHANGES.txt
index f3df5d0..5f326ce 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,6 +1,7 @@
3.0.23:
* Avoid failing compactions with very large partitions (CASSANDRA-15164)
* Use IF NOT EXISTS for index and UDT create statements in snapshot schema files (CASSANDRA-13935)
+ * Add flag to ignore unreplicated keyspaces during repair (CASSANDRA-15160)
3.0.22:
* Fix gossip shutdown order (CASSANDRA-15816)
diff --git a/src/java/org/apache/cassandra/repair/RepairRunnable.java b/src/java/org/apache/cassandra/repair/RepairRunnable.java
index 774409f..a7c17c9 100644
--- a/src/java/org/apache/cassandra/repair/RepairRunnable.java
+++ b/src/java/org/apache/cassandra/repair/RepairRunnable.java
@@ -172,6 +172,21 @@ public class RepairRunnable extends WrappedRunnable implements ProgressEventNoti
options.getDataCenters(),
options.getHosts());
+ if (neighbors.isEmpty())
+ {
+ if (options.ignoreUnreplicatedKeyspaces())
+ {
+ logger.info("Found no neighbors for range {} for {} - ignoring since repairing with --ignore-unreplicated-keyspaces", range, keyspace);
+ continue;
+ }
+ else
+ {
+ String errorMessage = String.format("Nothing to repair for %s in %s - aborting", range, keyspace);
+ logger.error("Repair {}", errorMessage);
+ fireErrorAndComplete(tag, progress.get(), totalProgress, errorMessage);
+ return;
+ }
+ }
addRangeToNeighbors(commonRanges, range, neighbors);
allNeighbors.addAll(neighbors);
}
@@ -185,6 +200,20 @@ public class RepairRunnable extends WrappedRunnable implements ProgressEventNoti
return;
}
+ if (options.ignoreUnreplicatedKeyspaces() && allNeighbors.isEmpty())
+ {
+ String ignoreUnreplicatedMessage = String.format("Nothing to repair for %s in %s - unreplicated keyspace is ignored since repair was called with --ignore-unreplicated-keyspaces",
+ options.getRanges(),
+ keyspace);
+
+ logger.info("Repair {}", ignoreUnreplicatedMessage);
+ fireProgressEvent(tag, new ProgressEvent(ProgressEventType.COMPLETE,
+ progress.get(),
+ totalProgress,
+ ignoreUnreplicatedMessage));
+ return;
+ }
+
// Validate columnfamilies
List<ColumnFamilyStore> columnFamilyStores = new ArrayList<>();
try
diff --git a/src/java/org/apache/cassandra/repair/messages/RepairOption.java b/src/java/org/apache/cassandra/repair/messages/RepairOption.java
index 9d60ad7..5d56d3a 100644
--- a/src/java/org/apache/cassandra/repair/messages/RepairOption.java
+++ b/src/java/org/apache/cassandra/repair/messages/RepairOption.java
@@ -45,6 +45,7 @@ public class RepairOption
public static final String DATACENTERS_KEY = "dataCenters";
public static final String HOSTS_KEY = "hosts";
public static final String TRACE_KEY = "trace";
+ public static final String IGNORE_UNREPLICATED_KS = "ignoreUnreplicatedKeyspaces";
// we don't want to push nodes too much for repair
public static final int MAX_JOB_THREADS = 4;
@@ -129,6 +130,7 @@ public class RepairOption
boolean primaryRange = Boolean.parseBoolean(options.get(PRIMARY_RANGE_KEY));
boolean incremental = Boolean.parseBoolean(options.get(INCREMENTAL_KEY));
boolean trace = Boolean.parseBoolean(options.get(TRACE_KEY));
+ boolean ignoreUnreplicatedKeyspaces = Boolean.parseBoolean(options.get(IGNORE_UNREPLICATED_KS));
int jobThreads = 1;
if (options.containsKey(JOB_THREADS_KEY))
@@ -166,7 +168,7 @@ public class RepairOption
}
}
- RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty());
+ RepairOption option = new RepairOption(parallelism, primaryRange, incremental, trace, jobThreads, ranges, !ranges.isEmpty(), ignoreUnreplicatedKeyspaces);
// data centers
String dataCentersStr = options.get(DATACENTERS_KEY);
@@ -226,13 +228,14 @@ public class RepairOption
private final boolean trace;
private final int jobThreads;
private final boolean isSubrangeRepair;
+ private final boolean ignoreUnreplicatedKeyspaces;
private final Collection<String> columnFamilies = new HashSet<>();
private final Collection<String> dataCenters = new HashSet<>();
private final Collection<String> hosts = new HashSet<>();
private final Collection<Range<Token>> ranges = new HashSet<>();
- public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection<Range<Token>> ranges, boolean isSubrangeRepair)
+ public RepairOption(RepairParallelism parallelism, boolean primaryRange, boolean incremental, boolean trace, int jobThreads, Collection<Range<Token>> ranges, boolean isSubrangeRepair, boolean ignoreUnreplicatedKeyspaces)
{
if (FBUtilities.isWindows() &&
(DatabaseDescriptor.getDiskAccessMode() != Config.DiskAccessMode.standard || DatabaseDescriptor.getIndexAccessMode() != Config.DiskAccessMode.standard) &&
@@ -250,6 +253,7 @@ public class RepairOption
this.jobThreads = jobThreads;
this.ranges.addAll(ranges);
this.isSubrangeRepair = isSubrangeRepair;
+ this.ignoreUnreplicatedKeyspaces = ignoreUnreplicatedKeyspaces;
}
public RepairParallelism getParallelism()
@@ -311,6 +315,10 @@ public class RepairOption
return dataCenters.size() == 1 && dataCenters.contains(DatabaseDescriptor.getLocalDataCenter());
}
+ public boolean ignoreUnreplicatedKeyspaces()
+ {
+ return ignoreUnreplicatedKeyspaces;
+ }
@Override
public String toString()
{
@@ -323,6 +331,7 @@ public class RepairOption
", dataCenters: " + dataCenters +
", hosts: " + hosts +
", # of ranges: " + ranges.size() +
+ ", ignore unreplicated keyspaces: "+ ignoreUnreplicatedKeyspaces +
')';
}
}
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 0aba23c..69e337e 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -3167,7 +3167,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
parallelism = RepairParallelism.PARALLEL;
}
- RepairOption options = new RepairOption(parallelism, primaryRange, !fullRepair, false, 1, Collections.<Range<Token>>emptyList(), false);
+ RepairOption options = new RepairOption(parallelism, primaryRange, !fullRepair, false, 1, Collections.<Range<Token>>emptyList(), false, false);
if (dataCenters != null)
{
options.getDataCenters().addAll(dataCenters);
@@ -3259,7 +3259,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
"The repair will occur but without anti-compaction.");
Collection<Range<Token>> repairingRange = createRepairRangeFrom(beginToken, endToken);
- RepairOption options = new RepairOption(parallelism, false, !fullRepair, false, 1, repairingRange, true);
+ RepairOption options = new RepairOption(parallelism, false, !fullRepair, false, 1, repairingRange, true, false);
if (dataCenters != null)
{
options.getDataCenters().addAll(dataCenters);
diff --git a/src/java/org/apache/cassandra/tools/nodetool/Repair.java b/src/java/org/apache/cassandra/tools/nodetool/Repair.java
index 02bfc5b..928cae8 100644
--- a/src/java/org/apache/cassandra/tools/nodetool/Repair.java
+++ b/src/java/org/apache/cassandra/tools/nodetool/Repair.java
@@ -81,6 +81,9 @@ public class Repair extends NodeToolCmd
@Option(title = "trace_repair", name = {"-tr", "--trace"}, description = "Use -tr to trace the repair. Traces are logged to system_traces.events.")
private boolean trace = false;
+ @Option(title = "ignore_unreplicated_keyspaces", name = {"-iuk","--ignore-unreplicated-keyspaces"}, description = "Use --ignore-unreplicated-keyspaces to ignore keyspaces which are not replicated, otherwise the repair will fail")
+ private boolean ignoreUnreplicatedKeyspaces = false;
+
@Override
public void execute(NodeProbe probe)
{
@@ -108,6 +111,8 @@ public class Repair extends NodeToolCmd
options.put(RepairOption.JOB_THREADS_KEY, Integer.toString(numJobThreads));
options.put(RepairOption.TRACE_KEY, Boolean.toString(trace));
options.put(RepairOption.COLUMNFAMILIES_KEY, StringUtils.join(cfnames, ","));
+ options.put(RepairOption.IGNORE_UNREPLICATED_KS, Boolean.toString(ignoreUnreplicatedKeyspaces));
+
if (!startToken.isEmpty() || !endToken.isEmpty())
{
options.put(RepairOption.RANGES_KEY, startToken + ":" + endToken);
diff --git a/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java b/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java
new file mode 100644
index 0000000..2f85227
--- /dev/null
+++ b/test/distributed/org/apache/cassandra/distributed/test/RepairOperationalTest.java
@@ -0,0 +1,163 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test;
+
+import java.io.IOException;
+
+import org.junit.Test;
+
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.distributed.api.ConsistencyLevel;
+import org.apache.cassandra.distributed.api.IInvokableInstance;
+
+import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
+import static org.apache.cassandra.distributed.api.Feature.NETWORK;
+import static org.junit.Assert.assertEquals;
+
+public class RepairOperationalTest extends TestBaseImpl
+{
+ @Test
+ public void repairUnreplicatedKStest() throws IOException
+ {
+ try (Cluster cluster = init(Cluster.build(4)
+ .withDCs(2)
+ .withConfig(config -> config.with(GOSSIP).with(NETWORK))
+ .start()))
+ {
+ cluster.schemaChange("alter keyspace "+KEYSPACE+" with replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+ cluster.schemaChange("create table "+KEYSPACE+".tbl (id int primary key, i int)");
+ for (int i = 0; i < 10; i++)
+ cluster.coordinator(1).execute("insert into "+KEYSPACE+".tbl (id, i) values (?, ?)", ConsistencyLevel.ALL, i, i);
+ cluster.forEach(i -> i.flush(KEYSPACE));
+
+ cluster.get(3).nodetoolResult("repair", "-full", KEYSPACE , "tbl", "-st", "0", "-et", "1000")
+ .asserts()
+ .failure()
+ .errorContains("Nothing to repair for (0,1000] in distributed_test_keyspace - aborting");
+ cluster.get(3).nodetoolResult("repair", "-full", KEYSPACE , "tbl", "-st", "0", "-et", "1000", "--ignore-unreplicated-keyspaces")
+ .asserts()
+ .success()
+ .notificationContains("unreplicated keyspace is ignored since repair was called with --ignore-unreplicated-keyspaces");
+
+ }
+ }
+
+ @Test
+ public void dcFilterOnEmptyDC() throws IOException
+ {
+ try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+ {
+ // 1-2 : datacenter1
+ // 3-4 : datacenter2
+ cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+ cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+ for (int i = 0; i < 10; i++)
+ cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+ cluster.forEach(i -> i.flush(KEYSPACE));
+
+ // choose a node in the DC that doesn't have any replicas
+ IInvokableInstance node = cluster.get(3);
+ assertEquals("datacenter2", node.config().localDatacenter());
+ // must succeed now; recorded failure (presumably from before the fix was complete): "the local data center must be part of the repair"
+ node.nodetoolResult("repair", "-full",
+ "-dc", "datacenter1", "-dc", "datacenter2",
+ "--ignore-unreplicated-keyspaces",
+ "-st", "0", "-et", "1000",
+ KEYSPACE, "tbl")
+ .asserts().success();
+ }
+ }
+
+ @Test
+ public void hostFilterDifferentDC() throws IOException
+ {
+ try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+ {
+ // 1-2 : datacenter1
+ // 3-4 : datacenter2
+ cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+ cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+ for (int i = 0; i < 10; i++)
+ cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+ cluster.forEach(i -> i.flush(KEYSPACE));
+
+ // choose a node in the DC that doesn't have any replicas
+ IInvokableInstance node = cluster.get(3);
+ assertEquals("datacenter2", node.config().localDatacenter());
+ // must succeed now; recorded failure (presumably from before the fix was complete): "Specified hosts [127.0.0.3, 127.0.0.1] do not share range (0,1000] needed for repair. Either restrict repair ranges with -st/-et options, or specify one of the neighbors that share this range with this node: [].. Check the logs on the repair participants for further details"
+ node.nodetoolResult("repair", "-full",
+ "-hosts", cluster.get(1).broadcastAddress().getAddress().getHostAddress(),
+ "-hosts", node.broadcastAddress().getAddress().getHostAddress(),
+ "--ignore-unreplicated-keyspaces",
+ "-st", "0", "-et", "1000",
+ KEYSPACE, "tbl")
+ .asserts().success();
+ }
+ }
+
+ @Test
+ public void emptyDC() throws IOException
+ {
+ try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+ {
+ // 1-2 : datacenter1
+ // 3-4 : datacenter2
+ cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+ cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+ for (int i = 0; i < 10; i++)
+ cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+ cluster.forEach(i -> i.flush(KEYSPACE));
+
+ // choose a node in the DC that doesn't have any replicas
+ IInvokableInstance node = cluster.get(3);
+ assertEquals("datacenter2", node.config().localDatacenter());
+ // must succeed now; recorded failure (presumably from before the fix was complete): [2020-09-10 11:30:04,139] Repair command #1 failed with error Nothing to repair for (0,1000] in distributed_test_keyspace - aborting. Check the logs on the repair participants for further details
+ node.nodetoolResult("repair", "-full",
+ "--ignore-unreplicated-keyspaces",
+ "-st", "0", "-et", "1000",
+ KEYSPACE, "tbl")
+ .asserts().success();
+ }
+ }
+
+ @Test
+ public void mainDC() throws IOException
+ {
+ try (Cluster cluster = Cluster.build().withRacks(2, 1, 2).start())
+ {
+ // 1-2 : datacenter1
+ // 3-4 : datacenter2
+ cluster.schemaChange("CREATE KEYSPACE " + KEYSPACE + " WITH replication = {'class': 'NetworkTopologyStrategy', 'datacenter1':2, 'datacenter2':0}");
+ cluster.schemaChange("CREATE TABLE " + KEYSPACE + ".tbl (id int PRIMARY KEY, i int)");
+ for (int i = 0; i < 10; i++)
+ cluster.coordinator(1).execute("INSERT INTO " + KEYSPACE + ".tbl (id, i) VALUES (?, ?)", ConsistencyLevel.ALL, i, i);
+ cluster.forEach(i -> i.flush(KEYSPACE));
+
+ // choose a node in the DC that holds the replicas (datacenter1)
+ IInvokableInstance node = cluster.get(1);
+ assertEquals("datacenter1", node.config().localDatacenter());
+ node.nodetoolResult("repair", "-full",
+ "--ignore-unreplicated-keyspaces",
+ "-st", "0", "-et", "1000",
+ KEYSPACE, "tbl")
+ .asserts().success();
+ }
+ }
+
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org