You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@cassandra.apache.org by ma...@apache.org on 2021/09/01 08:54:24 UTC

[cassandra] branch cassandra-4.0 updated: Delay auth setup until after gossip has settled to avoid unavailables on startup

This is an automated email from the ASF dual-hosted git repository.

marcuse pushed a commit to branch cassandra-4.0
in repository https://gitbox.apache.org/repos/asf/cassandra.git


The following commit(s) were added to refs/heads/cassandra-4.0 by this push:
     new c36c081  Delay auth setup until after gossip has settled to avoid unavailables on startup
c36c081 is described below

commit c36c081e5c33362daae748c2da1be4da9ef18fa6
Author: Marcus Eriksson <ma...@apache.org>
AuthorDate: Thu Jul 1 14:08:25 2021 +0200

    Delay auth setup until after gossip has settled to avoid unavailables on startup
    
    Patch by marcuse; reviewed by Sam Tunnicliffe for CASSANDRA-16783
    
    Co-authored-by: Caleb Rackliffe <ca...@gmail.com>
---
 CHANGES.txt                                        |  1 +
 .../apache/cassandra/service/CassandraDaemon.java  |  2 +
 .../apache/cassandra/service/StorageService.java   | 14 +++++-
 .../cassandra/distributed/impl/Instance.java       |  3 +-
 .../cassandra/distributed/test/AuthTest.java       | 56 ++++++++++++++++++++++
 .../distributed/test/RepairDigestTrackingTest.java | 35 +++++++++++++-
 6 files changed, 107 insertions(+), 4 deletions(-)

diff --git a/CHANGES.txt b/CHANGES.txt
index f79cf30..c808128 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 4.0.2
+ * Delay auth setup until after gossip has settled to avoid unavailables on startup (CASSANDRA-16783)
  * Fix clustering order logic in CREATE MATERIALIZED VIEW (CASSANDRA-16898)
 
 4.0.1
diff --git a/src/java/org/apache/cassandra/service/CassandraDaemon.java b/src/java/org/apache/cassandra/service/CassandraDaemon.java
index 2cb1254..4844b84 100644
--- a/src/java/org/apache/cassandra/service/CassandraDaemon.java
+++ b/src/java/org/apache/cassandra/service/CassandraDaemon.java
@@ -440,6 +440,8 @@ public class CassandraDaemon
         if (!FBUtilities.getBroadcastAddressAndPort().equals(InetAddressAndPort.getLoopbackAddress()))
             Gossiper.waitToSettle();
 
+        StorageService.instance.doAuthSetup(false);
+
         // re-enable auto-compaction after gossip is settled, so correct disk boundaries are used
         for (Keyspace keyspace : Keyspace.all())
         {
diff --git a/src/java/org/apache/cassandra/service/StorageService.java b/src/java/org/apache/cassandra/service/StorageService.java
index 6c72682..9495b8a 100644
--- a/src/java/org/apache/cassandra/service/StorageService.java
+++ b/src/java/org/apache/cassandra/service/StorageService.java
@@ -1130,6 +1130,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
             try
             {
                 joinTokenRing(0);
+                doAuthSetup(false);
             }
             catch (ConfigurationException e)
             {
@@ -1144,6 +1145,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
             {
                 logger.info("Leaving write survey mode and joining ring at operator request");
                 finishJoiningRing(resumedBootstrap, SystemKeyspace.getSavedTokens());
+                doAuthSetup(false);
                 isSurveyMode = false;
                 daemon.start();
             }
@@ -1176,10 +1178,10 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
         setTokens(tokens);
 
         assert tokenMetadata.sortedTokens().size() > 0;
-        doAuthSetup(false);
     }
 
-    private void doAuthSetup(boolean setUpSchema)
+    @VisibleForTesting
+    public void doAuthSetup(boolean setUpSchema)
     {
         if (!authSetupCalled.getAndSet(true))
         {
@@ -1204,6 +1206,13 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
     }
 
     @VisibleForTesting
+    public boolean authSetupCalled()
+    {
+        return authSetupCalled.get();
+    }
+
+
+    @VisibleForTesting
     public void setUpDistributedSystemKeyspaces()
     {
         Collection<Mutation> changes = new ArrayList<>(3);
@@ -1847,6 +1856,7 @@ public class StorageService extends NotificationBroadcasterSupport implements IE
                             isSurveyMode = false;
                             progressSupport.progress("bootstrap", ProgressEvent.createNotification("Joining ring..."));
                             finishJoiningRing(true, bootstrapTokens);
+                            doAuthSetup(false);
                         }
                         progressSupport.progress("bootstrap", new ProgressEvent(ProgressEventType.COMPLETE, 1, 1, "Resume bootstrap complete"));
                         if (!isNativeTransportRunning())
diff --git a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java
index d772d51..971a2d4 100644
--- a/test/distributed/org/apache/cassandra/distributed/impl/Instance.java
+++ b/test/distributed/org/apache/cassandra/distributed/impl/Instance.java
@@ -561,7 +561,8 @@ public class Instance extends IsolatedExecutor implements IInvokableInstance
 
                 SystemKeyspace.finishStartup();
 
-                CassandraDaemon.getInstanceForTesting().setupCompleted();
+                StorageService.instance.doAuthSetup(false);
+                CassandraDaemon.getInstanceForTesting().completeSetup();
 
                 if (config.has(NATIVE_PROTOCOL))
                 {
diff --git a/test/distributed/org/apache/cassandra/distributed/test/AuthTest.java b/test/distributed/org/apache/cassandra/distributed/test/AuthTest.java
new file mode 100644
index 0000000..4f75080
--- /dev/null
+++ b/test/distributed/org/apache/cassandra/distributed/test/AuthTest.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.cassandra.distributed.test;
+
+import java.io.IOException;
+import java.util.concurrent.TimeUnit;
+
+import com.google.common.util.concurrent.Uninterruptibles;
+import org.junit.Test;
+
+import org.apache.cassandra.distributed.Cluster;
+import org.apache.cassandra.service.StorageService;
+
+import static org.junit.Assert.assertTrue;
+
+public class AuthTest extends TestBaseImpl
+{
+
+    /**
+     * Simply tests that initialisation of a test Instance results in
+     * StorageService.instance.doAuthSetup being called as the regular
+     * startup does in CassandraDaemon.setup
+     */
+    @Test
+    public void authSetupIsCalledAfterStartup() throws IOException
+    {
+        try (Cluster cluster = Cluster.build().withNodes(1).start())
+        {
+            boolean setupCalled = cluster.get(1).callOnInstance(() -> {
+                long maxWait = TimeUnit.NANOSECONDS.convert(10, TimeUnit.SECONDS);
+                long start = System.nanoTime();
+                while (!StorageService.instance.authSetupCalled() && System.nanoTime() - start < maxWait)
+                    Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
+
+                return StorageService.instance.authSetupCalled();
+            });
+            assertTrue(setupCalled);
+        }
+    }
+}
diff --git a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java
index c8fc088..a4daceb 100644
--- a/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java
+++ b/test/distributed/org/apache/cassandra/distributed/test/RepairDigestTrackingTest.java
@@ -28,6 +28,12 @@ import java.util.stream.Stream;
 
 import com.google.common.util.concurrent.Uninterruptibles;
 import org.apache.cassandra.concurrent.SEPExecutor;
+import org.apache.cassandra.dht.Token;
+import org.apache.cassandra.locator.AbstractReplicationStrategy;
+import org.apache.cassandra.locator.EndpointsForToken;
+import org.apache.cassandra.locator.InetAddressAndPort;
+import org.apache.cassandra.locator.ReplicaLayout;
+import org.apache.cassandra.locator.ReplicaUtils;
 import org.apache.cassandra.utils.Throwables;
 import org.junit.Assert;
 import org.junit.Test;
@@ -53,6 +59,7 @@ import org.apache.cassandra.service.StorageProxy.LocalReadRunnable;
 import org.apache.cassandra.utils.DiagnosticSnapshotService;
 
 import static net.bytebuddy.matcher.ElementMatchers.named;
+import static net.bytebuddy.matcher.ElementMatchers.takesArguments;
 import static org.apache.cassandra.distributed.api.Feature.GOSSIP;
 import static org.apache.cassandra.distributed.api.Feature.NETWORK;
 import static org.apache.cassandra.distributed.shared.AssertUtils.assertRows;
@@ -359,6 +366,9 @@ public class RepairDigestTrackingTest extends TestBaseImpl
      * entirely within the scope of single {@link LocalReadRunnable}, but this test still attempts to validate some
      * assumptions about the cleanliness of the logs and the correctness of queries made when initial local reads and
      * local reads triggered by read repair (after speculative reads) execute at roughly the same time.
+     *
+     * This test depends on whether node1 gets a data or a digest request first, we force it to be a digest request
+     * in the forTokenReadLiveSorted ByteBuddy rule below.
      */
     @Test
     public void testLocalDataAndRemoteRequestConcurrency() throws Exception
@@ -397,7 +407,7 @@ public class RepairDigestTrackingTest extends TestBaseImpl
     public static class BBHelper
     {
         private static final CyclicBarrier barrier = new CyclicBarrier(2);
-        
+
         public static void install(ClassLoader classLoader, Integer num)
         {
             // Only install on the coordinating node, which is also a replica...
@@ -414,6 +424,12 @@ public class RepairDigestTrackingTest extends TestBaseImpl
                                .intercept(MethodDelegation.to(BBHelper.class))
                                .make()
                                .load(classLoader, ClassLoadingStrategy.Default.INJECTION);
+
+                new ByteBuddy().rebase(ReplicaLayout.class)
+                               .method(named("forTokenReadLiveSorted").and(takesArguments(AbstractReplicationStrategy.class, Token.class)))
+                               .intercept(MethodDelegation.to(BBHelper.class))
+                               .make()
+                               .load(classLoader, ClassLoadingStrategy.Default.INJECTION);
             }
         }
 
@@ -443,6 +459,23 @@ public class RepairDigestTrackingTest extends TestBaseImpl
                 throw Throwables.unchecked(e);
             }
         }
+
+        @SuppressWarnings({ "unused" })
+        public static ReplicaLayout.ForTokenRead forTokenReadLiveSorted(AbstractReplicationStrategy replicationStrategy, Token token)
+        {
+            try
+            {
+                EndpointsForToken.Builder builder = EndpointsForToken.builder(token, 3);
+                builder.add(ReplicaUtils.full(InetAddressAndPort.getByName("127.0.0.3")));
+                builder.add(ReplicaUtils.full(InetAddressAndPort.getByName("127.0.0.2")));
+                builder.add(ReplicaUtils.full(InetAddressAndPort.getByName("127.0.0.1")));
+                return new ReplicaLayout.ForTokenRead(replicationStrategy, builder.build());
+            }
+            catch (Exception e)
+            {
+                throw Throwables.unchecked(e);
+            }
+        }
     }
 
     private Object[][] rows(Object[][] head, Object[][]...tail)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org