You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@geode.apache.org by zh...@apache.org on 2017/08/02 17:41:54 UTC

geode git commit: GEODE-3055: The old primary's the shadow bucket is not initialized when rebalance remove it. Thus the new primary candidate can never initialize from it. The fix is to wait until new primary exists before remove the old primary's bucket

Repository: geode
Updated Branches:
  refs/heads/develop 867ac0691 -> bb847c767


GEODE-3055: The old primary's the shadow bucket is not
initialized when rebalance remove it. Thus the new primary candidate can
never initialize from it. The fix is to wait until new primary exists before
remove the old primary's bucket in rebalance.

When the colocated child bucket failed to initialize, remove the leader
bucket since all these buckets should be created atomically.

This closes #570


Project: http://git-wip-us.apache.org/repos/asf/geode/repo
Commit: http://git-wip-us.apache.org/repos/asf/geode/commit/bb847c76
Tree: http://git-wip-us.apache.org/repos/asf/geode/tree/bb847c76
Diff: http://git-wip-us.apache.org/repos/asf/geode/diff/bb847c76

Branch: refs/heads/develop
Commit: bb847c767f1507eed8c26dee02b377c65d43a0f7
Parents: 867ac06
Author: zhouxh <gz...@pivotal.io>
Authored: Thu Jun 8 14:52:56 2017 -0700
Committer: zhouxh <gz...@pivotal.io>
Committed: Wed Aug 2 10:40:39 2017 -0700

----------------------------------------------------------------------
 .../cache/PartitionedRegionDataStore.java       | 77 +++++++++++++++++++-
 1 file changed, 74 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/geode/blob/bb847c76/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
----------------------------------------------------------------------
diff --git a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
index 0318c75..a63a7dd 100644
--- a/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
+++ b/geode-core/src/main/java/org/apache/geode/internal/cache/PartitionedRegionDataStore.java
@@ -494,6 +494,18 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
 
       return result;
 
+    } catch (RuntimeException validationException) {
+      // GEODE-3055
+      PartitionedRegion leader = ColocationHelper.getLeaderRegion(this.partitionedRegion);
+      boolean isLeader = leader.equals(this.partitionedRegion);
+      if (!isLeader) {
+        leader.getDataStore().removeBucket(possiblyFreeBucketId, true);
+        logger.info(
+            "For bucket " + possiblyFreeBucketId + ", failed to create cololcated child bucket for "
+                + this.partitionedRegion.getFullPath() + ", removed leader region "
+                + leader.getFullPath() + " bucket.");
+      }
+      throw validationException;
     } finally {
       this.partitionedRegion.getPrStats().endBucketCreate(startTime, createdBucket, isRebalance);
     }
@@ -1417,6 +1429,39 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
     }
   }
 
+  public boolean isRemotePrimaryReadyForColocatedChildren(int bucketId) {
+    boolean isRemotePrimaryReady = true;
+    InternalDistributedMember myId =
+        this.partitionedRegion.getDistributionManager().getDistributionManagerId();
+
+    List<PartitionedRegion> colocatedChildPRs =
+        ColocationHelper.getColocatedChildRegions(this.partitionedRegion);
+    if (colocatedChildPRs != null) {
+      for (PartitionedRegion pr : colocatedChildPRs) {
+        InternalDistributedMember primaryChild = pr.getBucketPrimary(bucketId);
+        if (logger.isDebugEnabled()) {
+          logger.debug("Checking colocated child bucket " + pr + ", bucketId=" + bucketId
+              + ", primary is " + primaryChild);
+        }
+        if (primaryChild == null || myId.equals(primaryChild)) {
+          if (logger.isDebugEnabled()) {
+            logger.debug("Colocated bucket region " + pr + " " + bucketId
+                + " does not have a remote primary yet. Not to remove.");
+          }
+          return false;
+        } else {
+          if (logger.isDebugEnabled()) {
+            logger
+                .debug(pr + " bucketId=" + bucketId + " has remote primary, checking its children");
+          }
+          isRemotePrimaryReady = isRemotePrimaryReady
+              && pr.getDataStore().isRemotePrimaryReadyForColocatedChildren(bucketId);
+        }
+      }
+    }
+    return isRemotePrimaryReady;
+  }
+
   /**
    * Removes a redundant bucket hosted by this data store. The rebalancer invokes this method
    * directly or sends this member a message to invoke it.
@@ -1450,7 +1495,8 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
 
     // Don't allow the removal of a bucket if we haven't
     // finished recovering from disk
-    if (!this.partitionedRegion.getRedundancyProvider().isPersistentRecoveryComplete()) {
+    if (!forceRemovePrimary
+        && !this.partitionedRegion.getRedundancyProvider().isPersistentRecoveryComplete()) {
       if (logger.isDebugEnabled()) {
         logger.debug(
             "Returning false from removeBucket because we have not finished recovering all colocated regions from disk");
@@ -1471,7 +1517,11 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
 
       }
 
+      PartitionedRegion leader = ColocationHelper.getLeaderRegion(this.partitionedRegion);
+      boolean isLeader = leader.equals(this.partitionedRegion);
       BucketAdvisor bucketAdvisor = bucketRegion.getBucketAdvisor();
+      InternalDistributedMember myId =
+          this.partitionedRegion.getDistributionManager().getDistributionManagerId();
       Lock writeLock = bucketAdvisor.getActiveWriteLock();
 
       // Fix for 43613 - don't remove the bucket
@@ -1480,10 +1530,33 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
       // member is no longer hosting the bucket.
       writeLock.lock();
       try {
+        // forceRemovePrimary==true will enable remove the bucket even when:
+        // 1) it's primary
+        // 2) no other primary ready yet
+        // 3) colocated bucket and its child is not completely ready
         if (!forceRemovePrimary && bucketAdvisor.isPrimary()) {
           return false;
         }
 
+        if (isLeader) {
+          if (!forceRemovePrimary && !isRemotePrimaryReadyForColocatedChildren(bucketId)) {
+            return false;
+          }
+
+          InternalDistributedMember primary = bucketAdvisor.getPrimary();
+          if (!forceRemovePrimary && (primary == null || myId.equals(primary))) {
+            if (logger.isDebugEnabled()) {
+              logger.debug("Bucket region " + bucketRegion
+                  + " does not have a remote primary yet. Not to remove.");
+            }
+            return false;
+          }
+
+          if (logger.isDebugEnabled()) {
+            logger.debug("Bucket region " + bucketRegion + " has primary at " + primary);
+          }
+        }
+
         // recurse down to each tier of children to remove first
         removeBucketForColocatedChildren(bucketId, forceRemovePrimary);
 
@@ -1513,8 +1586,6 @@ public class PartitionedRegionDataStore implements HasCachePerfStats {
       // because it won't block write operations while we're trying to acquire
       // the activePrimaryMoveLock
       InternalDistributedMember primary = bucketAdvisor.getPrimary();
-      InternalDistributedMember myId =
-          this.partitionedRegion.getDistributionManager().getDistributionManagerId();
       if (!myId.equals(primary)) {
         StateFlushOperation flush = new StateFlushOperation(bucketRegion);
         int executor = DistributionManager.WAITING_POOL_EXECUTOR;