You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@zookeeper.apache.org by iv...@apache.org on 2013/08/13 15:02:00 UTC
svn commit: r1513467 - in /zookeeper/bookkeeper/branches/branch-4.2: ./
bookkeeper-server/src/main/java/org/apache/bookkeeper/client/
bookkeeper-server/src/test/java/org/apache/bookkeeper/client/
Author: ivank
Date: Tue Aug 13 13:01:59 2013
New Revision: 1513467
URL: http://svn.apache.org/r1513467
Log:
BOOKKEEPER-667: Client write will fail with BadMetadataVersion in case of multiple Bookie failures with AutoRecovery enabled (sijie via ivank)
Modified:
zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt
zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java
zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java
Modified: zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt?rev=1513467&r1=1513466&r2=1513467&view=diff
==============================================================================
--- zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt (original)
+++ zookeeper/bookkeeper/branches/branch-4.2/CHANGES.txt Tue Aug 13 13:01:59 2013
@@ -56,6 +56,8 @@ Release 4.2.2 - Unreleased
BOOKKEEPER-604: Ledger storage can log an exception if GC happens concurrently. (sijie & ivank via ivank)
+ BOOKKEEPER-667: Client write will fail with BadMetadataVersion in case of multiple Bookie failures with AutoRecovery enabled (sijie via ivank)
+
hedwig-server:
BOOKKEEPER-579: TestSubAfterCloseSub was put in a wrong package (sijie via ivank)
Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java?rev=1513467&r1=1513466&r2=1513467&view=diff
==============================================================================
--- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java (original)
+++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerHandle.java Tue Aug 13 13:01:59 2013
@@ -805,6 +805,24 @@ public class LedgerHandle {
return false;
}
+ // We should check number of ensembles since there are two kinds of metadata conflicts:
+ // - Case 1: Multiple bookies involved in ensemble change.
+ // Number of ensembles should be same in this case.
+ // - Case 2: Recovery (Auto/Manually) replaced ensemble and ensemble changed.
+ // The metadata changed due to ensemble change would have one more ensemble
+ // than the metadata changed by recovery.
+ int diff = newMeta.getEnsembles().size() - metadata.getEnsembles().size();
+ if (0 != diff) {
+ if (-1 == diff) {
+ // Case 1: metadata is changed by other ones (e.g. Recovery)
+ return updateMetadataIfPossible(newMeta);
+ }
+ return false;
+ }
+
+ //
+ // Case 2:
+ //
// If the failed the bookie is still existed in the metadata (in zookeeper), it means that
// the ensemble change of the failed bookie is failed due to metadata conflicts. so try to
// update the ensemble change metadata again. Otherwise, it means that the ensemble change
@@ -816,26 +834,7 @@ public class LedgerHandle {
// update ensemble changed metadata again.
if (!metadata.currentEnsemble.get(ensembleInfo.bookieIndex)
.equals(ensembleInfo.addr)) {
- // if the local metadata is newer than zookeeper metadata, it means that metadata is updated
- // again when it was trying re-reading the metatada, re-kick the reread again
- if (metadata.isNewerThan(newMeta)) {
- rereadMetadata(this);
- return true;
- }
- // make sure the metadata doesn't changed by other ones.
- if (metadata.isConflictWith(newMeta)) {
- return false;
- }
- LOG.info("Resolve ledger metadata conflict "
- + "while changing ensemble to: "
- + ensembleInfo.newEnsemble
- + ", old meta data is \n"
- + new String(metadata.serialize())
- + "\n, new meta data is \n"
- + new String(newMeta.serialize()));
- // update znode version
- metadata.setVersion(newMeta.getVersion());
- writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo));
+ return updateMetadataIfPossible(newMeta);
}
} else {
// the failed bookie has been replaced
@@ -845,6 +844,29 @@ public class LedgerHandle {
return true;
}
+ private boolean updateMetadataIfPossible(LedgerMetadata newMeta) {
+ // if the local metadata is newer than zookeeper metadata, it means that metadata is updated
+ // again when it was trying re-reading the metatada, re-kick the reread again
+ if (metadata.isNewerThan(newMeta)) {
+ rereadMetadata(this);
+ return true;
+ }
+ // make sure the metadata doesn't changed by other ones.
+ if (metadata.isConflictWith(newMeta)) {
+ return false;
+ }
+ LOG.info("Resolve ledger metadata conflict while changing ensemble to: {},"
+ + " old meta data is \n {} \n, new meta data is \n {}.", new Object[] {
+ ensembleInfo.newEnsemble, metadata, newMeta });
+ // update znode version
+ metadata.setVersion(newMeta.getVersion());
+ // merge ensemble infos from new meta except last ensemble
+ // since they might be modified by recovery tool.
+ metadata.mergeEnsembles(newMeta.getEnsembles());
+ writeLedgerConfig(new ChangeEnsembleCb(ensembleInfo));
+ return true;
+ }
+
};
void unsetSuccessAndSendWriteRequest(final int bookieIndex) {
Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java?rev=1513467&r1=1513466&r2=1513467&view=diff
==============================================================================
--- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java (original)
+++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/main/java/org/apache/bookkeeper/client/LedgerMetadata.java Tue Aug 13 13:01:59 2013
@@ -1,5 +1,3 @@
-package org.apache.bookkeeper.client;
-
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -17,6 +15,7 @@ package org.apache.bookkeeper.client;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
+package org.apache.bookkeeper.client;
import static com.google.common.base.Charsets.UTF_8;
@@ -478,8 +477,10 @@ public class LedgerMetadata {
}
// if ledger is closed, we can just take the new ensembles
if (newMeta.state != LedgerMetadataFormat.State.CLOSED) {
- // ensemble size should be same
- if (ensembles.size() != newMeta.ensembles.size()) {
+ // allow new metadata to be one ensemble less than current metadata
+ // since ensemble change might kick in when recovery changed metadata
+ int diff = ensembles.size() - newMeta.ensembles.size();
+ if (0 != diff && 1 != diff) {
return true;
}
// ensemble distribution should be same
@@ -487,7 +488,7 @@ public class LedgerMetadata {
// using recovery tool.
Iterator<Long> keyIter = ensembles.keySet().iterator();
Iterator<Long> newMetaKeyIter = newMeta.ensembles.keySet().iterator();
- for (int i=0; i<ensembles.size(); i++) {
+ for (int i=0; i<newMeta.ensembles.size(); i++) {
Long curKey = keyIter.next();
Long newMetaKey = newMetaKeyIter.next();
if (!curKey.equals(newMetaKey)) {
@@ -497,4 +498,32 @@ public class LedgerMetadata {
}
return false;
}
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("(meta:").append(new String(serialize(), UTF_8)).append(", version:").append(version).append(")");
+ return sb.toString();
+ }
+
+ void mergeEnsembles(SortedMap<Long, ArrayList<InetSocketAddress>> newEnsembles) {
+ // allow new metadata to be one ensemble less than current metadata
+ // since ensemble change might kick in when recovery changed metadata
+ int diff = ensembles.size() - newEnsembles.size();
+ if (0 != diff && 1 != diff) {
+ return;
+ }
+ int i = 0;
+ for (Entry<Long, ArrayList<InetSocketAddress>> entry : newEnsembles.entrySet()) {
+ ++i;
+ if (ensembles.size() != i) {
+ // we should use last ensemble from current metadata
+ // not the new metadata read from zookeeper
+ long key = entry.getKey();
+ ArrayList<InetSocketAddress> ensemble = entry.getValue();
+ ensembles.put(key, ensemble);
+ }
+ }
+ }
+
}
Modified: zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java
URL: http://svn.apache.org/viewvc/zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java?rev=1513467&r1=1513466&r2=1513467&view=diff
==============================================================================
--- zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java (original)
+++ zookeeper/bookkeeper/branches/branch-4.2/bookkeeper-server/src/test/java/org/apache/bookkeeper/client/BookieRecoveryTest.java Tue Aug 13 13:01:59 2013
@@ -228,6 +228,45 @@ public class BookieRecoveryTest extends
}
/**
+ * This tests the bookie recovery functionality with ensemble changes.
+ * We'll verify that:
+ * - bookie recovery should not affect ensemble change.
+ * - ensemble change should not erase changes made by recovery.
+ *
+ * {@link https://issues.apache.org/jira/browse/BOOKKEEPER-667}
+ */
+ @Test(timeout = 60000)
+ public void testMetadataConflictWithRecovery() throws Exception {
+ int numEntries = 10;
+ byte[] data = "testMetadataConflictWithRecovery".getBytes();
+
+ LedgerHandle lh = bkc.createLedger(2, 2, digestType, baseClientConf.getBookieRecoveryPasswd());
+ for (int i = 0; i < numEntries; i++) {
+ lh.addEntry(data);
+ }
+ InetSocketAddress bookieToKill = lh.getLedgerMetadata().getEnsemble(numEntries - 1).get(1);
+ killBookie(bookieToKill);
+ startNewBookie();
+ for (int i = 0; i < numEntries; i++) {
+ lh.addEntry(data);
+ }
+ bkAdmin.recoverBookieData(bookieToKill, null);
+ // fail another bookie to cause ensemble change again
+ bookieToKill = lh.getLedgerMetadata().getEnsemble(2 * numEntries - 1).get(1);
+ ServerConfiguration confOfKilledBookie = killBookie(bookieToKill);
+ startNewBookie();
+ for (int i = 0; i < numEntries; i++) {
+ lh.addEntry(data);
+ }
+ // start the killed bookie again
+ bsConfs.add(confOfKilledBookie);
+ bs.add(startBookie(confOfKilledBookie));
+ // all ensembles should be fully replicated since it is recovered
+ assertTrue("Not fully replicated", verifyFullyReplicated(lh, 3 * numEntries));
+ lh.close();
+ }
+
+ /**
* This tests the asynchronous bookie recovery functionality by writing
* entries into 3 bookies, killing one bookie, starting up a new one to
* replace it, and then recovering the ledger entries from the killed bookie