You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bu...@apache.org on 2014/04/05 02:59:37 UTC
[02/15] git commit: ACCUMULO-2519 Aborts upgrade if there are Fate
transactions from an old version.
ACCUMULO-2519 Aborts upgrade if there are Fate transactions from an old version.
Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/5a504b31
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/5a504b31
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/5a504b31
Branch: refs/heads/1.6.0-SNAPSHOT
Commit: 5a504b311c0e5f59ff5b14221c6bf61f43b4d093
Parents: a904f69
Author: Sean Busbey <bu...@cloudera.com>
Authored: Fri Mar 28 01:46:09 2014 -0500
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Fri Apr 4 17:27:05 2014 -0700
----------------------------------------------------------------------
README | 14 +++
.../org/apache/accumulo/server/Accumulo.java | 31 ++++++
.../apache/accumulo/server/master/Master.java | 100 ++++++++++++-------
.../server/tabletserver/TabletServer.java | 5 +
.../accumulo/server/util/MetadataTable.java | 3 +
5 files changed, 116 insertions(+), 37 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/README
----------------------------------------------------------------------
diff --git a/README b/README
index 115a9b7..0bb1030 100644
--- a/README
+++ b/README
@@ -54,12 +54,26 @@ accumulo.
This happens automatically the first time Accumulo 1.5 is started.
+ * Verify that there are no outstanding FATE operations
+ - Under 1.4 you can list what's in FATE by running
+ $ACCUMULO_HOME/bin/accumulo org.apache.accumulo.server.fate.Admin print
+ - Note that operations in any state will prevent an upgrade. It is safe
+ to delete operations with status SUCCESSFUL. For others, you should restart
+ your 1.4 cluster and allow them to finish.
* Stop the 1.4 instance.
* Configure 1.5 to use the hdfs directory, walog directories, and zookeepers
that 1.4 was using.
* Copy other 1.4 configuration options as needed.
* Start Accumulo 1.5.
+ The upgrade process must make changes to Accumulo's internal state in both ZooKeeper and
+ the table metadata. This process may take some time as Tablet Servers move write-ahead
+ logs to HDFS and then do recovery. During this time, the Monitor will claim that the
+ Master is down and some services may send the Monitor log messages about failure to
+ communicate with each other. These messages are safe to ignore. If you need details on
+ the upgrade's progress, you should view the local logs on the Tablet Servers and active
+ Master.
+
******************************************************************************
4. Configuring
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/Accumulo.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/Accumulo.java b/server/src/main/java/org/apache/accumulo/server/Accumulo.java
index 99ec7e4..420b6cc 100644
--- a/server/src/main/java/org/apache/accumulo/server/Accumulo.java
+++ b/server/src/main/java/org/apache/accumulo/server/Accumulo.java
@@ -27,11 +27,16 @@ import java.util.Map.Entry;
import java.util.TreeMap;
import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
import org.apache.accumulo.core.conf.Property;
import org.apache.accumulo.core.trace.DistributedTrace;
import org.apache.accumulo.core.util.AddressUtil;
import org.apache.accumulo.core.util.UtilWaitThread;
import org.apache.accumulo.core.util.Version;
+import org.apache.accumulo.core.zookeeper.ZooUtil;
+import org.apache.accumulo.fate.ReadOnlyTStore;
+import org.apache.accumulo.fate.ReadOnlyStore;
+import org.apache.accumulo.fate.ZooStore;
import org.apache.accumulo.server.client.HdfsZooInstance;
import org.apache.accumulo.server.conf.ServerConfiguration;
import org.apache.accumulo.server.util.time.SimpleTimer;
@@ -53,6 +58,7 @@ public class Accumulo {
try {
if (getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
fs.create(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.DATA_VERSION));
+ // TODO document failure mode & recovery if FS permissions cause above to work and below to fail ACCUMULO-2596
fs.delete(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.PREV_DATA_VERSION), false);
}
} catch (IOException e) {
@@ -263,4 +269,29 @@ public class Accumulo {
throw new RuntimeException("cannot find method setSafeMode", ex);
}
}
+
+ /**
+ * Exit loudly if there are outstanding Fate operations.
+ * Since Fate serializes class names, we need to make sure there are no queued
+ * transactions from a previous version before continuing an upgrade. The status of the operations is
+ * irrelevant; those in SUCCESSFUL status cause the same problem as those just queued.
+ *
+ * Note that the Master should not allow write access to Fate until after all upgrade steps are complete.
+ *
+ * Should be called as a guard before performing any upgrade steps, after determining that an upgrade is needed.
+ *
+ * see ACCUMULO-2519
+ */
+ public static void abortIfFateTransactions() {
+ try {
+ final ReadOnlyTStore<Accumulo> fate = new ReadOnlyStore<Accumulo>(new ZooStore<Accumulo>(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZFATE,
+ ZooReaderWriter.getRetryingInstance()));
+ if (!(fate.list().isEmpty())) {
+ throw new AccumuloException("Aborting upgrade because there are outstanding FATE transactions from a previous Accumulo version. Please see the README document for instructions on what to do under your previous version.");
+ }
+ } catch (Exception exception) {
+ log.fatal("Problem verifying Fate readiness", exception);
+ System.exit(1);
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/master/Master.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/master/Master.java b/server/src/main/java/org/apache/accumulo/server/master/Master.java
index 270eb18..a2ad2e6 100644
--- a/server/src/main/java/org/apache/accumulo/server/master/Master.java
+++ b/server/src/main/java/org/apache/accumulo/server/master/Master.java
@@ -34,6 +34,7 @@ import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@@ -271,7 +272,9 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
upgradeMetadata();
}
}
-
+
+ private boolean haveUpgradedZooKeeper = false;
+
private void upgradeZookeeper() {
// 1.5.1 and 1.6.0 both do some state checking after obtaining the zoolock for the
// monitor and before starting up. It's not tied to the data version at all (and would
@@ -279,59 +282,79 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
// that the master is not the only thing that may alter zookeeper before starting.
if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+ // This Master hasn't started Fate yet, so any outstanding transactions must be from before the upgrade.
+ // Change to Guava's Verify once we use Guava 17.
+ if (null != fate) {
+ throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master transitioning to active. Please save all logs and file a bug.");
+ }
+ Accumulo.abortIfFateTransactions();
try {
log.info("Upgrading zookeeper");
-
+
IZooReaderWriter zoo = ZooReaderWriter.getInstance();
-
+
zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/loggers", NodeMissingPolicy.SKIP);
zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/dead/loggers", NodeMissingPolicy.SKIP);
zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZRECOVERY, new byte[] {'0'}, NodeExistsPolicy.SKIP);
-
+
for (String id : Tables.getIdToNameMap(instance).keySet()) {
-
+
zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZTABLES + "/" + id + Constants.ZTABLE_COMPACT_CANCEL_ID, "0".getBytes(Constants.UTF8),
NodeExistsPolicy.SKIP);
}
+ haveUpgradedZooKeeper = true;
} catch (Exception ex) {
log.fatal("Error performing upgrade", ex);
System.exit(1);
}
}
}
-
+
private final AtomicBoolean upgradeMetadataRunning = new AtomicBoolean(false);
-
+ private final CountDownLatch waitForMetadataUpgrade = new CountDownLatch(1);
+
private final ServerConfiguration serverConfig;
private void upgradeMetadata() {
- if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
- if (upgradeMetadataRunning.compareAndSet(false, true)) {
+ // we make sure we're only doing the rest of this method once so that we can signal to other threads that an upgrade wasn't needed.
+ if (upgradeMetadataRunning.compareAndSet(false, true)) {
+ if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+ // sanity check that we passed the Fate verification prior to ZooKeeper upgrade, and that Fate still hasn't been started.
+ // Change both to use Guava's Verify once we use Guava 17.
+ if (!haveUpgradedZooKeeper) {
+ throw new IllegalStateException("We should only attempt to upgrade Accumulo's !METADATA table if we've already upgraded ZooKeeper. Please save all logs and file a bug.");
+ }
+ if (null != fate) {
+ throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master finishing upgrades. Please save all logs and file a bug.");
+ }
Runnable upgradeTask = new Runnable() {
@Override
public void run() {
try {
+ log.info("Starting to upgrade !METADATA table.");
MetadataTable.moveMetaDeleteMarkers(instance, SecurityConstants.getSystemCredentials());
+ log.info("Updating persistent data version.");
Accumulo.updateAccumuloVersion(fs);
-
log.info("Upgrade complete");
-
+ waitForMetadataUpgrade.countDown();
} catch (Exception ex) {
log.fatal("Error performing upgrade", ex);
System.exit(1);
}
-
+
}
};
-
+
// need to run this in a separate thread because a lock is held that prevents !METADATA tablets from being assigned and this task writes to the
// !METADATA table
new Thread(upgradeTask).start();
+ } else {
+ waitForMetadataUpgrade.countDown();
}
}
}
-
+
private int assignedOrHosted(Text tableId) {
int result = 0;
for (TabletGroupWatcher watcher : watchers) {
@@ -2136,28 +2159,6 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
tserverSet.startListeningForTabletServerChanges();
- // TODO: add shutdown for fate object - ACCUMULO-1307
- try {
- final AgeOffStore<Master> store = new AgeOffStore<Master>(new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(instance) + Constants.ZFATE,
- ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8);
-
- int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);
-
- fate = new Fate<Master>(this, store, threads);
-
- SimpleTimer.getInstance().schedule(new Runnable() {
-
- @Override
- public void run() {
- store.ageOff();
- }
- }, 63000, 63000);
- } catch (KeeperException e) {
- throw new IOException(e);
- } catch (InterruptedException e) {
- throw new IOException(e);
- }
-
ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, new Watcher() {
@Override
public void process(WatchedEvent event) {
@@ -2183,7 +2184,32 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
for (TabletGroupWatcher watcher : watchers) {
watcher.start();
}
-
+
+ // Once we are sure tablet servers are no longer checking for an empty Fate transaction queue before doing WAL upgrades, we can safely start using Fate ourselves.
+ waitForMetadataUpgrade.await();
+
+ // TODO: add shutdown for fate object - ACCUMULO-1307
+ try {
+ final AgeOffStore<Master> store = new AgeOffStore<Master>(new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(instance) + Constants.ZFATE,
+ ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8);
+
+ int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);
+
+ fate = new Fate<Master>(this, store, threads);
+
+ SimpleTimer.getInstance().schedule(new Runnable() {
+
+ @Override
+ public void run() {
+ store.ageOff();
+ }
+ }, 63000, 63000);
+ } catch (KeeperException e) {
+ throw new IOException(e);
+ } catch (InterruptedException e) {
+ throw new IOException(e);
+ }
+
Processor<Iface> processor = new Processor<Iface>(TraceWrap.service(new MasterClientServiceHandler()));
ServerPort serverPort = TServerUtils.startServer(getSystemConfiguration(), Property.MASTER_CLIENTPORT, processor, "Master",
"Master Client Service Handler", null, Property.MASTER_MINTHREADS, Property.MASTER_THREADCHECK, Property.GENERAL_MAX_MESSAGE_SIZE);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
index d76946d..ad3d615 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
@@ -3322,6 +3322,11 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu
*
*/
public static void recoverLocalWriteAheadLogs(FileSystem fs, ServerConfiguration serverConf) throws IOException {
+ if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+ // If the Master has not yet signaled a finish to upgrading, we need to make sure we can roll back in the
+ // event of outstanding transactions in Fate from the previous version.
+ Accumulo.abortIfFateTransactions();
+ }
FileSystem localfs = FileSystem.getLocal(fs.getConf()).getRawFileSystem();
AccumuloConfiguration conf = serverConf.getConfiguration();
String localWalDirectories = conf.get(Property.LOGGER_DIR);
http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
index 7328a55..d6e0a3c 100644
--- a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
+++ b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
@@ -1233,6 +1233,9 @@ public class MetadataTable extends org.apache.accumulo.core.util.MetadataTable {
update(SecurityConstants.getSystemCredentials(), m);
}
+ /**
+ * During an upgrade from Accumulo 1.4 -> 1.5, we need to move deletion requests for files under the !METADATA table to the root tablet.
+ */
public static void moveMetaDeleteMarkers(Instance instance, TCredentials creds) {
// move delete markers from the normal delete keyspace to the root tablet delete keyspace if the files are for the !METADATA table
Scanner scanner = new ScannerImpl(instance, creds, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS);