You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@accumulo.apache.org by bu...@apache.org on 2014/04/05 02:59:37 UTC

[02/15] git commit: ACCUMULO-2519 Aborts upgrade if there are Fate transactions from an old version.

ACCUMULO-2519 Aborts upgrade if there are Fate transactions from an old version.


Project: http://git-wip-us.apache.org/repos/asf/accumulo/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo/commit/5a504b31
Tree: http://git-wip-us.apache.org/repos/asf/accumulo/tree/5a504b31
Diff: http://git-wip-us.apache.org/repos/asf/accumulo/diff/5a504b31

Branch: refs/heads/1.6.0-SNAPSHOT
Commit: 5a504b311c0e5f59ff5b14221c6bf61f43b4d093
Parents: a904f69
Author: Sean Busbey <bu...@cloudera.com>
Authored: Fri Mar 28 01:46:09 2014 -0500
Committer: Sean Busbey <bu...@cloudera.com>
Committed: Fri Apr 4 17:27:05 2014 -0700

----------------------------------------------------------------------
 README                                          |  14 +++
 .../org/apache/accumulo/server/Accumulo.java    |  31 ++++++
 .../apache/accumulo/server/master/Master.java   | 100 ++++++++++++-------
 .../server/tabletserver/TabletServer.java       |   5 +
 .../accumulo/server/util/MetadataTable.java     |   3 +
 5 files changed, 116 insertions(+), 37 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/README
----------------------------------------------------------------------
diff --git a/README b/README
index 115a9b7..0bb1030 100644
--- a/README
+++ b/README
@@ -54,12 +54,26 @@ accumulo.
 
  This happens automatically the first time Accumulo 1.5 is started.  
 
+  * Verify that there are no outstanding FATE operations
+    - Under 1.4 you can list what's in FATE by running
+      $ACCUMULO_HOME/bin/accumulo org.apache.accumulo.server.fate.Admin print
+    - Note that operations in any state will prevent an upgrade. It is safe
+      to delete operations with status SUCCESSFUL. For others, you should restart
+      your 1.4 cluster and allow them to finish.
   * Stop the 1.4 instance.  
   * Configure 1.5 to use the hdfs directory, walog directories, and zookeepers
     that 1.4 was using.
   * Copy other 1.4 configuration options as needed.
   * Start Accumulo 1.5. 
 
+  The upgrade process must make changes to Accumulo's internal state in both ZooKeeper and
+  the table metadata. This process may take some time as Tablet Servers move write-ahead
+  logs to HDFS and then do recovery. During this time, the Monitor will claim that the
+  Master is down and some services may send the Monitor log messages about failure to
+  communicate with each other. These messages are safe to ignore. If you need details on
+  the upgrade's progress, you should view the local logs on the Tablet Servers and the active
+  Master.
+
 ******************************************************************************
 4. Configuring
 

http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/Accumulo.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/Accumulo.java b/server/src/main/java/org/apache/accumulo/server/Accumulo.java
index 99ec7e4..420b6cc 100644
--- a/server/src/main/java/org/apache/accumulo/server/Accumulo.java
+++ b/server/src/main/java/org/apache/accumulo/server/Accumulo.java
@@ -27,11 +27,16 @@ import java.util.Map.Entry;
 import java.util.TreeMap;
 
 import org.apache.accumulo.core.Constants;
+import org.apache.accumulo.core.client.AccumuloException;
 import org.apache.accumulo.core.conf.Property;
 import org.apache.accumulo.core.trace.DistributedTrace;
 import org.apache.accumulo.core.util.AddressUtil;
 import org.apache.accumulo.core.util.UtilWaitThread;
 import org.apache.accumulo.core.util.Version;
+import org.apache.accumulo.core.zookeeper.ZooUtil;
+import org.apache.accumulo.fate.ReadOnlyTStore;
+import org.apache.accumulo.fate.ReadOnlyStore;
+import org.apache.accumulo.fate.ZooStore;
 import org.apache.accumulo.server.client.HdfsZooInstance;
 import org.apache.accumulo.server.conf.ServerConfiguration;
 import org.apache.accumulo.server.util.time.SimpleTimer;
@@ -53,6 +58,7 @@ public class Accumulo {
     try {
       if (getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
         fs.create(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.DATA_VERSION));
+        // TODO document failure mode & recovery if FS permissions cause above to work and below to fail ACCUMULO-2596
         fs.delete(new Path(ServerConstants.getDataVersionLocation() + "/" + Constants.PREV_DATA_VERSION), false);
       }
     } catch (IOException e) {
@@ -263,4 +269,29 @@ public class Accumulo {
       throw new RuntimeException("cannot find method setSafeMode", ex);
     }
   }
+
+  /**
+   * Exit loudly if there are outstanding Fate operations.
+   * Since Fate serializes class names, we need to make sure there are no queued
+   * transactions from a previous version before continuing an upgrade. The status of the operations is
+   * irrelevant; those in SUCCESSFUL status cause the same problem as those just queued.
+   *
+   * Note that the Master should not allow write access to Fate until after all upgrade steps are complete.
+   *
+   * Should be called as a guard before performing any upgrade steps, after determining that an upgrade is needed.
+   *
+   * see ACCUMULO-2519
+   */
+  public static void abortIfFateTransactions() {
+    try {
+      final ReadOnlyTStore<Accumulo> fate = new ReadOnlyStore<Accumulo>(new ZooStore<Accumulo>(ZooUtil.getRoot(HdfsZooInstance.getInstance()) + Constants.ZFATE,
+          ZooReaderWriter.getRetryingInstance()));
+      if (!(fate.list().isEmpty())) {
+        throw new AccumuloException("Aborting upgrade because there are outstanding FATE transactions from a previous Accumulo version. Please see the README document for instructions on what to do under your previous version.");
+      }
+    } catch (Exception exception) {
+      log.fatal("Problem verifying Fate readiness", exception);
+      System.exit(1);
+    }
+  }
 }

http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/master/Master.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/master/Master.java b/server/src/main/java/org/apache/accumulo/server/master/Master.java
index 270eb18..a2ad2e6 100644
--- a/server/src/main/java/org/apache/accumulo/server/master/Master.java
+++ b/server/src/main/java/org/apache/accumulo/server/master/Master.java
@@ -34,6 +34,7 @@ import java.util.Set;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 
@@ -271,7 +272,9 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
       upgradeMetadata();
     }
   }
-  
+
+  private boolean haveUpgradedZooKeeper = false;
+
   private void upgradeZookeeper() {
     // 1.5.1 and 1.6.0 both do some state checking after obtaining the zoolock for the
     // monitor and before starting up. It's not tied to the data version at all (and would
@@ -279,59 +282,79 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
     // that the master is not the only thing that may alter zookeeper before starting.
 
     if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+      // This Master hasn't started Fate yet, so any outstanding transactions must be from before the upgrade.
+      // Change to Guava's Verify once we use Guava 17.
+      if (null != fate) {
+        throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master transitioning to active. Please save all logs and file a bug.");
+      }
+      Accumulo.abortIfFateTransactions();
       try {
         log.info("Upgrading zookeeper");
-        
+
         IZooReaderWriter zoo = ZooReaderWriter.getInstance();
-        
+
         zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/loggers", NodeMissingPolicy.SKIP);
         zoo.recursiveDelete(ZooUtil.getRoot(instance) + "/dead/loggers", NodeMissingPolicy.SKIP);
 
         zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZRECOVERY, new byte[] {'0'}, NodeExistsPolicy.SKIP);
-        
+
         for (String id : Tables.getIdToNameMap(instance).keySet()) {
-          
+
           zoo.putPersistentData(ZooUtil.getRoot(instance) + Constants.ZTABLES + "/" + id + Constants.ZTABLE_COMPACT_CANCEL_ID, "0".getBytes(Constants.UTF8),
               NodeExistsPolicy.SKIP);
         }
+        haveUpgradedZooKeeper = true;
       } catch (Exception ex) {
         log.fatal("Error performing upgrade", ex);
         System.exit(1);
       }
     }
   }
-  
+
   private final AtomicBoolean upgradeMetadataRunning = new AtomicBoolean(false);
-  
+  private final CountDownLatch waitForMetadataUpgrade = new CountDownLatch(1);
+
   private final ServerConfiguration serverConfig;
   
   private void upgradeMetadata() {
-    if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
-      if (upgradeMetadataRunning.compareAndSet(false, true)) {
+    // we make sure we're only doing the rest of this method once so that we can signal to other threads that an upgrade wasn't needed.
+    if (upgradeMetadataRunning.compareAndSet(false, true)) {
+      if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+        // sanity check that we passed the Fate verification prior to ZooKeeper upgrade, and that Fate still hasn't been started.
+        // Change both to use Guava's Verify once we use Guava 17.
+        if (!haveUpgradedZooKeeper) {
+          throw new IllegalStateException("We should only attempt to upgrade Accumulo's !METADATA table if we've already upgraded ZooKeeper. Please save all logs and file a bug.");
+        }
+        if (null != fate) {
+          throw new IllegalStateException("Access to Fate should not have been initialized prior to the Master finishing upgrades. Please save all logs and file a bug.");
+        }
         Runnable upgradeTask = new Runnable() {
           @Override
           public void run() {
             try {
+              log.info("Starting to upgrade !METADATA table.");
               MetadataTable.moveMetaDeleteMarkers(instance, SecurityConstants.getSystemCredentials());
+              log.info("Updating persistent data version.");
               Accumulo.updateAccumuloVersion(fs);
-              
               log.info("Upgrade complete");
-              
+              waitForMetadataUpgrade.countDown();
             } catch (Exception ex) {
               log.fatal("Error performing upgrade", ex);
               System.exit(1);
             }
-            
+
           }
         };
-        
+
         // need to run this in a separate thread because a lock is held that prevents !METADATA tablets from being assigned and this task writes to the
         // !METADATA table
         new Thread(upgradeTask).start();
+      } else {
+        waitForMetadataUpgrade.countDown();
       }
     }
   }
-  
+
   private int assignedOrHosted(Text tableId) {
     int result = 0;
     for (TabletGroupWatcher watcher : watchers) {
@@ -2136,28 +2159,6 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
     
     tserverSet.startListeningForTabletServerChanges();
     
-    // TODO: add shutdown for fate object - ACCUMULO-1307
-    try {
-      final AgeOffStore<Master> store = new AgeOffStore<Master>(new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(instance) + Constants.ZFATE,
-          ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8);
-      
-      int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);
-      
-      fate = new Fate<Master>(this, store, threads);
-      
-      SimpleTimer.getInstance().schedule(new Runnable() {
-        
-        @Override
-        public void run() {
-          store.ageOff();
-        }
-      }, 63000, 63000);
-    } catch (KeeperException e) {
-      throw new IOException(e);
-    } catch (InterruptedException e) {
-      throw new IOException(e);
-    }
-    
     ZooReaderWriter.getInstance().getChildren(zroot + Constants.ZRECOVERY, new Watcher() {
       @Override
       public void process(WatchedEvent event) {
@@ -2183,7 +2184,32 @@ public class Master implements LiveTServerSet.Listener, TableObserver, CurrentSt
     for (TabletGroupWatcher watcher : watchers) {
       watcher.start();
     }
-    
+
+    // Once we are sure tablet servers are no longer checking for an empty Fate transaction queue before doing WAL upgrades, we can safely start using Fate ourselves.
+    waitForMetadataUpgrade.await();
+
+    // TODO: add shutdown for fate object - ACCUMULO-1307
+    try {
+      final AgeOffStore<Master> store = new AgeOffStore<Master>(new org.apache.accumulo.fate.ZooStore<Master>(ZooUtil.getRoot(instance) + Constants.ZFATE,
+          ZooReaderWriter.getRetryingInstance()), 1000 * 60 * 60 * 8);
+
+      int threads = this.getConfiguration().getConfiguration().getCount(Property.MASTER_FATE_THREADPOOL_SIZE);
+
+      fate = new Fate<Master>(this, store, threads);
+
+      SimpleTimer.getInstance().schedule(new Runnable() {
+
+        @Override
+        public void run() {
+          store.ageOff();
+        }
+      }, 63000, 63000);
+    } catch (KeeperException e) {
+      throw new IOException(e);
+    } catch (InterruptedException e) {
+      throw new IOException(e);
+    }
+
     Processor<Iface> processor = new Processor<Iface>(TraceWrap.service(new MasterClientServiceHandler()));
     ServerPort serverPort = TServerUtils.startServer(getSystemConfiguration(), Property.MASTER_CLIENTPORT, processor, "Master",
         "Master Client Service Handler", null, Property.MASTER_MINTHREADS, Property.MASTER_THREADCHECK, Property.GENERAL_MAX_MESSAGE_SIZE);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
index d76946d..ad3d615 100644
--- a/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
+++ b/server/src/main/java/org/apache/accumulo/server/tabletserver/TabletServer.java
@@ -3322,6 +3322,11 @@ public class TabletServer extends AbstractMetricsImpl implements org.apache.accu
    * 
    */
   public static void recoverLocalWriteAheadLogs(FileSystem fs, ServerConfiguration serverConf) throws IOException {
+    if (Accumulo.getAccumuloPersistentVersion(fs) == Constants.PREV_DATA_VERSION) {
+      // If the Master has not yet signaled that the upgrade is finished, we need to make sure we can roll back in the
+      // event of outstanding transactions in Fate from the previous version.
+      Accumulo.abortIfFateTransactions();
+    }
     FileSystem localfs = FileSystem.getLocal(fs.getConf()).getRawFileSystem();
     AccumuloConfiguration conf = serverConf.getConfiguration();
     String localWalDirectories = conf.get(Property.LOGGER_DIR);

http://git-wip-us.apache.org/repos/asf/accumulo/blob/5a504b31/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
----------------------------------------------------------------------
diff --git a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
index 7328a55..d6e0a3c 100644
--- a/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
+++ b/server/src/main/java/org/apache/accumulo/server/util/MetadataTable.java
@@ -1233,6 +1233,9 @@ public class MetadataTable extends org.apache.accumulo.core.util.MetadataTable {
     update(SecurityConstants.getSystemCredentials(), m);
   }
 
+  /**
+   * During an upgrade from Accumulo 1.4 -> 1.5, we need to move deletion requests for files under the !METADATA table to the root tablet.
+   */
   public static void moveMetaDeleteMarkers(Instance instance, TCredentials creds) {
     // move delete markers from the normal delete keyspace to the root tablet delete keyspace if the files are for the !METADATA table
     Scanner scanner = new ScannerImpl(instance, creds, Constants.METADATA_TABLE_ID, Constants.NO_AUTHS);