You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2014/06/02 09:40:42 UTC

git commit: HBASE-10935 support snapshot policy where flush memstore can be skipped to prevent production cluster freeze (Tianying Chang)

Repository: hbase
Updated Branches:
  refs/heads/master 7f2f7c23a -> 86e5db509


HBASE-10935 support snapshot policy where flush memstore can be skipped to prevent production cluster freeze (Tianying Chang)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/86e5db50
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/86e5db50
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/86e5db50

Branch: refs/heads/master
Commit: 86e5db50992e9b297014e9c9824429e1cefb824b
Parents: 7f2f7c2
Author: Matteo Bertozzi <ma...@cloudera.com>
Authored: Mon Jun 2 08:03:19 2014 +0100
Committer: Matteo Bertozzi <ma...@cloudera.com>
Committed: Mon Jun 2 08:03:19 2014 +0100

----------------------------------------------------------------------
 .../apache/hadoop/hbase/client/HBaseAdmin.java  | 20 +++++++++
 .../hbase/protobuf/generated/HBaseProtos.java   | 44 ++++++++++++--------
 hbase-protocol/src/main/protobuf/HBase.proto    |  1 +
 .../snapshot/FlushSnapshotSubprocedure.java     | 26 ++++++++++--
 .../snapshot/RegionServerSnapshotManager.java   | 13 ++++++
 .../snapshot/TestFlushSnapshotFromClient.java   | 43 +++++++++++++++++++
 hbase-shell/src/main/ruby/hbase.rb              |  1 +
 hbase-shell/src/main/ruby/hbase/admin.rb        | 15 ++++++-
 .../src/main/ruby/shell/commands/snapshot.rb    |  6 +--
 9 files changed, 144 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
----------------------------------------------------------------------
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
index c6b11fd..95b5c07 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HBaseAdmin.java
@@ -2627,6 +2627,26 @@ public class HBaseAdmin implements Admin {
   }
 
   /**
+   * Create snapshot for the given table of given flush type.
+   * <p>
+   * Snapshots are considered unique based on <b>the name of the snapshot</b>. Attempts to take a
+   * snapshot with the same name (even a different type or with different parameters) will fail with
+   * a {@link SnapshotCreationException} indicating the duplicate naming.
+   * <p>
+   * Snapshot names follow the same naming constraints as tables in HBase.
+   * @param snapshotName name of the snapshot to be created
+   * @param tableName name of the table for which snapshot is created
+   * @param flushType snapshot type; SKIPFLUSH takes the snapshot without flushing the memstore first
+   * @throws IOException if a remote or network exception occurs
+   * @throws SnapshotCreationException if snapshot creation failed
+   * @throws IllegalArgumentException if the snapshot request is formatted incorrectly
+   */
+   public void snapshot(final byte[] snapshotName, final byte[] tableName,
+                       final SnapshotDescription.Type flushType) throws
+      IOException, SnapshotCreationException, IllegalArgumentException {
+      snapshot(Bytes.toString(snapshotName), Bytes.toString(tableName), flushType);
+  }
+  /**
    public void snapshot(final String snapshotName,
     * Create a timestamp consistent snapshot for the given table.
                         final byte[] tableName) throws IOException,

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java
index 238db31..9c0447e 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/HBaseProtos.java
@@ -10476,6 +10476,10 @@ public final class HBaseProtos {
        * <code>FLUSH = 1;</code>
        */
       FLUSH(1, 1),
+      /**
+       * <code>SKIPFLUSH = 2;</code>
+       */
+      SKIPFLUSH(2, 2),
       ;
 
       /**
@@ -10486,6 +10490,10 @@ public final class HBaseProtos {
        * <code>FLUSH = 1;</code>
        */
       public static final int FLUSH_VALUE = 1;
+      /**
+       * <code>SKIPFLUSH = 2;</code>
+       */
+      public static final int SKIPFLUSH_VALUE = 2;
 
 
       public final int getNumber() { return value; }
@@ -10494,6 +10502,7 @@ public final class HBaseProtos {
         switch (value) {
           case 0: return DISABLED;
           case 1: return FLUSH;
+          case 2: return SKIPFLUSH;
           default: return null;
         }
       }
@@ -16241,26 +16250,27 @@ public final class HBaseProtos {
       ",\n\rNameBytesPair\022\014\n\004name\030\001 \002(\t\022\r\n\005value\030" +
       "\002 \001(\014\"/\n\016BytesBytesPair\022\r\n\005first\030\001 \002(\014\022\016" +
       "\n\006second\030\002 \002(\014\",\n\rNameInt64Pair\022\014\n\004name\030" +
-      "\001 \001(\t\022\r\n\005value\030\002 \001(\003\"\256\001\n\023SnapshotDescrip" +
+      "\001 \001(\t\022\r\n\005value\030\002 \001(\003\"\275\001\n\023SnapshotDescrip" +
       "tion\022\014\n\004name\030\001 \002(\t\022\r\n\005table\030\002 \001(\t\022\030\n\rcre" +
       "ation_time\030\003 \001(\003:\0010\022.\n\004type\030\004 \001(\0162\031.Snap" +
       "shotDescription.Type:\005FLUSH\022\017\n\007version\030\005" +
-      " \001(\005\"\037\n\004Type\022\014\n\010DISABLED\020\000\022\t\n\005FLUSH\020\001\"}\n",
-      "\024ProcedureDescription\022\021\n\tsignature\030\001 \002(\t" +
-      "\022\020\n\010instance\030\002 \001(\t\022\030\n\rcreation_time\030\003 \001(" +
-      "\003:\0010\022&\n\rconfiguration\030\004 \003(\0132\017.NameString" +
-      "Pair\"\n\n\010EmptyMsg\"\033\n\007LongMsg\022\020\n\010long_msg\030" +
-      "\001 \002(\003\"\037\n\tDoubleMsg\022\022\n\ndouble_msg\030\001 \002(\001\"\'" +
-      "\n\rBigDecimalMsg\022\026\n\016bigdecimal_msg\030\001 \002(\014\"" +
-      "5\n\004UUID\022\026\n\016least_sig_bits\030\001 \002(\004\022\025\n\rmost_" +
-      "sig_bits\030\002 \002(\004\"K\n\023NamespaceDescriptor\022\014\n" +
-      "\004name\030\001 \002(\014\022&\n\rconfiguration\030\002 \003(\0132\017.Nam" +
-      "eStringPair\"$\n\020RegionServerInfo\022\020\n\010infoP",
-      "ort\030\001 \001(\005*r\n\013CompareType\022\010\n\004LESS\020\000\022\021\n\rLE" +
-      "SS_OR_EQUAL\020\001\022\t\n\005EQUAL\020\002\022\r\n\tNOT_EQUAL\020\003\022" +
-      "\024\n\020GREATER_OR_EQUAL\020\004\022\013\n\007GREATER\020\005\022\t\n\005NO" +
-      "_OP\020\006B>\n*org.apache.hadoop.hbase.protobu" +
-      "f.generatedB\013HBaseProtosH\001\240\001\001"
+      " \001(\005\".\n\004Type\022\014\n\010DISABLED\020\000\022\t\n\005FLUSH\020\001\022\r\n",
+      "\tSKIPFLUSH\020\002\"}\n\024ProcedureDescription\022\021\n\t" +
+      "signature\030\001 \002(\t\022\020\n\010instance\030\002 \001(\t\022\030\n\rcre" +
+      "ation_time\030\003 \001(\003:\0010\022&\n\rconfiguration\030\004 \003" +
+      "(\0132\017.NameStringPair\"\n\n\010EmptyMsg\"\033\n\007LongM" +
+      "sg\022\020\n\010long_msg\030\001 \002(\003\"\037\n\tDoubleMsg\022\022\n\ndou" +
+      "ble_msg\030\001 \002(\001\"\'\n\rBigDecimalMsg\022\026\n\016bigdec" +
+      "imal_msg\030\001 \002(\014\"5\n\004UUID\022\026\n\016least_sig_bits" +
+      "\030\001 \002(\004\022\025\n\rmost_sig_bits\030\002 \002(\004\"K\n\023Namespa" +
+      "ceDescriptor\022\014\n\004name\030\001 \002(\014\022&\n\rconfigurat" +
+      "ion\030\002 \003(\0132\017.NameStringPair\"$\n\020RegionServ",
+      "erInfo\022\020\n\010infoPort\030\001 \001(\005*r\n\013CompareType\022" +
+      "\010\n\004LESS\020\000\022\021\n\rLESS_OR_EQUAL\020\001\022\t\n\005EQUAL\020\002\022" +
+      "\r\n\tNOT_EQUAL\020\003\022\024\n\020GREATER_OR_EQUAL\020\004\022\013\n\007" +
+      "GREATER\020\005\022\t\n\005NO_OP\020\006B>\n*org.apache.hadoo" +
+      "p.hbase.protobuf.generatedB\013HBaseProtosH" +
+      "\001\240\001\001"
     };
     com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
       new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-protocol/src/main/protobuf/HBase.proto
----------------------------------------------------------------------
diff --git a/hbase-protocol/src/main/protobuf/HBase.proto b/hbase-protocol/src/main/protobuf/HBase.proto
index 5622735..3e3d570 100644
--- a/hbase-protocol/src/main/protobuf/HBase.proto
+++ b/hbase-protocol/src/main/protobuf/HBase.proto
@@ -159,6 +159,7 @@ message SnapshotDescription {
   enum Type {
     DISABLED = 0;
     FLUSH = 1;
+    SKIPFLUSH = 2;
   }
   optional Type type = 4 [default = FLUSH];
   optional int32 version = 5;

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
index a7a5186..5a2c113 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/FlushSnapshotSubprocedure.java
@@ -48,6 +48,7 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
   private final List<HRegion> regions;
   private final SnapshotDescription snapshot;
   private final SnapshotSubprocedurePool taskManager;
+  private boolean snapshotSkipFlush = false;
 
   public FlushSnapshotSubprocedure(ProcedureMember member,
       ForeignExceptionDispatcher errorListener, long wakeFrequency, long timeout,
@@ -55,6 +56,10 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
       SnapshotSubprocedurePool taskManager) {
     super(member, snapshot.getName(), errorListener, wakeFrequency, timeout);
     this.snapshot = snapshot;
+
+    if (this.snapshot.getType() == SnapshotDescription.Type.SKIPFLUSH) {
+      snapshotSkipFlush = true;
+    }
     this.regions = regions;
     this.taskManager = taskManager;
   }
@@ -78,10 +83,25 @@ public class FlushSnapshotSubprocedure extends Subprocedure {
       LOG.debug("Starting region operation on " + region);
       region.startRegionOperation();
       try {
-        LOG.debug("Flush Snapshotting region " + region.toString() + " started...");
-        region.flushcache();
+        if (snapshotSkipFlush) {
+        /*
+         * Take an online snapshot without forcing a coordinated flush, to avoid a pause.
+         * The snapshot type is defined inside the snapshot description. Ideally
+         * FlushSnapshotSubprocedure would be renamed to DistributedSnapshotSubprocedure, with
+         * the flush() behavior turned on or off based on the snapshot type.
+         * To minimize the code change, the class name is left unchanged.
+         */
+          LOG.debug("take snapshot without flush memstore first");
+        } else {
+          LOG.debug("Flush Snapshotting region " + region.toString() + " started...");
+          region.flushcache();
+        }
         region.addRegionToSnapshot(snapshot, monitor);
-        LOG.debug("... Flush Snapshotting region " + region.toString() + " completed.");
+        if (snapshotSkipFlush) {
+          LOG.debug("... SkipFlush Snapshotting region " + region.toString() + " completed.");
+        } else {
+          LOG.debug("... Flush Snapshotting region " + region.toString() + " completed.");
+        }
       } finally {
         LOG.debug("Closing region operation on " + region);
         region.closeRegionOperation();

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
index 4a4ee79..e78d690 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/snapshot/RegionServerSnapshotManager.java
@@ -186,6 +186,19 @@ public class RegionServerSnapshotManager extends RegionServerProcedureManager {
         new SnapshotSubprocedurePool(rss.getServerName().toString(), conf);
       return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis,
           timeoutMillis, involvedRegions, snapshot, taskManager);
+    case SKIPFLUSH:
+        /*
+         * Take an online snapshot without forcing a coordinated flush, to avoid a pause.
+         * The snapshot type is defined inside the snapshot description. Ideally
+         * FlushSnapshotSubprocedure would be renamed to DistributedSnapshotSubprocedure, with
+         * the flush() behavior turned on or off based on the snapshot type.
+         * To minimize the code change, the class name is left unchanged.
+         */
+        SnapshotSubprocedurePool taskManager2 =
+            new SnapshotSubprocedurePool(rss.getServerName().toString(), conf);
+        return new FlushSnapshotSubprocedure(member, exnDispatcher, wakeMillis,
+            timeoutMillis, involvedRegions, snapshot, taskManager2);
+
     default:
       throw new UnsupportedOperationException("Unrecognized snapshot type:" + snapshot.getType());
     }

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java
index f0e7985..fd69d62 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestFlushSnapshotFromClient.java
@@ -175,6 +175,49 @@ public class TestFlushSnapshotFromClient {
         admin, fs);
   }
 
+   /**
+   * Test snapshotting a table that is online without flushing
+   * @throws Exception
+   */
+  @Test(timeout=30000)
+  public void testSkipFlushTableSnapshot() throws Exception {
+    HBaseAdmin admin = UTIL.getHBaseAdmin();
+    // make sure we don't fail on listing snapshots
+    SnapshotTestingUtils.assertNoSnapshots(admin);
+
+    // put some stuff in the table
+    HTable table = new HTable(UTIL.getConfiguration(), TABLE_NAME);
+    UTIL.loadTable(table, TEST_FAM);
+
+    LOG.debug("FS state before snapshot:");
+    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
+        FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
+
+    // take a snapshot of the enabled table
+    String snapshotString = "skipFlushTableSnapshot";
+    byte[] snapshot = Bytes.toBytes(snapshotString);
+    admin.snapshot(snapshotString, STRING_TABLE_NAME, SnapshotDescription.Type.SKIPFLUSH);
+    LOG.debug("Snapshot completed.");
+
+    // make sure we have the snapshot
+    List<SnapshotDescription> snapshots = SnapshotTestingUtils.assertOneSnapshotThatMatches(admin,
+        snapshot, TABLE_NAME);
+
+    // make sure it's a valid snapshot
+    FileSystem fs = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getFileSystem();
+    Path rootDir = UTIL.getHBaseCluster().getMaster().getMasterFileSystem().getRootDir();
+    LOG.debug("FS state after snapshot:");
+    FSUtils.logFileSystemState(UTIL.getTestFileSystem(),
+        FSUtils.getRootDir(UTIL.getConfiguration()), LOG);
+
+    SnapshotTestingUtils.confirmSnapshotValid(snapshots.get(0), TABLE_NAME, TEST_FAM, rootDir,
+        admin, fs);
+
+    admin.deleteSnapshot(snapshot);
+    snapshots = admin.listSnapshots();
+    SnapshotTestingUtils.assertNoSnapshots(admin);
+  }
+
 
   /**
    * Test simple flush snapshotting a table that is online

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-shell/src/main/ruby/hbase.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/hbase.rb b/hbase-shell/src/main/ruby/hbase.rb
index 3c09c4d..fcd11fc 100644
--- a/hbase-shell/src/main/ruby/hbase.rb
+++ b/hbase-shell/src/main/ruby/hbase.rb
@@ -61,6 +61,7 @@ module HBaseConstants
   ATTRIBUTES="ATTRIBUTES"
   VISIBILITY="VISIBILITY"
   AUTHORIZATIONS = "AUTHORIZATIONS"
+  SKIP_FLUSH = 'SKIP_FLUSH'
 
   # Load constants from hbase java API
   def self.promote_constants(constants)

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-shell/src/main/ruby/hbase/admin.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/hbase/admin.rb b/hbase-shell/src/main/ruby/hbase/admin.rb
index 3d97501..43ccad3 100644
--- a/hbase-shell/src/main/ruby/hbase/admin.rb
+++ b/hbase-shell/src/main/ruby/hbase/admin.rb
@@ -22,6 +22,7 @@ java_import java.util.Arrays
 java_import org.apache.hadoop.hbase.util.Pair
 java_import org.apache.hadoop.hbase.util.RegionSplitter
 java_import org.apache.hadoop.hbase.util.Bytes
+java_import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos::SnapshotDescription
 
 # Wrapper for org.apache.hadoop.hbase.client.HBaseAdmin
 
@@ -718,8 +719,18 @@ module Hbase
     
     #----------------------------------------------------------------------------------------------
     # Take a snapshot of specified table
-    def snapshot(table, snapshot_name)
-      @admin.snapshot(snapshot_name.to_java_bytes, table.to_java_bytes)
+    def snapshot(table, snapshot_name, *args)
+      # Issue exactly one snapshot request regardless of how many option hashes
+      # were passed; requesting one per argument would reuse the same snapshot
+      # name and fail as a duplicate. Non-hash arguments are ignored.
+      skip_flush = args.any? { |arg| arg.is_a?(Hash) && arg[SKIP_FLUSH] == true }
+      name_bytes = snapshot_name.to_java_bytes
+      table_bytes = table.to_java_bytes
+      if skip_flush
+        @admin.snapshot(name_bytes, table_bytes, SnapshotDescription::Type::SKIPFLUSH)
+      else
+        @admin.snapshot(name_bytes, table_bytes)
+      end
     end
 
     #----------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/hbase/blob/86e5db50/hbase-shell/src/main/ruby/shell/commands/snapshot.rb
----------------------------------------------------------------------
diff --git a/hbase-shell/src/main/ruby/shell/commands/snapshot.rb b/hbase-shell/src/main/ruby/shell/commands/snapshot.rb
index 62de845..15bf298 100644
--- a/hbase-shell/src/main/ruby/shell/commands/snapshot.rb
+++ b/hbase-shell/src/main/ruby/shell/commands/snapshot.rb
@@ -24,13 +24,13 @@ module Shell
 Take a snapshot of specified table. Examples:
 
   hbase> snapshot 'sourceTable', 'snapshotName'
-  hbase> snapshot 'namespace:sourceTable', 'snapshotName'
+  hbase> snapshot 'namespace:sourceTable', 'snapshotName', {SKIP_FLUSH => true}
 EOF
       end
 
-      def command(table, snapshot_name)
+      def command(table, snapshot_name, *args)
         format_simple_command do
-          admin.snapshot(table, snapshot_name)
+          admin.snapshot(table, snapshot_name, *args)
         end
       end
     end