Posted to commits@hbase.apache.org by st...@apache.org on 2011/07/01 23:33:15 UTC

svn commit: r1142101 - in /hbase/trunk: ./ src/main/java/org/apache/hadoop/hbase/ src/main/java/org/apache/hadoop/hbase/master/ src/main/java/org/apache/hadoop/hbase/master/handler/ src/main/java/org/apache/hadoop/hbase/regionserver/ src/main/java/org/...

Author: stack
Date: Fri Jul  1 21:33:14 2011
New Revision: 1142101

URL: http://svn.apache.org/viewvc?rev=1142101&view=rev
Log:
HBASE-3968 HLog Pretty Printer

Added:
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java
Modified:
    hbase/trunk/CHANGES.txt
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/KeyValue.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
    hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogKey.java

Modified: hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hbase/trunk/CHANGES.txt?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/CHANGES.txt (original)
+++ hbase/trunk/CHANGES.txt Fri Jul  1 21:33:14 2011
@@ -296,6 +296,7 @@ Release 0.91.0 - Unreleased
                (Mingjie Lai via garyh)
    HBASE-4036  Implementing a MultipleColumnPrefixFilter (Anirudh Todi)
    HBASE-4048  [Coprocessors] Support configuration of coprocessor at load time
+   HBASE-3968  HLog Pretty Printer (Riley Patterson)
 
   TASKS
    HBASE-3559  Move report of split to master OFF the heartbeat channel

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/KeyValue.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/KeyValue.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/KeyValue.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/KeyValue.java Fri Jul  1 21:33:14 2011
@@ -24,6 +24,8 @@ import java.io.DataOutput;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Map;
 
 import com.google.common.primitives.Longs;
 import org.apache.commons.logging.Log;
@@ -613,6 +615,23 @@ public class KeyValue implements Writabl
    * @param l Length of key.
    * @return Key as a String.
    */
+  /**
+   * Produces a string map for this key/value pair. Useful for programmatic use
+   * and manipulation of the data stored in a KeyValue, for example, printing
+   * as JSON. Values are left out due to their tendency to be large. If needed,
+   * they can be added manually.
+   *
+   * @return a Map<String, Object> containing data from this key/value pair
+   */
+  public Map<String, Object> toStringMap() {
+    Map<String, Object> stringMap = new HashMap<String, Object>();
+    stringMap.put("row", Bytes.toStringBinary(getRow()));
+    stringMap.put("family", Bytes.toStringBinary(getFamily()));
+    stringMap.put("qualifier", Bytes.toStringBinary(getQualifier()));
+    stringMap.put("timestamp", getTimestamp());
+    return stringMap;
+  }
+
   public static String keyToString(final byte [] b, final int o, final int l) {
     if (b == null) return "";
     int rowlength = Bytes.toShort(b, o);
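
As an aside, the new toStringMap() plugs straight into the Jettison JSON
classes this commit uses in HLogPrettyPrinter below. A minimal sketch, assuming
Jettison is on the classpath (the class name and sample cell contents are made
up):

  import org.apache.hadoop.hbase.KeyValue;
  import org.apache.hadoop.hbase.util.Bytes;
  import org.codehaus.jettison.json.JSONObject;

  public class KeyValueJsonSketch {
    public static void main(String[] args) throws Exception {
      // A sample cell: row, family, qualifier, timestamp, value.
      KeyValue kv = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
          Bytes.toBytes("q"), 1309556594000L, Bytes.toBytes("v"));
      // toStringMap() leaves the value out; add it by hand when wanted.
      JSONObject json = new JSONObject(kv.toStringMap());
      json.put("value", Bytes.toStringBinary(kv.getValue()));
      System.out.println(json);
    }
  }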

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java Fri Jul  1 21:33:14 2011
@@ -70,6 +70,7 @@ import org.apache.hadoop.hbase.util.Writ
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.hadoop.hbase.zookeeper.ZKTable;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
+import org.apache.hadoop.hbase.zookeeper.ZKUtil.NodeAndData;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperListener;
 import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
 import org.apache.hadoop.io.Writable;

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/CatalogJanitor.java Fri Jul  1 21:33:14 2011
@@ -44,6 +44,7 @@ import org.apache.hadoop.hbase.client.Re
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.Store;
 import org.apache.hadoop.hbase.regionserver.StoreFile;
+import org.apache.hadoop.hbase.util.Pair;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.Writables;
 
@@ -166,11 +167,11 @@ class CatalogJanitor extends Chore {
   throws IOException {
     boolean result = false;
     // Run checks on each daughter split.
-    boolean hasReferencesA =
+    Pair<Boolean, Boolean> a =
       checkDaughter(parent, rowContent, HConstants.SPLITA_QUALIFIER);
-    boolean hasReferencesB =
+    Pair<Boolean, Boolean> b =
       checkDaughter(parent, rowContent, HConstants.SPLITB_QUALIFIER);
-    if (!hasReferencesA && !hasReferencesB) {
+    if ((a.getFirst() && !a.getSecond()) && (b.getFirst() && !b.getSecond())) {
       LOG.debug("Deleting region " + parent.getRegionNameAsString() +
         " because daughter splits no longer hold references");
       // This latter regionOffline should not be necessary but is done for now
@@ -197,14 +198,26 @@ class CatalogJanitor extends Chore {
    * @param parent
    * @param rowContent
    * @param qualifier
-   * @return True if this daughter still has references to the parent.
+   * @return A pair where the first boolean says whether the daughter region
+   * directory exists in the filesystem and the second says whether the
+   * daughter still holds references to the parent.
    * @throws IOException
    */
-  boolean checkDaughter(final HRegionInfo parent,
+  Pair<Boolean, Boolean> checkDaughter(final HRegionInfo parent,
     final Result rowContent, final byte [] qualifier)
   throws IOException {
     HRegionInfo hri = getDaughterRegionInfo(rowContent, qualifier);
-    return hasReferences(parent, rowContent, hri, qualifier);
+    Pair<Boolean, Boolean> result =
+      checkDaughterInFs(parent, rowContent, hri, qualifier);
+    if (result.getFirst() && !result.getSecond()) {
+      // Remove the daughter from the parent IFF the daughter region exists
+      // in the FS and no longer holds references.  If there is no daughter
+      // region in the filesystem, it must be because of a failed split.  The
+      // ServerShutdownHandler will do the fixup; don't do any deletes in here
+      // that could interfere with that fixup.
+      removeDaughterFromParent(parent, hri, qualifier);
+    }
+    return result;
   }
 
   /**
@@ -242,23 +255,35 @@ class CatalogJanitor extends Chore {
   /**
    * Checks if a daughter region -- either splitA or splitB -- still holds
    * references to parent.  If not, removes reference to the split from
-   * the parent meta region row so we don't check it any more.
+   * the parent meta region row so we don't check it any more.  Also checks
+   * that the daughter region exists in the filesystem.
    * @param parent Parent region name. 
    * @param rowContent Keyed content of the parent row in meta region.
    * @param split Which column family.
    * @param qualifier Which of the daughters to look at, splitA or splitB.
-   * @return True if still has references to parent.
+   * @return A pair where the first boolean says whether the daughter region
+   * directory exists in the filesystem and the second says whether the
+   * daughter still holds references to the parent.
    * @throws IOException
    */
-  boolean hasReferences(final HRegionInfo parent,
+  Pair<Boolean, Boolean> checkDaughterInFs(final HRegionInfo parent,
     final Result rowContent, final HRegionInfo split,
     final byte [] qualifier)
   throws IOException {
-    boolean result = false;
-    if (split == null)  return result;
+    boolean references = false;
+    boolean exists = false;
+    if (split == null)  {
+      return new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
+    }
     FileSystem fs = this.services.getMasterFileSystem().getFileSystem();
     Path rootdir = this.services.getMasterFileSystem().getRootDir();
     Path tabledir = new Path(rootdir, split.getTableNameAsString());
+    Path regiondir = new Path(tabledir, split.getEncodedName());
+    exists = fs.exists(regiondir);
+    if (!exists) {
+      LOG.warn("Daughter regiondir does not exist: " + regiondir.toString());
+      return new Pair<Boolean, Boolean>(exists, Boolean.FALSE);
+    }
     HTableDescriptor parentDescriptor = getTableDescriptor(parent.getTableName());
 
     for (HColumnDescriptor family: parentDescriptor.getFamilies()) {
@@ -275,18 +300,16 @@ class CatalogJanitor extends Chore {
       );
 
       if (ps != null && ps.length > 0) {
-        result = true;
+        references = true;
         break;
       }
     }
-    if (!result) {
-      removeDaughterFromParent(parent, split, qualifier);
-    }
-    return result;
+    return new Pair<Boolean, Boolean>(Boolean.valueOf(exists),
+      Boolean.valueOf(references));
   }
 
   private HTableDescriptor getTableDescriptor(byte[] tableName)
   throws TableExistsException, FileNotFoundException, IOException {
     return this.services.getTableDescriptors().get(Bytes.toString(tableName));
   }
-}
\ No newline at end of file
+}
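
To make the new Pair contract concrete, a tiny sketch of how a caller reads
the two booleans (the helper class and method names are hypothetical, not part
of this commit):

  import org.apache.hadoop.hbase.util.Pair;

  public class DaughterCheckSketch {
    // Cleanable means: the daughter directory exists in the filesystem
    // (first) AND it no longer holds references to the parent (second).
    static boolean cleanable(Pair<Boolean, Boolean> daughter) {
      return daughter.getFirst() && !daughter.getSecond();
    }

    public static void main(String[] args) {
      Pair<Boolean, Boolean> existsNoRefs =
          new Pair<Boolean, Boolean>(Boolean.TRUE, Boolean.FALSE);
      Pair<Boolean, Boolean> missingInFs =
          new Pair<Boolean, Boolean>(Boolean.FALSE, Boolean.FALSE);
      System.out.println(cleanable(existsNoRefs)); // true
      System.out.println(cleanable(missingInFs));  // false: leave fixup to
                                                   // ServerShutdownHandler
    }
  }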

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java Fri Jul  1 21:33:14 2011
@@ -236,11 +236,12 @@ public class ServerShutdownHandler exten
    */
   static void fixupDaughters(final Result result,
       final AssignmentManager assignmentManager,
-      final CatalogTracker catalogTracker) throws IOException {
+      final CatalogTracker catalogTracker)
+  throws IOException {
     fixupDaughter(result, HConstants.SPLITA_QUALIFIER, assignmentManager,
-        catalogTracker);
+      catalogTracker);
     fixupDaughter(result, HConstants.SPLITB_QUALIFIER, assignmentManager,
-        catalogTracker);
+      catalogTracker);
   }
 
   /**
@@ -281,8 +282,8 @@ public class ServerShutdownHandler exten
   }
 
   /**
-   * Look for presence of the daughter OR of a split of the daughter. Daughter
-   * could have been split over on regionserver before a run of the
+   * Look for presence of the daughter OR of a split of the daughter in .META.
+   * Daughter could have been split over on regionserver before a run of the
    * catalogJanitor had chance to clear reference from parent.
    * @param daughter Daughter region to search for.
    * @throws IOException 
@@ -344,4 +345,4 @@ public class ServerShutdownHandler exten
       return false;
     }
   }
-}
+}
\ No newline at end of file

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitRequest.java Fri Jul  1 21:33:14 2011
@@ -62,15 +62,20 @@ class SplitRequest implements Runnable {
         st.execute(this.server, this.server);
       } catch (Exception e) {
         try {
-          LOG.info("Running rollback of failed split of " + parent + "; "
-              + e.getMessage());
-          st.rollback(this.server, this.server);
-          LOG.info("Successful rollback of failed split of " + parent);
+          LOG.info("Running rollback/cleanup of failed split of " +
+            parent.getRegionNameAsString() + "; " + e.getMessage());
+          if (st.rollback(this.server, this.server)) {
+            LOG.info("Successful rollback of failed split of " +
+              parent.getRegionNameAsString());
+          } else {
+            this.server.abort("Abort; we got an error after point-of-no-return");
+          }
         } catch (RuntimeException ee) {
-          // If failed rollback, kill server to avoid having a hole in table.
-          LOG.info("Failed rollback of failed split of "
-              + parent.getRegionNameAsString() + " -- aborting server", ee);
-          this.server.abort("Failed split");
+          String msg = "Failed rollback of failed split of " +
+            parent.getRegionNameAsString() + " -- aborting server";
+          // If failed rollback, kill this server to avoid having a hole in table.
+          LOG.info(msg, ee);
+          this.server.abort(msg);
         }
         return;
       }

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/SplitTransaction.java Fri Jul  1 21:33:14 2011
@@ -60,7 +60,7 @@ import com.google.common.util.concurrent
 
 /**
  * Executes region split as a "transaction".  Call {@link #prepare()} to setup
- * the transaction, {@link #execute(OnlineRegions)} to run the transaction and
+ * the transaction, {@link #execute(Server, RegionServerServices)} to run the transaction and
  * {@link #rollback(OnlineRegions)} to cleanup if execute fails.
  *
  * <p>Here is an example of how you would use this class:
@@ -68,10 +68,10 @@ import com.google.common.util.concurrent
  *  SplitTransaction st = new SplitTransaction(this.conf, parent, midKey)
  *  if (!st.prepare()) return;
  *  try {
- *    st.execute(myOnlineRegions);
+ *    st.execute(server, services);
  *  } catch (IOException ioe) {
  *    try {
- *      st.rollback(myOnlineRegions);
+ *      st.rollback(server, services);
  *      return;
  *    } catch (RuntimeException e) {
  *      myAbortable.abort("Failed split, abort");
@@ -127,7 +127,12 @@ public class SplitTransaction {
     /**
      * Started in on the creation of the second daughter region.
      */
-    STARTED_REGION_B_CREATION
+    STARTED_REGION_B_CREATION,
+    /**
+     * Point of no return.
+     * If we got here, then transaction is not recoverable.
+     */
+    PONR
   }
 
   /*
@@ -284,9 +289,13 @@ public class SplitTransaction {
         this.parent.getRegionInfo(), a.getRegionInfo(), b.getRegionInfo());
     }
 
-    // This is the point of no return.  We are committed to the split now.  We
-    // have still the daughter regions to open but meta has been changed.
-    // If we fail from here on out, we cannot rollback so, we'll just abort.
+    // This is the point of no return.  Adding edits to .META. can fail in
+    // various interesting ways, the most interesting of which is a timeout
+    // BUT the edits all went through (See HBASE-3872).
+    this.journal.add(JournalEntry.PONR);
+
+    // TODO: Could we be smarter about the sequence in which we do these steps?
+
     if (!testing) {
       // Open daughters in parallel.
       DaughterOpener aOpener = new DaughterOpener(server, services, a);
@@ -297,7 +306,8 @@ public class SplitTransaction {
         aOpener.join();
         bOpener.join();
       } catch (InterruptedException e) {
-        server.abort("Exception running daughter opens", e);
+        Thread.currentThread().interrupt();
+        throw new IOException("Interrupted " + e.getMessage());
       }
     }
 
@@ -598,13 +608,15 @@ public class SplitTransaction {
   }
 
   /**
-   * @param or Object that can online/offline parent region.  Can be passed null
-   * by unit tests.
-   * @return The region we were splitting
+   * @param server Hosting server instance.
+   * @param services Services of the hosting region server.
    * @throws IOException If thrown, rollback failed.  Take drastic action.
+   * @return True if we successfully rolled back, false if we got to the point
+   * of no return and so now need to abort the server to minimize damage.
    */
-  public void rollback(final Server server, final OnlineRegions or)
+  public boolean rollback(final Server server, final RegionServerServices services)
   throws IOException {
+    boolean result = true;
     FileSystem fs = this.parent.getFilesystem();
     ListIterator<JournalEntry> iterator =
       this.journal.listIterator(this.journal.size());
@@ -642,13 +654,19 @@ public class SplitTransaction {
         break;
 
       case OFFLINED_PARENT:
-        if (or != null) or.addToOnlineRegions(this.parent);
+        services.addToOnlineRegions(this.parent);
+        break;
+
+      case PONR:
+        // We got to the point-of-no-return so we need to abort after cleanup
+        result = false;
         break;
 
       default:
         throw new RuntimeException("Unhandled journal entry: " + je);
       }
     }
+    return result;
   }
 
   HRegionInfo getFirstDaughter() {
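
With rollback() now returning a boolean, the caller decides between a clean
recovery and a server abort. A hedged sketch of the caller-side pattern,
following the class javadoc's own example fragment (conf, parent, midKey,
server, and services are stand-ins):

  SplitTransaction st = new SplitTransaction(conf, parent, midKey);
  if (!st.prepare()) return;
  try {
    st.execute(server, services);
  } catch (Exception e) {
    try {
      // false means we crossed the PONR journal entry: .META. may already
      // reflect the split, so the only safe recovery is a server abort.
      if (!st.rollback(server, services)) {
        server.abort("Error after point-of-no-return; aborting");
      }
    } catch (RuntimeException ee) {
      server.abort("Failed rollback of failed split; aborting");
    }
  }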

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLog.java Fri Jul  1 21:33:14 2011
@@ -33,6 +33,7 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Map;
 import java.util.NavigableSet;
+import java.util.Arrays;
 import java.util.SortedMap;
 import java.util.TreeMap;
 import java.util.TreeSet;
@@ -1441,30 +1442,25 @@ public class HLog implements Syncable {
       usage();
       System.exit(-1);
     }
-    boolean dump = true;
-    if (args[0].compareTo("--dump") != 0) {
-      if (args[0].compareTo("--split") == 0) {
-        dump = false;
-      } else {
-        usage();
-        System.exit(-1);
-      }
-    }
-    Configuration conf = HBaseConfiguration.create();
-    for (int i = 1; i < args.length; i++) {
-      try {
-        conf.set("fs.default.name", args[i]);
-        conf.set("fs.defaultFS", args[i]);
-        Path logPath = new Path(args[i]);
-        if (dump) {
-          dump(conf, logPath);
-        } else {
+    // either dump using the HLogPrettyPrinter or split, depending on args
+    if (args[0].compareTo("--dump") == 0) {
+      HLogPrettyPrinter.run(Arrays.copyOfRange(args, 1, args.length));
+    } else if (args[0].compareTo("--split") == 0) {
+      Configuration conf = HBaseConfiguration.create();
+      for (int i = 1; i < args.length; i++) {
+        try {
+          conf.set("fs.default.name", args[i]);
+          conf.set("fs.defaultFS", args[i]);
+          Path logPath = new Path(args[i]);
           split(conf, logPath);
+        } catch (Throwable t) {
+          t.printStackTrace(System.err);
+          System.exit(-1);
         }
-      } catch (Throwable t) {
-        t.printStackTrace(System.err);
-        System.exit(-1);
       }
+    } else {
+      usage();
+      System.exit(-1);
     }
   }
 }
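
The new dispatch hands everything after --dump to HLogPrettyPrinter.run(), so
the printer's flags work from the HLog entry point too. A small sketch of
driving the dump path programmatically (the WAL path is a made-up example):

  import org.apache.hadoop.hbase.regionserver.wal.HLogPrettyPrinter;

  public class DumpWalSketch {
    public static void main(String[] args) throws Exception {
      // -j emits JSON, -p includes values; both flags are defined in
      // HLogPrettyPrinter.run() below.
      HLogPrettyPrinter.run(new String[] {
          "-j", "-p", "/hbase/.logs/example-server/12345" });
    }
  }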

Modified: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogKey.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogKey.java?rev=1142101&r1=1142100&r2=1142101&view=diff
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogKey.java (original)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogKey.java Fri Jul  1 21:33:14 2011
@@ -23,6 +23,8 @@ import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.EOFException;
 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
 
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -121,6 +123,21 @@ public class HLogKey implements Writable
       logSeqNum;
   }
 
+  /**
+   * Produces a string map for this key. Useful for programmatic use and
+   * manipulation of the data stored in an HLogKey, for example, printing 
+   * as JSON.
+   * 
+   * @return a Map containing data from this key
+   */
+  public Map<String, Object> toStringMap() {
+    Map<String, Object> stringMap = new HashMap<String, Object>();
+    stringMap.put("table", Bytes.toStringBinary(tablename));
+    stringMap.put("region", Bytes.toStringBinary(encodedRegionName));
+    stringMap.put("sequence", logSeqNum);
+    return stringMap;
+  }
+
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {

Added: hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java
URL: http://svn.apache.org/viewvc/hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java?rev=1142101&view=auto
==============================================================================
--- hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java (added)
+++ hbase/trunk/src/main/java/org/apache/hadoop/hbase/regionserver/wal/HLogPrettyPrinter.java Fri Jul  1 21:33:14 2011
@@ -0,0 +1,366 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver.wal;
+
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Date;
+import java.util.List;
+
+import org.apache.commons.cli.CommandLine;
+import org.apache.commons.cli.CommandLineParser;
+import org.apache.commons.cli.HelpFormatter;
+import org.apache.commons.cli.Options;
+import org.apache.commons.cli.ParseException;
+import org.apache.commons.cli.PosixParser;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.regionserver.wal.HLog.Reader;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+
+/**
+ * HLogPrettyPrinter prints the contents of a given HLog with a variety of
+ * options affecting formatting and extent of content.
+ * 
+ * It targets two use cases: pretty printing for ease of debugging directly by
+ * humans, and JSON output for consumption by monitoring and/or maintenance
+ * scripts.
+ * 
+ * It can filter by row, region, or sequence id.
+ * 
+ * It can also toggle output of values.
+ * 
+ */
+public class HLogPrettyPrinter {
+  private boolean outputValues;
+  private boolean outputJSON;
+  // The following enable filtering by sequence, region, and row, respectively
+  private long sequence;
+  private String region;
+  private String row;
+  // enable in order to output a single list of transactions from several files
+  private boolean persistentOutput;
+  private boolean firstTxn;
+  // useful for programmatic capture of JSON output
+  private PrintStream out;
+
+  /**
+   * Basic constructor that simply initializes values to reasonable defaults.
+   */
+  public HLogPrettyPrinter() {
+    outputValues = false;
+    outputJSON = false;
+    sequence = -1;
+    region = null;
+    row = null;
+    persistentOutput = false;
+    firstTxn = true;
+    out = System.out;
+  }
+
+  /**
+   * Fully specified constructor.
+   * 
+   * @param outputValues
+   *          when true, enables output of values along with other log
+   *          information
+   * @param outputJSON
+   *          when true, enables output in JSON format rather than a
+   *          "pretty string"
+   * @param sequence
+   *          when nonnegative, serves as a filter; only log entries with this
+   *          sequence id will be printed
+   * @param region
+   *          when not null, serves as a filter; only log entries from this
+   *          region will be printed
+   * @param row
+   *          when not null, serves as a filter; only log entries from this row
+   *          will be printed
+   * @param persistentOutput
+   *          keeps a single list running for multiple files. If enabled,
+   *          endPersistentOutput() must be called when output is complete.
+   * @param out
+   *          Specifies an alternative to stdout for the destination of this 
+   *          PrettyPrinter's output.
+   */
+  public HLogPrettyPrinter(boolean outputValues, boolean outputJSON,
+      long sequence, String region, String row, boolean persistentOutput,
+      PrintStream out) {
+    this.outputValues = outputValues;
+    this.outputJSON = outputJSON;
+    this.sequence = sequence;
+    this.region = region;
+    this.row = row;
+    this.out = out;
+    this.firstTxn = true;
+    // Let beginPersistentOutput() set the persistentOutput flag itself:
+    // setting the flag first would turn the call into a no-op and the
+    // opening JSON bracket would never be written.  It also writes to
+    // 'out', so 'out' must be assigned before the call.
+    if (persistentOutput) {
+      beginPersistentOutput();
+    }
+  }
+
+  /**
+   * turns value output on
+   */
+  public void enableValues() {
+    outputValues = true;
+  }
+
+  /**
+   * turns value output off
+   */
+  public void disableValues() {
+    outputValues = false;
+  }
+
+  /**
+   * turns JSON output on
+   */
+  public void enableJSON() {
+    outputJSON = true;
+  }
+
+  /**
+   * turns JSON output off, and turns on "pretty strings" for human consumption
+   */
+  public void disableJSON() {
+    outputJSON = false;
+  }
+
+  /**
+   * sets the sequence id by which output will be filtered
+   * 
+   * @param sequence
+   *          when nonnegative, serves as a filter; only log entries with this
+   *          sequence id will be printed
+   */
+  public void setSequenceFilter(long sequence) {
+    this.sequence = sequence;
+  }
+
+  /**
+   * sets the region by which output will be filtered
+   * 
+   * @param region
+   *          when not null, serves as a filter; only log entries from this
+   *          region will be printed
+   */
+  public void setRegionFilter(String region) {
+    this.region = region;
+  }
+
+  /**
+   * sets the row by which output will be filtered
+   * 
+   * @param row
+   *          when not null, serves as a filter; only log entries from this row
+   *          will be printed
+   */
+  public void setRowFilter(String row) {
+    this.row = row;
+  }
+
+  /**
+   * enables output as a single, persistent list. at present, only relevant in
+   * the case of JSON output.
+   */
+  public void beginPersistentOutput() {
+    if (persistentOutput)
+      return;
+    persistentOutput = true;
+    firstTxn = true;
+    if (outputJSON)
+      out.print("[");
+  }
+
+  /**
+   * ends output of a single, persistent list. at present, only relevant in the
+   * case of JSON output.
+   */
+  public void endPersistentOutput() {
+    if (!persistentOutput)
+      return;
+    persistentOutput = false;
+    if (outputJSON)
+      out.print("]");
+  }
+
+  /**
+   * reads a log file and outputs its contents, one transaction at a time, as
+   * specified by the currently configured options
+   * 
+   * @param conf
+   *          the HBase configuration relevant to this log file
+   * @param p
+   *          the path of the log file to be read
+   * @throws IOException
+   *           may be unable to access the configured filesystem or requested
+   *           file.
+   */
+  public void processFile(final Configuration conf, final Path p)
+      throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    if (!fs.exists(p)) {
+      throw new FileNotFoundException(p.toString());
+    }
+    if (!fs.isFile(p)) {
+      throw new IOException(p + " is not a file");
+    }
+    if (outputJSON && !persistentOutput) {
+      out.print("[");
+      firstTxn = true;
+    }
+    Reader log = HLog.getReader(fs, p, conf);
+    try {
+      HLog.Entry entry;
+      while ((entry = log.next()) != null) {
+        HLogKey key = entry.getKey();
+        WALEdit edit = entry.getEdit();
+        // begin building a transaction structure
+        JSONObject txn = new JSONObject(key.toStringMap());
+        // check output filters
+        if (sequence >= 0 && ((Long) txn.get("sequence")) != sequence)
+          continue;
+        if (region != null && !((String) txn.get("region")).equals(region))
+          continue;
+        // initialize list into which we will store atomic actions
+        JSONArray actions = new JSONArray();
+        for (KeyValue kv : edit.getKeyValues()) {
+          // add atomic operation to txn
+          JSONObject op = new JSONObject(kv.toStringMap());
+          if (outputValues)
+            op.put("value", Bytes.toStringBinary(kv.getValue()));
+          if (row == null || ((String) op.get("row")).equals(row))
+            actions.put(op);
+        }
+        if (actions.length() == 0)
+          continue;
+        txn.put("actions", actions);
+        if (outputJSON) {
+          // JSON output is a straightforward "toString" on the txn object
+          if (firstTxn)
+            firstTxn = false;
+          else
+            out.print(",");
+          out.print(txn);
+        } else {
+          // Pretty output, complete with indentation by atomic action
+          out.println("Sequence " + txn.getLong("sequence") + " "
+              + "from region " + txn.getString("region") + " " + "in table "
+              + txn.getString("table"));
+          for (int i = 0; i < actions.length(); i++) {
+            JSONObject op = actions.getJSONObject(i);
+            out.println("  Action:");
+            out.println("    row: " + op.getString("row"));
+            out.println("    column: " + op.getString("family") + ":"
+                + op.getString("qualifier"));
+            out.println("    at time: "
+                + (new Date(op.getLong("timestamp"))));
+            if (outputValues)
+              out.println("    value: " + op.get("value"));
+          }
+        }
+      }
+    } catch (JSONException e) {
+      e.printStackTrace();
+    } finally {
+      log.close();
+    }
+    if (outputJSON && !persistentOutput) {
+      out.print("]");
+    }
+  }
+
+  /**
+   * Pass one or more log file names and formatting options and it will dump out
+   * a text version of the contents on <code>stdout</code>.
+   * 
+   * @param args
+   *          Command line arguments
+   * @throws IOException
+   *           Thrown upon file system errors etc.
+   */
+  public static void run(String[] args) throws IOException {
+    // create options
+    Options options = new Options();
+    options.addOption("h", "help", false, "Output help message");
+    options.addOption("j", "json", false, "Output JSON");
+    options.addOption("p", "printvals", false, "Print values");
+    options.addOption("r", "region", true,
+        "Region to filter by. Pass region name; e.g. '.META.,,1'");
+    options.addOption("s", "sequence", true,
+        "Sequence to filter by. Pass sequence number.");
+    options.addOption("w", "row", true, "Row to filter by. Pass row name.");
+
+    HLogPrettyPrinter printer = new HLogPrettyPrinter();
+    CommandLineParser parser = new PosixParser();
+    List files = null;
+    try {
+      CommandLine cmd = parser.parse(options, args);
+      files = cmd.getArgList();
+      if (files.size() == 0 || cmd.hasOption("h")) {
+        HelpFormatter formatter = new HelpFormatter();
+        formatter.printHelp("HFile filename(s) ", options, true);
+        System.exit(-1);
+      }
+      // configure the pretty printer using command line options
+      if (cmd.hasOption("p"))
+        printer.enableValues();
+      if (cmd.hasOption("j"))
+        printer.enableJSON();
+      if (cmd.hasOption("r"))
+        printer.setRegionFilter(cmd.getOptionValue("r"));
+      if (cmd.hasOption("s"))
+        printer.setSequenceFilter(Long.parseLong(cmd.getOptionValue("s")));
+      if (cmd.hasOption("w"))
+        printer.setRowFilter(cmd.getOptionValue("w"));
+    } catch (ParseException e) {
+      e.printStackTrace();
+      HelpFormatter formatter = new HelpFormatter();
+      formatter.printHelp("HFile filename(s) ", options, true);
+      System.exit(-1);
+    }
+    // get configuration, file system, and process the given files
+    Configuration conf = HBaseConfiguration.create();
+    conf.set("fs.defaultFS",
+        conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
+    conf.set("fs.default.name",
+        conf.get(org.apache.hadoop.hbase.HConstants.HBASE_DIR));
+    // begin output
+    printer.beginPersistentOutput();
+    for (Object f : files) {
+      Path file = new Path((String) f);
+      FileSystem fs = file.getFileSystem(conf);
+      if (!fs.exists(file)) {
+        System.err.println("ERROR, file doesnt exist: " + file);
+        return;
+      }
+      printer.processFile(conf, file);
+    }
+    printer.endPersistentOutput();
+  }
+}
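
Finally, because the fully specified constructor takes a PrintStream, JSON
output can be captured in memory, as the comment on the 'out' field suggests.
A minimal sketch under that assumption (the WAL path and row filter are
illustrative):

  import java.io.ByteArrayOutputStream;
  import java.io.PrintStream;

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hbase.HBaseConfiguration;
  import org.apache.hadoop.hbase.regionserver.wal.HLogPrettyPrinter;

  public class CaptureWalJsonSketch {
    public static void main(String[] args) throws Exception {
      ByteArrayOutputStream buf = new ByteArrayOutputStream();
      // values off, JSON on, no sequence or region filter, only "row1",
      // persistent output (one JSON array across files), into the buffer.
      HLogPrettyPrinter printer = new HLogPrettyPrinter(false, true, -1L,
          null, "row1", true, new PrintStream(buf));
      Configuration conf = HBaseConfiguration.create();
      printer.processFile(conf,
          new Path("/hbase/.logs/example-server/12345"));
      printer.endPersistentOutput();
      System.out.println(buf.toString()); // a single JSON array of txns
    }
  }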