Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/05/07 21:58:57 UTC

svn commit: r535970 [1/4] - in /lucene/hadoop/trunk: ./ src/contrib/hbase/bin/ src/contrib/hbase/conf/ src/contrib/hbase/src/java/org/apache/hadoop/hbase/ src/contrib/hbase/src/test/org/apache/hadoop/hbase/

Author: cutting
Date: Mon May  7 12:58:53 2007
New Revision: 535970

URL: http://svn.apache.org/viewvc?view=rev&rev=535970
Log:
HADOOP-1325.  First complete, functioning version of HBase.  Contributed by Jim Kellerman.

Added:
    lucene/hadoop/trunk/src/contrib/hbase/bin/
    lucene/hadoop/trunk/src/contrib/hbase/bin/hbase
    lucene/hadoop/trunk/src/contrib/hbase/bin/hbase-config.sh
    lucene/hadoop/trunk/src/contrib/hbase/conf/regionservers
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HInternalScannerInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegiondirReader.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/package.html
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestGet.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestHBaseCluster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestScanner.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestToString.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HBaseConfiguration.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMaster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMasterInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMemcache.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HMsg.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegion.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInfo.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionInterface.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HRegionServer.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerAddress.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HServerInfo.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStore.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HStoreKey.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/LabelledData.java
    lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/Leases.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/MiniHBaseCluster.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestHMemcache.java
    lucene/hadoop/trunk/src/contrib/hbase/src/test/org/apache/hadoop/hbase/TestHRegion.java

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Mon May  7 12:58:53 2007
@@ -336,6 +336,9 @@
     with a random, exponentially increasing backoff time, to avoid
     overloading the namenode on, e.g., job start.  (Hairong Kuang via cutting)
 
+100. HADOOP-1325.  First complete, functioning version of HBase.
+    (Jim Kellerman via cutting)
+
 
 Release 0.12.3 - 2007-04-06
 

Added: lucene/hadoop/trunk/src/contrib/hbase/bin/hbase
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/bin/hbase?view=auto&rev=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/bin/hbase (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/bin/hbase Mon May  7 12:58:53 2007
@@ -0,0 +1,207 @@
+#! /bin/sh
+# 
+# The hbase command script.  Based on the hadoop command script, putting
+# hbase classes, libs and configurations ahead of hadoop's.
+#
+# TODO: Narrow the amount of duplicated code.
+#
+# Environment Variables:
+#
+#   JAVA_HOME        The java implementation to use.  Required.
+#
+#   HBASE_HEAPSIZE   The maximum amount of heap to use, in MB. 
+#                    Default is 1000.
+#
+#   HBASE_OPTS       Extra Java runtime options.
+#
+#   HBASE_CONF_DIR   Alternate conf dir. Default is ${HBASE_HOME}/conf.
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#
+#   HADOOP_HOME      Hadoop home directory.
+#
+#   HADOOP_ROOT_LOGGER The root appender. Default is INFO,console
+#
+
+bin=`dirname "$0"`
+bin=`cd "$bin"; pwd`
+
+# This will set HBASE_HOME, HADOOP_HOME, etc.
+. "$bin"/hbase-config.sh
+
+cygwin=false
+case "`uname`" in
+CYGWIN*) cygwin=true;;
+esac
+
+# if no args specified, show usage
+if [ $# = 0 ]; then
+  echo "Usage: hbase [--hadoop=hadoopdir] <command>"
+  echo "where <command> is one of:"
+  echo "  client           run a hbase client"
+  echo "  reader           run a hbase region directory reader"
+  echo "  logreader        output content of a logfile"
+  echo "  master           run a hbase HMaster node" 
+  echo "  regionserver     run a hbase HRegionServer node" 
+  echo " or"
+  echo "  CLASSNAME        run the class named CLASSNAME"
+  echo "Most commands print help when invoked w/o parameters."
+  exit 1
+fi
+
+# get arguments
+COMMAND=$1
+shift
+
+# Source the hadoop-env.sh.  Will have JAVA_HOME defined. There is no
+# hbase-env.sh as yet.
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  . "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters
+if [ "$JAVA_HOME" != "" ]; then
+  #echo "run java in $JAVA_HOME"
+  JAVA_HOME=$JAVA_HOME
+fi
+  
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# check envvars which might override default args
+if [ "$HBASE_HEAPSIZE" != "" ]; then
+  #echo "run with heapsize $HBASE_HEAPSIZE"
+  JAVA_HEAP_MAX="-Xmx""$HBASE_HEAPSIZE""m"
+  #echo $JAVA_HEAP_MAX
+fi
+
+# CLASSPATH initially contains $HBASE_CONF_DIR
+# Add HADOOP_CONF_DIR if it's been defined.
+CLASSPATH="${HBASE_CONF_DIR}"
+if [ ! "$HADOOP_CONF_DIR" = "" ]; then
+    CLASSPATH="${CLASSPATH}:${HADOOP_CONF_DIR}"
+fi
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add hbase and hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/contrib/hbase/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/contrib/hbase/classes
+fi
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
+fi
+if [ -d "$HADOOP_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
+fi
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add core hbase, hadoop jar & webapps to CLASSPATH
+for f in $HBASE_HOME/hadoop-hbase-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+for f in $HADOOP_HOME/build/contrib/hbase/hadoop-hbase-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+if [ -d "$HADOOP_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
+fi
+for f in $HADOOP_HOME/hadoop-*-core.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add hbase and hadoop libs to CLASSPATH
+for f in $HBASE_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+for f in $HADOOP_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+
+# default log directory & file
+# TODO: Should we log to hadoop or under hbase?
+if [ "$HADOOP_LOG_DIR" = "" ]; then
+  HADOOP_LOG_DIR="$HADOOP_HOME/logs"
+fi
+if [ "$HADOOP_LOGFILE" = "" ]; then
+  HADOOP_LOGFILE='hbase.log'
+fi
+
+# cygwin path translation
+if $cygwin; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+  HADOOP_HOME=`cygpath -d "$HADOOP_HOME"`
+  HBASE_HOME=`cygpath -d "$HBASE_HOME"`
+  HADOOP_LOG_DIR=`cygpath -d "$HADOOP_LOG_DIR"`
+fi
+
+# TODO: Can this be put into a separate script so we don't have to duplicate
+# hadoop command script code?
+# setup 'java.library.path' for native-hadoop code if necessary
+JAVA_LIBRARY_PATH=''
+if [ -d "${HADOOP_HOME}/build/native" -o -d "${HADOOP_HOME}/lib/native" ]; then
+  JAVA_PLATFORM=`CLASSPATH=${CLASSPATH} ${JAVA} org.apache.hadoop.util.PlatformName | sed -e "s/ /_/g"`
+  
+  if [ -d "$HADOOP_HOME/build/native" ]; then
+    JAVA_LIBRARY_PATH=${HADOOP_HOME}/build/native/${JAVA_PLATFORM}/lib
+  fi
+  
+  if [ -d "${HADOOP_HOME}/lib/native" ]; then
+    if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+      JAVA_LIBRARY_PATH=${JAVA_LIBRARY_PATH}:${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
+    else
+      JAVA_LIBRARY_PATH=${HADOOP_HOME}/lib/native/${JAVA_PLATFORM}
+    fi
+  fi
+fi
+
+# cygwin path translation
+if $cygwin; then
+  JAVA_LIBRARY_PATH=`cygpath -p "$JAVA_LIBRARY_PATH"`
+fi
+ 
+# restore ordinary behaviour
+unset IFS
+
+# figure out which class to run
+if [ "$COMMAND" = "client" ] ; then
+  CLASS='org.apache.hadoop.hbase.HClient'
+elif [ "$COMMAND" = "reader" ] ; then
+  CLASS='org.apache.hadoop.hbase.HRegiondirReader'
+elif [ "$COMMAND" = "logreader" ] ; then
+  CLASS='org.apache.hadoop.hbase.HLog'
+elif [ "$COMMAND" = "master" ] ; then
+  CLASS='org.apache.hadoop.hbase.HMaster'
+elif [ "$COMMAND" = "regionserver" ] ; then
+  CLASS='org.apache.hadoop.hbase.HRegionServer'
+else
+  CLASS=$COMMAND
+fi
+
+
+HBASE_OPTS="$HBASE_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+HBASE_OPTS="$HBASE_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
+HBASE_OPTS="$HBASE_OPTS -Dhadoop.home.dir=$HADOOP_HOME"
+HBASE_OPTS="$HBASE_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
+HBASE_OPTS="$HBASE_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+HBASE_OPTS="$HBASE_OPTS -Dhbase.home.dir=$HBASE_HOME"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+  HBASE_OPTS="$HBASE_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi  
+
+# run it
+exec "$JAVA" $JAVA_HEAP_MAX $HBASE_OPTS -classpath "$CLASSPATH" $CLASS "$@"

Added: lucene/hadoop/trunk/src/contrib/hbase/bin/hbase-config.sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/bin/hbase-config.sh?view=auto&rev=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/bin/hbase-config.sh (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/bin/hbase-config.sh Mon May  7 12:58:53 2007
@@ -0,0 +1,62 @@
+# included in all the hbase scripts with source command
+# should not be executable directly
+# also should not be passed any arguments, since we need original $*
+
+# resolve links - $0 may be a softlink
+
+this="$0"
+while [ -h "$this" ]; do
+  ls=`ls -ld "$this"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    this="$link"
+  else
+    this=`dirname "$this"`/"$link"
+  fi
+done
+
+# convert relative path to absolute path
+bin=`dirname "$this"`
+script=`basename "$this"`
+bin=`cd "$bin"; pwd`
+this="$bin/$script"
+
+# the root of the hbase installation
+export HBASE_HOME=`dirname "$this"`/..
+
+# Check to see if the conf dir or hadoop home are given as optional arguments
+while [ $# -gt 1 ]
+do
+  case $1 in
+    --config=*)
+        HADOOP_CONF_DIR=`echo $1|sed 's/[^=]*=\(.*\)/\1/'`
+        shift
+      ;;
+    --hbaseconfig=*)
+        HBASE_CONF_DIR=`echo $1|sed 's/[^=]*=\(.*\)/\1/'`
+        shift
+      ;;
+
+    --hadoop=*)
+        HADOOP_HOME=`echo $1|sed 's/[^=]*=\(.*\)/\1/'`
+        shift
+      ;;
+    --hosts=*)
+        regionservers=`echo $1|sed 's/[^=]*=\(.*\)/\1/'`
+        shift
+      ;;
+
+      *)
+        break
+      ;; 
+  esac
+done
+ 
+# If no hadoop home is specified, then we assume it's above this directory.
+HADOOP_HOME="${HADOOP_HOME:-$HBASE_HOME/../../../}"
+# Allow alternate hadoop conf dir location.
+HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
+# Allow alternate hbase conf dir location.
+HBASE_CONF_DIR="${HBASE_CONF_DIR:-$HBASE_HOME/conf}"
+# List of hbase region servers.
+HBASE_REGIONSERVERS="${HBASE_REGIONSERVERS:-$HBASE_HOME/conf/regionservers}"

Modified: lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/conf/hbase-default.xml Mon May  7 12:58:53 2007
@@ -42,7 +42,43 @@
     <name>hbase.master.meta.thread.rescanfrequency</name>
     <value>60000</value>
     <description>How long the HMaster sleeps (in milliseconds) between scans of
-    the META table.
+    the root and meta tables.
     </description>
   </property>
+  <property>
+    <name>hbase.master.lease.period</name>
+    <value>30000</value>
+    <description>HMaster server lease period in milliseconds. Default is
+    30 seconds.</description>
+  </property>
+  <property>
+    <name>hbase.server.thread.wakefrequency</name>
+    <value>10000</value>
+    <description>Time to sleep in between searches for work (in milliseconds).
+    Used as sleep interval by service threads such as META scanner and log roller.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.lease.period</name>
+    <value>30000</value>
+    <description>HRegion server lease period in milliseconds. Default is
+    30 seconds.</description>
+  </property>
+  <property>
+    <name>hbase.regionserver.handler.count</name>
+    <value>10</value>
+    <description>Count of RPC Server instances spun up on RegionServers.
+    Same property is used by the HMaster for count of master handlers.
+    Default is 10.
+    </description>
+  </property>
+  <property>
+    <name>hbase.regionserver.msginterval</name>
+    <value>15000</value>
+    <description>Interval between messages from the RegionServer to HMaster
+    in milliseconds.  Default is 15 seconds. Set this value low if you want unit
+    tests to be responsive.
+    </description>
+  </property>
+
 </configuration>

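As an aside (not part of the commit): these properties are read through HBaseConfiguration, which, with the change to HBaseConfiguration.java further down, layers hbase-default.xml and hbase-site.xml onto the standard Hadoop Configuration lookup. A minimal, illustrative Java sketch of reading a few of them follows; the property names come from the file above, while the class name ShowHBaseConf and the fallback defaults are only examples.

    import org.apache.hadoop.hbase.HBaseConfiguration;

    public class ShowHBaseConf {
      public static void main(String[] args) {
        // HBaseConfiguration registers hbase-default.xml (and hbase-site.xml)
        // as default resources on top of the regular Hadoop configuration.
        HBaseConfiguration conf = new HBaseConfiguration();
        // Each lookup falls back to the second argument if the property is unset.
        long masterLease = conf.getLong("hbase.master.lease.period", 30 * 1000);
        int handlerCount = conf.getInt("hbase.regionserver.handler.count", 10);
        long msgInterval = conf.getLong("hbase.regionserver.msginterval", 15 * 1000);
        System.out.println("master lease=" + masterLease + "ms, handlers="
            + handlerCount + ", msginterval=" + msgInterval + "ms");
      }
    }
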
Added: lucene/hadoop/trunk/src/contrib/hbase/conf/regionservers
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/conf/regionservers?view=auto&rev=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/conf/regionservers (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/conf/regionservers Mon May  7 12:58:53 2007
@@ -0,0 +1 @@
+localhost

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HAbstractScanner.java Mon May  7 12:58:53 2007
@@ -30,7 +30,7 @@
  * Abstract base class that implements the HScannerInterface.
  * Used by the concrete HMemcacheScanner and HStoreScanners
  ******************************************************************************/
-public abstract class HAbstractScanner implements HScannerInterface {
+public abstract class HAbstractScanner implements HInternalScannerInterface {
 
   // Pattern to determine if a column key is a regex
 
@@ -51,6 +51,7 @@
   // 3. Simple match: compare column family + column key literally
   
   private class ColumnMatcher {
+    private boolean wildCardmatch;
     private MATCH_TYPE matchType;
     private String family;
     private Pattern columnMatcher;
@@ -69,14 +70,17 @@
         if(columnkey == null || columnkey.length() == 0) {
           this.matchType = MATCH_TYPE.FAMILY_ONLY;
           this.family = column.substring(0, colpos);
+          this.wildCardmatch = true;
 
         } else if(isRegexPattern.matcher(columnkey).matches()) {
           this.matchType = MATCH_TYPE.REGEX;
           this.columnMatcher = Pattern.compile(column);
+          this.wildCardmatch = true;
 
         } else {
           this.matchType = MATCH_TYPE.SIMPLE;
           this.col = col;
+          this.wildCardmatch = false;
         }
       } catch(Exception e) {
         throw new IOException("Column: " + column + ": " + e.getMessage());
@@ -99,8 +103,12 @@
         throw new IOException("Invalid match type: " + this.matchType);
       }
     }
+    
+    boolean isWildCardMatch() {
+      return this.wildCardmatch;
+    }
   }
-  
+
   protected TreeMap<Text, Vector<ColumnMatcher>> okCols;        // Holds matchers for each column family 
   
   protected boolean scannerClosed = false;                      // True when scanning is done
@@ -109,14 +117,17 @@
   protected BytesWritable vals[];                               // Values that correspond to those keys
   
   protected long timestamp;                                     // The timestamp to match entries against
+  private boolean wildcardMatch;
+  private boolean multipleMatchers;
   
   protected DataOutputBuffer outbuf = new DataOutputBuffer();
   protected DataInputBuffer inbuf = new DataInputBuffer();
 
   /** Constructor for abstract base class */
   HAbstractScanner(long timestamp, Text[] targetCols) throws IOException {
-    
     this.timestamp = timestamp;
+    this.wildcardMatch = false;
+    this.multipleMatchers = false;
     this.okCols = new TreeMap<Text, Vector<ColumnMatcher>>();
     for(int i = 0; i < targetCols.length; i++) {
       Text family = HStoreKey.extractFamily(targetCols[i]);
@@ -124,7 +135,14 @@
       if(matchers == null) {
         matchers = new Vector<ColumnMatcher>();
       }
-      matchers.add(new ColumnMatcher(targetCols[i]));
+      ColumnMatcher matcher = new ColumnMatcher(targetCols[i]);
+      if(matcher.isWildCardMatch()) {
+        this.wildcardMatch = true;
+      }
+      matchers.add(matcher);
+      if(matchers.size() > 1) {
+        this.multipleMatchers = true;
+      }
       okCols.put(family, matchers);
     }
   }
@@ -170,6 +188,19 @@
   /** Mechanism used to shut down the whole scan */
   public abstract void close() throws IOException;
 
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hbase.HInternalScannerInterface#isWildcardScanner()
+   */
+  public boolean isWildcardScanner() {
+    return this.wildcardMatch;
+  }
+  
+  /* (non-Javadoc)
+   * @see org.apache.hadoop.hbase.HInternalScannerInterface#isMultipleMatchScanner()
+   */
+  public boolean isMultipleMatchScanner() {
+    return this.multipleMatchers;
+  }
   /**
    * Get the next set of values for this scanner.
    * 
@@ -179,7 +210,7 @@
    * 
    * @see org.apache.hadoop.hbase.HScannerInterface#next(org.apache.hadoop.hbase.HStoreKey, java.util.TreeMap)
    */
-  public boolean next(HStoreKey key, TreeMap<Text, byte[]> results)
+  public boolean next(HStoreKey key, TreeMap<Text, BytesWritable> results)
       throws IOException {
  
     // Find the next row label (and timestamp)
@@ -187,7 +218,7 @@
     Text chosenRow = null;
     long chosenTimestamp = -1;
     for(int i = 0; i < keys.length; i++) {
-      while((keys[i] != null)
+      if((keys[i] != null)
           && (columnMatch(i))
           && (keys[i].getTimestamp() <= this.timestamp)
           && ((chosenRow == null)
@@ -210,23 +241,31 @@
 
       for(int i = 0; i < keys.length; i++) {
         // Fetch the data
-        
+
         while((keys[i] != null)
-            && (keys[i].getRow().compareTo(chosenRow) == 0)
-            && (keys[i].getTimestamp() == chosenTimestamp)) {
+            && (keys[i].getRow().compareTo(chosenRow) == 0)) {
+
+          // If we are doing a wild card match or there are multiple matchers
+          // per column, we need to scan all the older versions of this row
+          // to pick up the rest of the family members
+          
+          if(!wildcardMatch
+              && !multipleMatchers
+              && (keys[i].getTimestamp() != chosenTimestamp)) {
+            break;
+          }
 
           if(columnMatch(i)) {
-            outbuf.reset();
-            vals[i].write(outbuf);
-            byte byteresults[] = outbuf.getData();
-            inbuf.reset(byteresults, outbuf.getLength());
-            BytesWritable tmpval = new BytesWritable();
-            tmpval.readFields(inbuf);
-            results.put(new Text(keys[i].getColumn()), tmpval.get());
-            insertedItem = true;
+              
+            // We only want the first result for any specific family member
+            
+            if(!results.containsKey(keys[i].getColumn())) {
+              results.put(new Text(keys[i].getColumn()), vals[i]);
+              insertedItem = true;
+            }
           }
 
-          if (! getNext(i)) {
+          if(!getNext(i)) {
             closeSubScanner(i);
           }
         }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HBaseConfiguration.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HBaseConfiguration.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HBaseConfiguration.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HBaseConfiguration.java Mon May  7 12:58:53 2007
@@ -21,5 +21,6 @@
   public HBaseConfiguration() {
     super();
     addDefaultResource("hbase-default.xml");
+    addDefaultResource("hbase-site.xml");
   }
 }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HClient.java Mon May  7 12:58:53 2007
@@ -19,28 +19,30 @@
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
+import java.util.NoSuchElementException;
 import java.util.Random;
 import java.util.TreeMap;
 import java.util.TreeSet;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.DataInputBuffer;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.ipc.RPC;
-import org.apache.log4j.Logger;
 
 /*******************************************************************************
  * HClient manages a connection to a single HRegionServer.
  ******************************************************************************/
 public class HClient implements HConstants {
-  private final Logger LOG =
-    Logger.getLogger(this.getClass().getName());
+  private final Log LOG = LogFactory.getLog(this.getClass().getName());
   
-  private static final Text[] metaColumns = {
-    META_COLUMN_FAMILY
+  private static final Text[] META_COLUMNS = {
+    COLUMN_FAMILY
   };
-  private static final Text startRow = new Text();
+  
+  private static final Text EMPTY_START_ROW = new Text();
   
   private boolean closed;
   private long clientTimeout;
@@ -83,7 +85,7 @@
     this.closed = false;
     this.conf = conf;
 
-    this.clientTimeout = conf.getLong("hbase.client.timeout.length", 10 * 1000);
+    this.clientTimeout = conf.getLong("hbase.client.timeout.length", 30 * 1000);
     this.numTimeouts = conf.getInt("hbase.client.timeout.number", 5);
     this.numRetries =  conf.getInt("hbase.client.retries.number", 2);
     
@@ -98,40 +100,61 @@
     this.currentServer = null;
     this.rand = new Random();
   }
-
-  public synchronized void createTable(HTableDescriptor desc) throws IOException {
-    if(closed) {
+  
+  /**
+   * Check client is open.
+   */
+  private synchronized void checkOpen() {
+    if (this.closed) {
       throw new IllegalStateException("client is not open");
     }
-    if(master == null) {
-      locateRootRegion();
+  }
+  
+  private synchronized void checkMaster() throws IOException {
+    if (this.master != null) {
+      return;
     }
-    master.createTable(desc);
+    HServerAddress masterLocation =
+      new HServerAddress(this.conf.get(MASTER_ADDRESS));
+    this.master = (HMasterInterface)RPC.getProxy(HMasterInterface.class, 
+      HMasterInterface.versionID, masterLocation.getInetSocketAddress(), this.conf);
+  }
+
+  public synchronized void createTable(HTableDescriptor desc)
+  throws IOException {
+    checkOpen();
+    checkMaster();
+    locateRootRegion();
+    this.master.createTable(desc);
   }
 
   public synchronized void deleteTable(Text tableName) throws IOException {
-    if(closed) {
-      throw new IllegalStateException("client is not open");
-    }
-    if(master == null) {
-      locateRootRegion();
-    }
-    master.deleteTable(tableName);
+    checkOpen();
+    checkMaster();
+    locateRootRegion();
+    this.master.deleteTable(tableName);
+  }
+  
+  public synchronized void shutdown() throws IOException {
+    checkOpen();
+    checkMaster();
+    this.master.shutdown();
   }
   
   public synchronized void openTable(Text tableName) throws IOException {
-    if(closed) {
-      throw new IllegalStateException("client is not open");
+    if(tableName == null || tableName.getLength() == 0) {
+      throw new IllegalArgumentException("table name cannot be null or zero length");
     }
-
-    tableServers = tablesToServers.get(tableName);
-    if(tableServers == null ) {                 // We don't know where the table is
+    checkOpen();
+    this.tableServers = tablesToServers.get(tableName);
+    if(this.tableServers == null ) {            // We don't know where the table is
       findTableInMeta(tableName);               // Load the information from meta
     }
   }
 
   private void findTableInMeta(Text tableName) throws IOException {
-    TreeMap<Text, TableInfo> metaServers = tablesToServers.get(META_TABLE_NAME);
+    TreeMap<Text, TableInfo> metaServers =
+      this.tablesToServers.get(META_TABLE_NAME);
     
     if(metaServers == null) {                   // Don't know where the meta is
       loadMetaFromRoot(tableName);
@@ -139,18 +162,51 @@
         // All we really wanted was the meta or root table
         return;
       }
-      metaServers = tablesToServers.get(META_TABLE_NAME);
+      metaServers = this.tablesToServers.get(META_TABLE_NAME);
     }
 
-    tableServers = new TreeMap<Text, TableInfo>();
-    for(Iterator<TableInfo> i = metaServers.tailMap(tableName).values().iterator();
-        i.hasNext(); ) {
+    this.tableServers = new TreeMap<Text, TableInfo>();
+    for(int tries = 0;
+        this.tableServers.size() == 0 && tries < this.numRetries;
+        tries++) {
+      
+      Text firstMetaRegion = null;
+      if(metaServers.containsKey(tableName)) {
+        firstMetaRegion = tableName;
+        
+      } else {
+        firstMetaRegion = metaServers.headMap(tableName).lastKey();
+      }
+      for(Iterator<TableInfo> i
+          = metaServers.tailMap(firstMetaRegion).values().iterator();
+          i.hasNext(); ) {
       
-      TableInfo t = i.next();
+        TableInfo t = i.next();
       
-      scanOneMetaRegion(t, tableName);
+        scanOneMetaRegion(t, tableName);
+      }
+      if(this.tableServers.size() == 0) {
+        // Table not assigned. Sleep and try again
+
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("Sleeping. Table " + tableName
+              + " not currently being served.");
+        }
+        try {
+          Thread.sleep(this.clientTimeout);
+          
+        } catch(InterruptedException e) {
+        }
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("Wake. Retry finding table " + tableName);
+        }
+      }
+    }
+    if(this.tableServers.size() == 0) {
+      throw new IOException("failed to scan " + META_TABLE_NAME + " after "
+          + this.numRetries + " retries");
     }
-    tablesToServers.put(tableName, tableServers);
+    this.tablesToServers.put(tableName, this.tableServers);
   }
 
   /*
@@ -169,24 +225,23 @@
    * could be.
    */
   private void locateRootRegion() throws IOException {
-    if(master == null) {
-      HServerAddress masterLocation =
-        new HServerAddress(this.conf.get(MASTER_ADDRESS));
-      master = (HMasterInterface)RPC.getProxy(HMasterInterface.class, 
-          HMasterInterface.versionID,
-          masterLocation.getInetSocketAddress(), conf);
-    }
+    checkMaster();
     
-    int tries = 0;
     HServerAddress rootRegionLocation = null;
-    do {
+    for(int tries = 0; rootRegionLocation == null && tries < numRetries; tries++){
       int localTimeouts = 0;
       while(rootRegionLocation == null && localTimeouts < numTimeouts) {
         rootRegionLocation = master.findRootRegion();
 
         if(rootRegionLocation == null) {
           try {
-            Thread.sleep(clientTimeout);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Sleeping. Waiting for root region.");
+            }
+            Thread.sleep(this.clientTimeout);
+            if (LOG.isDebugEnabled()) {
+              LOG.debug("Wake. Retry finding root region.");
+            }
           } catch(InterruptedException iex) {
           }
           localTimeouts++;
@@ -201,17 +256,18 @@
       HRegionInterface rootRegion = getHRegionConnection(rootRegionLocation);
 
       if(rootRegion.getRegionInfo(HGlobals.rootRegionInfo.regionName) != null) {
-        tableServers = new TreeMap<Text, TableInfo>();
-        tableServers.put(startRow, new TableInfo(HGlobals.rootRegionInfo, rootRegionLocation));
-        tablesToServers.put(ROOT_TABLE_NAME, tableServers);
+        this.tableServers = new TreeMap<Text, TableInfo>();
+        this.tableServers.put(EMPTY_START_ROW,
+            new TableInfo(HGlobals.rootRegionInfo, rootRegionLocation));
+        
+        this.tablesToServers.put(ROOT_TABLE_NAME, this.tableServers);
         break;
       }
       rootRegionLocation = null;
-      
-    } while(rootRegionLocation == null && tries++ < numRetries);
+    }
     
-    if(rootRegionLocation == null) {
-      closed = true;
+    if (rootRegionLocation == null) {
+      this.closed = true;
       throw new IOException("unable to locate root region server");
     }
   }
@@ -220,38 +276,78 @@
    * Scans the root region to find all the meta regions
    */
   private void scanRoot() throws IOException {
-    tableServers = new TreeMap<Text, TableInfo>();
-    TableInfo t = tablesToServers.get(ROOT_TABLE_NAME).get(startRow);
-    scanOneMetaRegion(t, META_TABLE_NAME);
-    tablesToServers.put(META_TABLE_NAME, tableServers);
+    this.tableServers = new TreeMap<Text, TableInfo>();
+    TableInfo t = this.tablesToServers.get(ROOT_TABLE_NAME).get(EMPTY_START_ROW);
+    for(int tries = 0;
+        scanOneMetaRegion(t, META_TABLE_NAME) == 0 && tries < this.numRetries;
+        tries++) {
+      
+      // The table is not yet being served. Sleep and retry.
+      
+      if(LOG.isDebugEnabled()) {
+        LOG.debug("Sleeping. Table " + META_TABLE_NAME
+            + " not currently being served.");
+      }
+      try {
+        Thread.sleep(this.clientTimeout);
+        
+      } catch(InterruptedException e) {
+      }
+      if(LOG.isDebugEnabled()) {
+        LOG.debug("Wake. Retry finding table " + META_TABLE_NAME);
+      }
+    }
+    if(this.tableServers.size() == 0) {
+      throw new IOException("failed to scan " + ROOT_TABLE_NAME + " after "
+          + this.numRetries + " retries");
+    }
+    this.tablesToServers.put(META_TABLE_NAME, this.tableServers);
   }
 
   /*
    * Scans a single meta region
    * @param t the table we're going to scan
    * @param tableName the name of the table we're looking for
+   * @return returns the number of servers that are serving the table
    */
-  private void scanOneMetaRegion(TableInfo t, Text tableName) throws IOException {
+  private int scanOneMetaRegion(TableInfo t, Text tableName)
+      throws IOException {
+    
     HRegionInterface server = getHRegionConnection(t.serverAddress);
+    int servers = 0;
     long scannerId = -1L;
     try {
-      scannerId = server.openScanner(t.regionInfo.regionName, metaColumns, tableName);
-
+      scannerId =
+        server.openScanner(t.regionInfo.regionName, META_COLUMNS, tableName);
+      
       DataInputBuffer inbuf = new DataInputBuffer();
       while(true) {
+        HRegionInfo regionInfo = null;
+        String serverAddress = null;
         HStoreKey key = new HStoreKey();
-
         LabelledData[] values = server.next(scannerId, key);
         if(values.length == 0) {
+          if(servers == 0) {
+            // If we didn't find any servers then the table does not exist
+            
+            throw new NoSuchElementException("table '" + tableName
+                + "' does not exist");
+          }
+          
+          // We found at least one server for the table and now we're done.
+          
           break;
         }
 
+        byte[] bytes = null;
         TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
         for(int i = 0; i < values.length; i++) {
-          results.put(values[i].getLabel(), values[i].getData().get());
+          bytes = new byte[values[i].getData().getSize()];
+          System.arraycopy(values[i].getData().get(), 0, bytes, 0, bytes.length);
+          results.put(values[i].getLabel(), bytes);
         }
-        HRegionInfo regionInfo = new HRegionInfo();
-        byte[] bytes = results.get(META_COL_REGIONINFO);
+        regionInfo = new HRegionInfo();
+        bytes = results.get(COL_REGIONINFO);
         inbuf.reset(bytes, bytes.length);
         regionInfo.readFields(inbuf);
 
@@ -259,15 +355,26 @@
           // We're done
           break;
         }
-        
-        bytes = results.get(META_COL_SERVER);
-        String serverName = new String(bytes, UTF8_ENCODING);
+
+        bytes = results.get(COL_SERVER);
+        if(bytes == null || bytes.length == 0) {
+          // We need to rescan because the table we want is unassigned.
           
-        tableServers.put(regionInfo.startKey, 
-            new TableInfo(regionInfo, new HServerAddress(serverName)));
+          if(LOG.isDebugEnabled()) {
+            LOG.debug("no server address for " + regionInfo.toString());
+          }
+          servers = 0;
+          this.tableServers.clear();
+          break;
+        }
+        servers += 1;
+        serverAddress = new String(bytes, UTF8_ENCODING);
 
+        this.tableServers.put(regionInfo.startKey, 
+            new TableInfo(regionInfo, new HServerAddress(serverAddress)));
       }
-
+      return servers;
+      
     } finally {
       if(scannerId != -1L) {
         server.close(scannerId);
@@ -280,23 +387,24 @@
 
       // See if we already have a connection
 
-    HRegionInterface server = servers.get(regionServer.toString());
+    HRegionInterface server = this.servers.get(regionServer.toString());
     
     if(server == null) {                                // Get a connection
       
       server = (HRegionInterface)RPC.waitForProxy(HRegionInterface.class, 
-          HRegionInterface.versionID, regionServer.getInetSocketAddress(), conf);
+          HRegionInterface.versionID, regionServer.getInetSocketAddress(),
+          this.conf);
       
-      servers.put(regionServer.toString(), server);
+      this.servers.put(regionServer.toString(), server);
     }
     return server;
   }
 
-  /** Close the connection to the HRegionServer */
+  /** Close the connection */
   public synchronized void close() throws IOException {
-    if(! closed) {
+    if(! this.closed) {
       RPC.stopClient();
-      closed = true;
+      this.closed = true;
     }
   }
 
@@ -307,65 +415,75 @@
    * catalog table that just contains table names and their descriptors.
    * Right now, it only exists as part of the META table's region info.
    */
-  public synchronized HTableDescriptor[] listTables() throws IOException {
+  public synchronized HTableDescriptor[] listTables()
+  throws IOException {
     TreeSet<HTableDescriptor> uniqueTables = new TreeSet<HTableDescriptor>();
     
-    TreeMap<Text, TableInfo> metaTables = tablesToServers.get(META_TABLE_NAME);
+    TreeMap<Text, TableInfo> metaTables =
+      this.tablesToServers.get(META_TABLE_NAME);
+    
     if(metaTables == null) {
       // Meta is not loaded yet so go do that
       loadMetaFromRoot(META_TABLE_NAME);
       metaTables = tablesToServers.get(META_TABLE_NAME);
     }
 
-    for(Iterator<TableInfo>it = metaTables.values().iterator(); it.hasNext(); ) {
-      TableInfo t = it.next();
+    for (TableInfo t: metaTables.values()) {
       HRegionInterface server = getHRegionConnection(t.serverAddress);
       long scannerId = -1L;
       try {
-        scannerId = server.openScanner(t.regionInfo.regionName, metaColumns, startRow);
-        HStoreKey key = new HStoreKey();
+        scannerId = server.openScanner(t.regionInfo.regionName,
+            META_COLUMNS, EMPTY_START_ROW);
         
+        HStoreKey key = new HStoreKey();
         DataInputBuffer inbuf = new DataInputBuffer();
         while(true) {
           LabelledData[] values = server.next(scannerId, key);
           if(values.length == 0) {
             break;
           }
-
           for(int i = 0; i < values.length; i++) {
-            if(values[i].getLabel().equals(META_COL_REGIONINFO)) {
+            if(values[i].getLabel().equals(COL_REGIONINFO)) {
               byte[] bytes = values[i].getData().get();
               inbuf.reset(bytes, bytes.length);
               HRegionInfo info = new HRegionInfo();
               info.readFields(inbuf);
 
-              // Only examine the rows where the startKey is zero length
-              
+              // Only examine the rows where the startKey is zero length   
               if(info.startKey.getLength() == 0) {
                 uniqueTables.add(info.tableDesc);
               }
             }
           }
         }
-        
       } finally {
         if(scannerId != -1L) {
           server.close(scannerId);
         }
       }
     }
-    return (HTableDescriptor[]) uniqueTables.toArray(new HTableDescriptor[uniqueTables.size()]);
+    return (HTableDescriptor[])uniqueTables.
+      toArray(new HTableDescriptor[uniqueTables.size()]);
   }
 
   private synchronized TableInfo getTableInfo(Text row) {
-    if(tableServers == null) {
+    if(row == null || row.getLength() == 0) {
+      throw new IllegalArgumentException("row key cannot be null or zero length");
+    }
+    if(this.tableServers == null) {
       throw new IllegalStateException("Must open table first");
     }
     
     // Only one server will have the row we are looking for
     
-    Text serverKey = tableServers.tailMap(row).firstKey();
-    return tableServers.get(serverKey);
+    Text serverKey = null;
+    if(this.tableServers.containsKey(row)) {
+      serverKey = row;
+      
+    } else {
+      serverKey = this.tableServers.headMap(row).lastKey();
+    }
+    return this.tableServers.get(serverKey);
   }
   
   /** Get a single value for the specified row and column */
@@ -416,7 +534,7 @@
    * Return the specified columns.
    */
   public synchronized HScannerInterface obtainScanner(Text[] columns, Text startRow) throws IOException {
-    if(tableServers == null) {
+    if(this.tableServers == null) {
       throw new IllegalStateException("Must open table first");
     }
     return new ClientScanner(columns, startRow);
@@ -427,14 +545,14 @@
     TableInfo info = getTableInfo(row);
     long lockid;
     try {
-      currentServer = getHRegionConnection(info.serverAddress);
-      currentRegion = info.regionInfo.regionName;
-      clientid = rand.nextLong();
-      lockid = currentServer.startUpdate(currentRegion, clientid, row);
+      this.currentServer = getHRegionConnection(info.serverAddress);
+      this.currentRegion = info.regionInfo.regionName;
+      this.clientid = rand.nextLong();
+      lockid = currentServer.startUpdate(this.currentRegion, this.clientid, row);
       
     } catch(IOException e) {
-      currentServer = null;
-      currentRegion = null;
+      this.currentServer = null;
+      this.currentRegion = null;
       throw e;
     }
     return lockid;
@@ -443,16 +561,17 @@
   /** Change a value for the specified column */
   public void put(long lockid, Text column, byte val[]) throws IOException {
     try {
-      currentServer.put(currentRegion, clientid, lockid, column, new BytesWritable(val));
+      this.currentServer.put(this.currentRegion, this.clientid, lockid, column,
+          new BytesWritable(val));
       
     } catch(IOException e) {
       try {
-        currentServer.abort(currentRegion, clientid, lockid);
+        this.currentServer.abort(this.currentRegion, this.clientid, lockid);
         
       } catch(IOException e2) {
       }
-      currentServer = null;
-      currentRegion = null;
+      this.currentServer = null;
+      this.currentRegion = null;
       throw e;
     }
   }
@@ -460,16 +579,16 @@
   /** Delete the value for a column */
   public void delete(long lockid, Text column) throws IOException {
     try {
-      currentServer.delete(currentRegion, clientid, lockid, column);
+      this.currentServer.delete(this.currentRegion, this.clientid, lockid, column);
       
     } catch(IOException e) {
       try {
-        currentServer.abort(currentRegion, clientid, lockid);
+        this.currentServer.abort(this.currentRegion, this.clientid, lockid);
         
       } catch(IOException e2) {
       }
-      currentServer = null;
-      currentRegion = null;
+      this.currentServer = null;
+      this.currentRegion = null;
       throw e;
     }
   }
@@ -477,11 +596,10 @@
   /** Abort a row mutation */
   public void abort(long lockid) throws IOException {
     try {
-      currentServer.abort(currentRegion, clientid, lockid);
-      
+      this.currentServer.abort(this.currentRegion, this.clientid, lockid);
     } catch(IOException e) {
-      currentServer = null;
-      currentRegion = null;
+      this.currentServer = null;
+      this.currentRegion = null;
       throw e;
     }
   }
@@ -489,11 +607,11 @@
   /** Finalize a row mutation */
   public void commit(long lockid) throws IOException {
     try {
-      currentServer.commit(currentRegion, clientid, lockid);
+      this.currentServer.commit(this.currentRegion, this.clientid, lockid);
       
     } finally {
-      currentServer = null;
-      currentRegion = null;
+      this.currentServer = null;
+      this.currentRegion = null;
     }
   }
   
@@ -515,7 +633,19 @@
       this.columns = columns;
       this.startRow = startRow;
       this.closed = false;
-      Collection<TableInfo> info = tableServers.tailMap(startRow).values();
+      
+      Text firstServer = null;
+      if(this.startRow == null || this.startRow.getLength() == 0) {
+        firstServer = tableServers.firstKey();
+        
+      } else if(tableServers.containsKey(startRow)) {
+        firstServer = startRow;
+        
+      } else {
+        firstServer = tableServers.headMap(startRow).lastKey();
+      }
+      Collection<TableInfo> info = tableServers.tailMap(firstServer).values();
+      
       this.regions = info.toArray(new TableInfo[info.size()]);
       this.currentRegion = -1;
       this.server = null;
@@ -528,19 +658,20 @@
      * Returns false if there are no more scanners.
      */
     private boolean nextScanner() throws IOException {
-      if(scannerId != -1L) {
-        server.close(scannerId);
-        scannerId = -1L;
+      if(this.scannerId != -1L) {
+        this.server.close(this.scannerId);
+        this.scannerId = -1L;
       }
-      currentRegion += 1;
-      if(currentRegion == regions.length) {
+      this.currentRegion += 1;
+      if(this.currentRegion == this.regions.length) {
         close();
         return false;
       }
       try {
-        server = getHRegionConnection(regions[currentRegion].serverAddress);
-        scannerId = server.openScanner(regions[currentRegion].regionInfo.regionName,
-            columns, startRow);
+        this.server = getHRegionConnection(this.regions[currentRegion].serverAddress);
+        this.scannerId = this.server.openScanner(
+            this.regions[currentRegion].regionInfo.regionName, this.columns,
+            this.startRow);
         
       } catch(IOException e) {
         close();
@@ -553,16 +684,18 @@
      * @see org.apache.hadoop.hbase.HScannerInterface#next(org.apache.hadoop.hbase.HStoreKey, java.util.TreeMap)
      */
     public boolean next(HStoreKey key, TreeMap<Text, byte[]> results) throws IOException {
-      if(closed) {
+      if(this.closed) {
         return false;
       }
       LabelledData[] values = null;
       do {
-        values = server.next(scannerId, key);
+        values = this.server.next(this.scannerId, key);
       } while(values.length == 0 && nextScanner());
 
       for(int i = 0; i < values.length; i++) {
-        results.put(values[i].getLabel(), values[i].getData().get());
+        byte[] bytes = new byte[values[i].getData().getSize()];
+        System.arraycopy(values[i].getData().get(), 0, bytes, 0, bytes.length);
+        results.put(values[i].getLabel(), bytes);
       }
       return values.length != 0;
     }
@@ -571,38 +704,112 @@
      * @see org.apache.hadoop.hbase.HScannerInterface#close()
      */
     public void close() throws IOException {
-      if(scannerId != -1L) {
-        server.close(scannerId);
+      if(this.scannerId != -1L) {
+        this.server.close(this.scannerId);
       }
-      server = null;
-      closed = true;
+      this.server = null;
+      this.closed = true;
     }
   }
   
   private void printUsage() {
+    printUsage(null);
+  }
+  
+  private void printUsage(final String message) {
+    if (message != null && message.length() > 0) {
+      System.err.println(message);
+    }
     System.err.println("Usage: java " + this.getClass().getName() +
-        " [--master=hostname:port]");
+        " [--master=host:port] <command> <args>");
+    System.err.println("Options:");
+    System.err.println(" master       Specify host and port of HBase " +
+        "cluster master. If not present,");
+    System.err.println("              address is read from configuration.");
+    System.err.println("Commands:");
+    System.err.println(" shutdown     Shutdown the HBase cluster.");
+    System.err.println(" createTable  Takes table name, column families, " +
+      "and maximum versions.");
+    System.err.println(" deleteTable  Takes a table name.");
+    System.err.println(" listTables   List all tables.");
+    System.err.println("Example Usage:");
+    System.err.println(" % java " + this.getClass().getName() + " shutdown");
+    System.err.println(" % java " + this.getClass().getName() +
+        " createTable webcrawl contents: anchors: 10");
   }
   
-  private int doCommandLine(final String args[]) {
+  int doCommandLine(final String args[]) {
     // Process command-line args. TODO: Better cmd-line processing
-    // (but hopefully something not as painful as cli options).
-    for (String cmd: args) {
-      if (cmd.equals("-h") || cmd.startsWith("--h")) {
-        printUsage();
-        return 0;
-      }
-      
-      final String masterArgKey = "--master=";
-      if (cmd.startsWith(masterArgKey)) {
-        this.conf.set(MASTER_ADDRESS,
-            cmd.substring(masterArgKey.length()));
-      }
-    }
-    
+    // (but hopefully something not as painful as cli options).    
     int errCode = -1;
+    if (args.length < 1) {
+      printUsage();
+      return errCode;
+    }
     try {
-      locateRootRegion();
+      for (int i = 0; i < args.length; i++) {
+        String cmd = args[i];
+        if (cmd.equals("-h") || cmd.startsWith("--h")) {
+          printUsage();
+          errCode = 0;
+          break;
+        }
+        
+        final String masterArgKey = "--master=";
+        if (cmd.startsWith(masterArgKey)) {
+          this.conf.set(MASTER_ADDRESS, cmd.substring(masterArgKey.length()));
+          continue;
+        }
+       
+        if (cmd.equals("shutdown")) {
+          shutdown();
+          errCode = 0;
+          break;
+        }
+        
+        if (cmd.equals("listTables")) {
+          HTableDescriptor [] tables = listTables();
+          for (int ii = 0; ii < tables.length; ii++) {
+            System.out.println(tables[ii].getName());
+          }
+          errCode = 0;
+          break;
+        }
+        
+        if (cmd.equals("createTable")) {
+          if (i + 3 > args.length) {
+            throw new IllegalArgumentException("Must supply a table name, " +
+              "at least one column family and maximum number of versions");
+          }
+          int maxVersions = (Integer.parseInt(args[args.length - 1]));
+          HTableDescriptor desc =
+            new HTableDescriptor(args[i + 1], maxVersions);
+          boolean addedFamily = false;
+          for (int ii = i + 2; ii < (args.length - 1); ii++) {
+            desc.addFamily(new Text(args[ii]));
+            addedFamily = true;
+          }
+          if (!addedFamily) {
+            throw new IllegalArgumentException("Must supply at least one " +
+              "column family");
+          }
+          createTable(desc);
+          errCode = 0;
+          break;
+        }
+        
+        if (cmd.equals("deleteTable")) {
+          if (i + 1 > args.length) {
+            throw new IllegalArgumentException("Must supply a table name");
+          }
+          deleteTable(new Text(args[i + 1]));
+          errCode = 0;
+          break;
+        }
+        
+        printUsage();
+        break;
+      }
     } catch (Exception e) {
       e.printStackTrace();
     }

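As an aside (not part of the commit), the client API touched above can be pieced together into a small, illustrative Java sketch. The methods used (createTable, addFamily, openTable, obtainScanner, next, close) and the sample table and family names are taken from this diff and its printUsage example; the Configuration-taking HClient constructor is assumed from the constructor body shown near the top of the file, and the sketch glosses over region-assignment timing.

    import java.io.IOException;
    import java.util.TreeMap;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HClient;
    import org.apache.hadoop.hbase.HScannerInterface;
    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.io.Text;

    public class HClientSketch {
      public static void main(String[] args) throws IOException {
        HClient client = new HClient(new HBaseConfiguration());

        // Create a table with one column family, keeping up to 10 versions per
        // cell, mirroring the "createTable webcrawl contents: anchors: 10" usage.
        HTableDescriptor desc = new HTableDescriptor("webcrawl", 10);
        desc.addFamily(new Text("contents:"));
        client.createTable(desc);

        // Open the table and scan the whole family from the empty start row,
        // the same code path ClientScanner follows above.
        client.openTable(new Text("webcrawl"));
        HScannerInterface scanner =
            client.obtainScanner(new Text[] { new Text("contents:") }, new Text());
        try {
          HStoreKey key = new HStoreKey();
          TreeMap<Text, byte[]> results = new TreeMap<Text, byte[]>();
          while (scanner.next(key, results)) {
            System.out.println(key.getRow() + ": " + results.size() + " column(s)");
            results.clear();
          }
        } finally {
          scanner.close();
          client.close();
        }
      }
    }
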
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HConstants.java Mon May  7 12:58:53 2007
@@ -15,30 +15,40 @@
  */
 package org.apache.hadoop.hbase;
 
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.Text;
 
-/*******************************************************************************
+/**
  * HConstants holds a bunch of HBase-related constants
- ******************************************************************************/
+ */
 public interface HConstants {
   
   // Configuration parameters
   
-  // TODO: URL for hbase master, like hdfs URLs with host and port.
-  // Or, like jdbc URLs:
+  // TODO: URL for hbase master like hdfs URLs with host and port.
+  // Like jdbc URLs?  URLs could be used to refer to table cells?
   // jdbc:mysql://[host][,failoverhost...][:port]/[database]
   // jdbc:mysql://[host][,failoverhost...][:port]/[database][?propertyName1][=propertyValue1][&propertyName2][=propertyValue2]...
   
-  static final String MASTER_ADDRESS = "hbase.master";
+  // Key into HBaseConfiguration for the hbase.master address.
   // TODO: Support 'local': i.e. default of all running in single
-  // process.  Same for regionserver.
+  // process.  Same for regionserver. TODO: Is having HBase homed
+  // on port 60k OK?
+  static final String MASTER_ADDRESS = "hbase.master";
   static final String DEFAULT_MASTER_ADDRESS = "localhost:60000";
+
+  // Key for hbase.regionserver address.
   static final String REGIONSERVER_ADDRESS = "hbase.regionserver";
-  static final String DEFAULT_REGIONSERVER_ADDRESS =
-    "localhost:60010";
+  static final String DEFAULT_REGIONSERVER_ADDRESS = "localhost:60010";
+
+  static final String THREAD_WAKE_FREQUENCY = "hbase.server.thread.wakefrequency";
   static final String HREGION_DIR = "hbase.regiondir";
   static final String DEFAULT_HREGION_DIR = "/hbase";
   static final String HREGIONDIR_PREFIX = "hregion_";
+  
+  // TODO: Someone may try to name a column family 'log'.  If they
+  // do, it will clash with the HREGION log dir subdirectory. FIX.
+  static final String HREGION_LOGDIR_NAME = "log";
 
   // Always store the location of the root table's HRegion.
   // This HRegion is never split.
@@ -46,22 +56,29 @@
   // region name = table + startkey + regionid. This is the row key.
   // each row in the root and meta tables describes exactly 1 region
   // Do we ever need to know all the information that we are storing?
-  
+
+  // The root tables' name.
   static final Text ROOT_TABLE_NAME = new Text("--ROOT--");
-  static final Text ROOT_COLUMN_FAMILY = new Text("info:");
-  static final Text ROOT_COL_REGIONINFO = new Text(ROOT_COLUMN_FAMILY + "regioninfo");
-  static final Text ROOT_COL_SERVER = new Text(ROOT_COLUMN_FAMILY + "server");
-  static final Text ROOT_COL_STARTCODE = new Text(ROOT_COLUMN_FAMILY + "serverstartcode");
 
+  // The META tables' name.
   static final Text META_TABLE_NAME = new Text("--META--");
-  static final Text META_COLUMN_FAMILY = new Text(ROOT_COLUMN_FAMILY);
-  static final Text META_COL_REGIONINFO = new Text(ROOT_COL_REGIONINFO);
-  static final Text META_COL_SERVER = new Text(ROOT_COL_SERVER);
-  static final Text META_COL_STARTCODE = new Text(ROOT_COL_STARTCODE);
+
+  // Defines for the column names used in both ROOT and META HBase 'meta'
+  // tables.
+  static final Text COLUMN_FAMILY = new Text("info:");
+  static final Text COL_REGIONINFO = new Text(COLUMN_FAMILY + "regioninfo");
+  static final Text COL_SERVER = new Text(COLUMN_FAMILY + "server");
+  static final Text COL_STARTCODE = new Text(COLUMN_FAMILY + "serverstartcode");
 
   // Other constants
   
   static final long DESIRED_MAX_FILE_SIZE = 128 * 1024 * 1024;        // 128MB
   static final String UTF8_ENCODING = "UTF-8";
   
+  static final BytesWritable DELETE_BYTES = 
+    new BytesWritable("HBASE::DELETEVAL".getBytes());
+  
+  static final BytesWritable COMPLETE_CACHEFLUSH =
+    new BytesWritable("HBASE::CACHEFLUSH".getBytes());
+
 }

Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HGlobals.java Mon May  7 12:58:53 2007
@@ -15,9 +15,9 @@
  */
 package org.apache.hadoop.hbase;
 
-/*******************************************************************************
+/**
  * Global values used for finding and scanning the root and meta tables.
- ******************************************************************************/
+ */
 public class HGlobals implements HConstants {
   
   static HTableDescriptor rootTableDesc = null;
@@ -26,13 +26,11 @@
 
   static {
     rootTableDesc = new HTableDescriptor(ROOT_TABLE_NAME.toString(), 1);
-    rootTableDesc.addFamily(ROOT_COLUMN_FAMILY);
+    rootTableDesc.addFamily(COLUMN_FAMILY);
     
     rootRegionInfo = new HRegionInfo(0L, rootTableDesc, null, null);
     
     metaTableDesc = new HTableDescriptor(META_TABLE_NAME.toString(), 1);
-    metaTableDesc.addFamily(META_COLUMN_FAMILY);
+    metaTableDesc.addFamily(COLUMN_FAMILY);
   }
-  
-
 }

Added: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HInternalScannerInterface.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HInternalScannerInterface.java?view=auto&rev=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HInternalScannerInterface.java (added)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HInternalScannerInterface.java Mon May  7 12:58:53 2007
@@ -0,0 +1,44 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase;
+
+import java.io.IOException;
+import java.util.TreeMap;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Internally, we need to be able to determine if the scanner is doing wildcard
+ * column matches (when only a column family is specified or if a column regex
+ * is specified) or if multiple members of the same column family were specified.
+ * 
+ * If so, we need to ignore the timestamp to ensure that we get all the family
+ * members, as they may have been last updated at different times.
+ * 
+ * This interface exposes two APIs for querying the scanner.
+ */
+public interface HInternalScannerInterface {
+  
+  /** Grab the values for the next row; returns false when no rows remain. */
+  public boolean next(HStoreKey key, TreeMap<Text, BytesWritable> results) throws IOException;
+  /** Close the scanner and release any resources it holds. */
+  public void close() throws IOException;
+  /** Returns true if the scanner is matching a column family or regex */
+  public boolean isWildcardScanner();
+  
+  /** Returns true if the scanner is matching multiple column family members */
+  public boolean isMultipleMatchScanner();
+  
+}

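As a usage illustration (again, not part of this commit), a caller might drain one of these
scanners as below; when either flag is set, the caller should not assume the returned cells
share a single timestamp. The sketch assumes it sits in the org.apache.hadoop.hbase package
and that HStoreKey has the usual no-argument Writable constructor.

    import java.io.IOException;
    import java.util.TreeMap;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.Text;

    class ScannerDrainSketch {
      static void drain(HInternalScannerInterface scanner) throws IOException {
        // Wildcard and multiple-match scans can return family members written
        // at different times, so per-cell timestamps are not comparable here.
        boolean ignoreTimestamps =
          scanner.isWildcardScanner() || scanner.isMultipleMatchScanner();
        HStoreKey key = new HStoreKey();
        TreeMap<Text, BytesWritable> results = new TreeMap<Text, BytesWritable>();
        try {
          while (scanner.next(key, results)) {
            System.out.println(key + " (ignoreTimestamps=" + ignoreTimestamps
                + "): " + results.keySet());
            results.clear();
          }
        } finally {
          scanner.close();
        }
      }
    }
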
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLog.java Mon May  7 12:58:53 2007
@@ -18,40 +18,49 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.io.*;
+import org.apache.hadoop.io.SequenceFile.Reader;
 import org.apache.hadoop.fs.*;
 import org.apache.hadoop.conf.*;
 
 import java.io.*;
 import java.util.*;
 
-/*******************************************************************************
+/**
  * HLog stores all the edits to the HStore.
  * 
  * It performs logfile-rolling, so external callers are not aware that the 
  * underlying file is being rolled.
  *
- * A single HLog is used by several HRegions simultaneously.
+ * <p>A single HLog is used by several HRegions simultaneously.
  * 
- * Each one is identified by a unique long int.  HRegions do not need to declare
- * themselves before using the HLog; they simply include their HRegion-id in the
- * append() or completeCacheFlush() calls.
+ * <p>Each HRegion is identified by a unique long int. HRegions do not need to
+ * declare themselves before using the HLog; they simply include their
+ * HRegion-id in the {@link #append(Text, Text, Text, TreeMap, long)} or 
+ * {@link #completeCacheFlush(Text, Text, long)} calls.
  *
- * An HLog consists of multiple on-disk files, which have a chronological order.
+ * <p>An HLog consists of multiple on-disk files, which have a chronological
+ * order. As data is flushed to other (better) on-disk structures, the log
+ * becomes obsolete.  We can destroy all the log messages for a given
+ * HRegion-id up to the most-recent CACHEFLUSH message from that HRegion.
  *
- * As data is flushed to other (better) on-disk structures, the log becomes 
- * obsolete.  We can destroy all the log messages for a given HRegion-id up to 
- * the most-recent CACHEFLUSH message from that HRegion.
- *
- * It's only practical to delete entire files.  Thus, we delete an entire 
+ * <p>It's only practical to delete entire files.  Thus, we delete an entire 
  * on-disk file F when all of the messages in F have a log-sequence-id that's 
  * older (smaller) than the most-recent CACHEFLUSH message for every HRegion 
  * that has a message in F.
- ******************************************************************************/
-public class HLog {
+ * 
+ * <p>TODO: Vuk Ercegovac also pointed out that keeping HBase HRegion edit logs
+ * in HDFS is currently flawed. HBase writes edits to logs and to a memcache.
+ * The 'atomic' write to the log is meant to serve as insurance against
+ * abnormal RegionServer exit: on startup, the log is rerun to reconstruct an
+ * HRegion's last wholesome state. But files in HDFS do not 'exist' until they
+ * are cleanly closed -- something that will not happen if RegionServer exits
+ * without running its 'close'.
+ */
+public class HLog implements HConstants {
   private static final Log LOG = LogFactory.getLog(HLog.class);
   
   static final String HLOG_DATFILE = "hlog.dat.";
-  static final Text METACOLUMN = new Text("METACOLUMN");
+  static final Text METACOLUMN = new Text("METACOLUMN:");
   static final Text METAROW = new Text("METAROW");
 
   FileSystem fs;
@@ -66,28 +75,40 @@
   long oldestOutstandingSeqNum = -1;
 
   boolean closed = false;
-  long logSeqNum = 0;
+  transient long logSeqNum = 0;
   long filenum = 0;
-  int numEntries = 0;
+  transient int numEntries = 0;
 
   Integer rollLock = new Integer(0);
 
   /**
    * Bundle up a bunch of log files (which are no longer being written to),
    * into a new file.  Delete the old log files when ready.
+   * @param srcDir Directory of log files to bundle:
+   * e.g. <code>${REGIONDIR}/log_HOST_PORT</code>
+   * @param dstFile Destination file:
+   * e.g. <code>${REGIONDIR}/oldlogfile_HOST_PORT</code>
+   * @param fs FileSystem
+   * @param conf HBaseConfiguration
+   * @throws IOException
    */
-  public static void consolidateOldLog(Path srcDir, Path dstFile, FileSystem fs, Configuration conf) throws IOException {
-    LOG.debug("consolidating log files");
+  public static void consolidateOldLog(Path srcDir, Path dstFile,
+      FileSystem fs, Configuration conf)
+  throws IOException {
+    if(LOG.isDebugEnabled()) {
+      LOG.debug("consolidating log files");
+    }
     
     Path logfiles[] = fs.listPaths(srcDir);
-    SequenceFile.Writer newlog = SequenceFile.createWriter(fs, conf, dstFile, HLogKey.class, HLogEdit.class);
+    SequenceFile.Writer newlog = SequenceFile.createWriter(fs, conf, dstFile,
+        HLogKey.class, HLogEdit.class);
     try {
       for(int i = 0; i < logfiles.length; i++) {
-        SequenceFile.Reader in = new SequenceFile.Reader(fs, logfiles[i], conf);
+        SequenceFile.Reader in =
+          new SequenceFile.Reader(fs, logfiles[i], conf);
         try {
           HLogKey key = new HLogKey();
           HLogEdit val = new HLogEdit();
-          
           while(in.next(key, val)) {
             newlog.append(key, val);
           }
@@ -111,11 +132,13 @@
         }
       }
     }
-    LOG.debug("log file consolidation completed");
+    if(LOG.isDebugEnabled()) {
+      LOG.debug("log file consolidation completed");
+    }
   }
 
   /**
-   * Create an edit log at the given location.
+   * Create an edit log at the given <code>dir</code> location.
    *
    * You should never have to load an existing log.  If there is a log
    * at startup, it should have already been processed and deleted by 
@@ -125,19 +148,16 @@
     this.fs = fs;
     this.dir = dir;
     this.conf = conf;
-    this.logSeqNum = 0;
 
-    if(fs.exists(dir)) {
+    if (fs.exists(dir)) {
       throw new IOException("Target HLog directory already exists: " + dir);
     }
     fs.mkdirs(dir);
-
     rollWriter();
   }
 
   /**
-   * Roll the log writer.  That is, start writing log messages to
-   * a new file.
+   * Roll the log writer.  That is, start writing log messages to a new file.
    *
    * The 'rollLock' prevents us from entering rollWriter() more than
    * once at a time.
@@ -170,7 +190,9 @@
           }
         }
         
-        LOG.debug("closing current log writer and getting a new one");
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("closing current log writer and getting a new one");
+        }
 
         // Close the current writer (if any), and grab a new one.
         
@@ -178,14 +200,16 @@
           writer.close();
           
           if(filenum > 0) {
-            outputfiles.put(logSeqNum-1, computeFilename(filenum-1));
+            outputfiles.put(logSeqNum - 1, computeFilename(filenum - 1));
           }
         }
         
         Path newPath = computeFilename(filenum++);
         this.writer = SequenceFile.createWriter(fs, conf, newPath, HLogKey.class, HLogEdit.class);
 
-        LOG.debug("new log writer created");
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("new log writer created");
+        }
         
         // Can we delete any of the old log files?
         // First, compute the oldest relevant log operation 
@@ -203,7 +227,9 @@
         // Next, remove all files with a final ID that's older
         // than the oldest pending region-operation.
 
-        LOG.debug("removing old log files");
+        if(LOG.isDebugEnabled()) {
+          LOG.debug("removing old log files");
+        }
         
         for(Iterator<Long> it = outputfiles.keySet().iterator(); it.hasNext(); ) {
           long maxSeqNum = it.next().longValue();
@@ -226,7 +252,9 @@
         fs.delete(p);
       }
 
-      LOG.debug("old log files deleted");
+      if(LOG.isDebugEnabled()) {
+        LOG.debug("old log files deleted");
+      }
       
       this.numEntries = 0;
     }
@@ -247,21 +275,29 @@
   }
 
   /**
-   * Append a set of edits to the log.
-   * Log edits are keyed by regionName, rowname, and log-sequence-id.
+   * Append a set of edits to the log. Log edits are keyed by regionName,
+   * rowname, and log-sequence-id.
    *
    * Later, if we sort by these keys, we obtain all the relevant edits for
-   * a given key-range of the HRegion.  Any edits that do not have a matching
-   * COMPLETE_CACHEFLUSH message can be discarded.
+   * a given key-range of the HRegion (TODO).  Any edits that do not have a
+   * matching {@link HConstants#COMPLETE_CACHEFLUSH} message can be discarded.
    *
-   * Logs cannot be restarted once closed, or once the HLog process dies.
+   * <p>Logs cannot be restarted once closed, or once the HLog process dies.
    * Each time the HLog starts, it must create a new log.  This means that
    * other systems should process the log appropriately upon each startup
    * (and prior to initializing HLog).
    *
    * We need to seize a lock on the writer so that writes are atomic.
+   * @param regionName
+   * @param tableName
+   * @param row
+   * @param columns
+   * @param timestamp
+   * @throws IOException
    */
-  public synchronized void append(Text regionName, Text tableName, Text row, TreeMap<Text, byte[]> columns, long timestamp) throws IOException {
+  public synchronized void append(Text regionName, Text tableName, Text row,
+      TreeMap<Text, BytesWritable> columns, long timestamp)
+  throws IOException {
     if(closed) {
       throw new IOException("Cannot append; log is closed");
     }
@@ -272,19 +308,16 @@
     // most recent flush for every regionName.  However, for regions
     // that don't have any flush yet, the relevant operation is the
     // first one that's been added.
-    
-    if(regionToLastFlush.get(regionName) == null) {
+    if (regionToLastFlush.get(regionName) == null) {
       regionToLastFlush.put(regionName, seqNum[0]);
     }
 
     int counter = 0;
-    for(Iterator<Text> it = columns.keySet().iterator(); it.hasNext(); ) {
-      Text column = it.next();
-      byte[] val = columns.get(column);
-      HLogKey logKey = new HLogKey(regionName, tableName, row, seqNum[counter++]);
-      HLogEdit logEdit = new HLogEdit(column, val, timestamp);
+    for (Text column: columns.keySet()) {
+      HLogKey logKey =
+        new HLogKey(regionName, tableName, row, seqNum[counter++]);
+      HLogEdit logEdit = new HLogEdit(column, columns.get(column), timestamp);
       writer.append(logKey, logEdit);
-
       numEntries++;
     }
   }
@@ -317,40 +350,76 @@
    * Set a flag so that we do not roll the log between the start
    * and complete of a cache-flush.  Otherwise the log-seq-id for
    * the flush will not appear in the correct logfile.
+   * @return sequence ID to pass to {@link #completeCacheFlush(Text, Text, long)}
+   * @see {@link #completeCacheFlush(Text, Text, long)}
    */
   public synchronized long startCacheFlush() {
-    while(insideCacheFlush) {
+    while (insideCacheFlush) {
       try {
         wait();
       } catch (InterruptedException ie) {
       }
     }
-    
     insideCacheFlush = true;
     notifyAll();
     return obtainSeqNum();
   }
 
   /** Complete the cache flush */
-  public synchronized void completeCacheFlush(Text regionName, Text tableName, long logSeqId) throws IOException {
+  public synchronized void completeCacheFlush(final Text regionName,
+    final Text tableName, final long logSeqId)
+  throws IOException {
     if(closed) {
       return;
     }
     
     if(! insideCacheFlush) {
-      throw new IOException("Impossible situation: inside completeCacheFlush(), but 'insideCacheFlush' flag is false");
+      throw new IOException("Impossible situation: inside " +
+        "completeCacheFlush(), but 'insideCacheFlush' flag is false");
     }
     
     writer.append(new HLogKey(regionName, tableName, HLog.METAROW, logSeqId),
-        new HLogEdit(HLog.METACOLUMN, HStoreKey.COMPLETE_CACHEFLUSH, System.currentTimeMillis()));
+      new HLogEdit(HLog.METACOLUMN, COMPLETE_CACHEFLUSH,
+        System.currentTimeMillis()));
     numEntries++;
 
     // Remember the most-recent flush for each region.
     // This is used to delete obsolete log files.
-    
     regionToLastFlush.put(regionName, logSeqId);
 
     insideCacheFlush = false;
     notifyAll();
+  }
+
+  /**
+   * Pass a log file and it will dump out a text version on
+   * <code>stdout</code>.
+   * @param args
+   * @throws IOException
+   */
+  public static void main(String[] args) throws IOException {
+    if (args.length < 1) {
+      System.err.println("Usage: java org.apache.hadoop.hbase.HLog <logfile>");
+      System.exit(-1);
+    }
+    Configuration conf = new HBaseConfiguration();
+    FileSystem fs = FileSystem.get(conf);
+    Path logfile = new Path(args[0]);
+    if (!fs.exists(logfile)) {
+      throw new FileNotFoundException(args[0] + " does not exist");
+    }
+    if (!fs.isFile(logfile)) {
+      throw new IOException(args[0] + " is not a file");
+    }
+    Reader log = new SequenceFile.Reader(fs, logfile, conf);
+    try {
+      HLogKey key = new HLogKey();
+      HLogEdit val = new HLogEdit();
+      while(log.next(key, val)) {
+        System.out.println(key.toString() + " " + val.toString());
+      }
+    } finally {
+      log.close();
+    }
   }
 }

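To make the append/cache-flush protocol above concrete, here is a rough, hypothetical driver
(not part of this patch). The constructor argument order (fs, dir, conf) is inferred from the
field assignments in the diff, the close() call is assumed from the 'closed' flag, and the
region, table, column and path names are invented.

    import java.util.TreeMap;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.Text;

    class HLogAppendSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new HBaseConfiguration();
        FileSystem fs = FileSystem.get(conf);
        // The target directory must not already exist; HLog creates it and
        // rolls the first writer.
        HLog log = new HLog(fs, new Path("/hbase/log_example"), conf);
        try {
          // One edit for one row, keyed by region, table and row name.
          TreeMap<Text, BytesWritable> edits = new TreeMap<Text, BytesWritable>();
          edits.put(new Text("info:server"),
              new BytesWritable("host:60010".getBytes("UTF-8")));
          log.append(new Text("exampleregion"), new Text("exampletable"),
              new Text("examplerow"), edits, System.currentTimeMillis());

          // A memcache flush brackets its work with start/completeCacheFlush
          // so the flush's sequence id lands in the correct logfile.
          long flushSeqId = log.startCacheFlush();
          // ... flush memcache contents to an HStore here ...
          log.completeCacheFlush(new Text("exampleregion"),
              new Text("exampletable"), flushSeqId);
        } finally {
          log.close();
        }
      }
    }

The new main() added above can then be pointed at one of the written logfiles (presumably
named with the hlog.dat. prefix) to dump its keys and edits as text.
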
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogEdit.java Mon May  7 12:58:53 2007
@@ -34,9 +34,9 @@
   public HLogEdit() {
   }
 
-  public HLogEdit(Text column, byte[] bval, long timestamp) {
+  public HLogEdit(Text column, BytesWritable bval, long timestamp) {
     this.column.set(column);
-    this.val = new BytesWritable(bval);
+    this.val = bval;
     this.timestamp = timestamp;
   }
 
@@ -52,6 +52,12 @@
     return this.timestamp;
   }
 
+  @Override
+  public String toString() {
+    return getColumn().toString() + " " + this.getTimestamp() + " " +
+      new String(getVal().get()).trim();
+  }
+  
   //////////////////////////////////////////////////////////////////////////////
   // Writable
   //////////////////////////////////////////////////////////////////////////////

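A tiny sketch (not part of this commit) of the new BytesWritable-based constructor and the
added toString(); it assumes the snippet lives in the org.apache.hadoop.hbase package and
the column and value are invented.

    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.Text;

    class HLogEditToStringSketch {
      public static void main(String[] args) throws Exception {
        HLogEdit edit = new HLogEdit(new Text("info:server"),
            new BytesWritable("host:60010".getBytes("UTF-8")),
            System.currentTimeMillis());
        // Prints the column, the timestamp, then the value rendered as text.
        System.out.println(edit);
      }
    }
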
Modified: lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java?view=diff&rev=535970&r1=535969&r2=535970
==============================================================================
--- lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java (original)
+++ lucene/hadoop/trunk/src/contrib/hbase/src/java/org/apache/hadoop/hbase/HLogKey.java Mon May  7 12:58:53 2007
@@ -66,6 +66,12 @@
   public long getLogSeqNum() {
     return logSeqNum;
   }
+  
+  @Override
+  public String toString() {
+    return getTablename().toString() + " " + getRegionName().toString() + " " +
+      getRow().toString() + " " + getLogSeqNum();
+  }
 
   //////////////////////////////////////////////////////////////////////////////
   // Comparable