Posted to commits@hbase.apache.org by ap...@apache.org on 2008/12/13 13:59:51 UTC

svn commit: r726195 - in /hadoop/hbase/branches/0.19_on_hadoop_0.18: ./ conf/ src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/client/ src/java/org/apache/hadoop/hbase/client/tableindexed/ src/java/org/apache/hadoop/hbase/io/ src/java...

Author: apurtell
Date: Sat Dec 13 04:59:50 2008
New Revision: 726195

URL: http://svn.apache.org/viewvc?rev=726195&view=rev
Log:
merge up to trunk (rev 726153)

Added:
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/HeapSize.java
Modified:
    hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt
    hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hbase-default.xml
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HStoreKey.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HTable.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchOperation.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchUpdate.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/util/Bytes.java

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt Sat Dec 13 04:59:50 2008
@@ -102,7 +102,11 @@
    HBASE-1000  Sleeper.sleep does not go back to sleep when interrupted
                and no stop flag given.
    HBASE-900   Regionserver memory leak causing OOME during relatively
-               modest bulk importing; part 1
+               modest bulk importing; part 1 and part 2
+   HBASE-1054  Index NPE on scanning (Clint Morgan via Andrew Purtell)
+   HBASE-1052  Stopping a HRegionServer with unflushed cache causes data loss
+               from org.apache.hadoop.hbase.DroppedSnapshotException
+   HBASE-1059  ConcurrentModificationException in notifyChangedReadersObservers
 
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hbase-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hbase-default.xml?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hbase-default.xml (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hbase-default.xml Sat Dec 13 04:59:50 2008
@@ -53,9 +53,12 @@
   </property>
   <property>
     <name>hbase.client.write.buffer</name>
-    <value>10485760</value>
+    <value>2097152</value>
     <description>Size of the write buffer in bytes. A bigger buffer takes more
-    memory but reduces the number of RPC.
+    memory -- on both the client and server side since the server instantiates
+    the passed write buffer to process it -- but reduces the number of RPCs.
+    For an estimate of the server-side memory used, evaluate
+    hbase.client.write.buffer * hbase.regionserver.handler.count
     </description>
   </property>
   <property>
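
The description above implies a simple worst-case bound on server-side memory.
A back-of-envelope sketch in Java (the handler-count default of 10 is an
assumption, not something this patch touches):

    public class WriteBufferEstimate {
      public static void main(String[] args) {
        long writeBuffer = 2097152; // hbase.client.write.buffer, new default
        int handlers = 10;          // hbase.regionserver.handler.count (assumed)
        // The server may hold one deserialized write buffer per handler thread.
        System.out.println("worst case ~= " + (writeBuffer * handlers)
            + " bytes");            // ~20 MB with these values
      }
    }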

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HStoreKey.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HStoreKey.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HStoreKey.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HStoreKey.java Sat Dec 13 04:59:50 2008
@@ -24,6 +24,7 @@
 import java.io.DataOutput;
 import java.io.IOException;
 
+import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
@@ -31,7 +32,7 @@
 /**
  * A Key for a stored row.
  */
-public class HStoreKey implements WritableComparable<HStoreKey> {
+public class HStoreKey implements WritableComparable<HStoreKey>, HeapSize {
   /**
    * Colon character in UTF-8
    */
@@ -46,6 +47,14 @@
    * It is not serialized.  See https://issues.apache.org/jira/browse/HBASE-832
    */
   private HRegionInfo regionInfo = null;
+  
+  /**
+   * Estimated size tax paid for each instance of HSK.  Estimate based on
+   * study of jhat and jprofiler numbers.
+   */
+  // jprofiler says the shallow size is 48 bytes.  Add to it the cost of two
+  // byte arrays and then something for the HRI hosting.
+  public static final int ESTIMATED_HEAP_TAX = 48;
 
   /** Default constructor used in conjunction with Writable interface */
   public HStoreKey() {
@@ -200,12 +209,7 @@
     this.timestamp = timestamp;
     this.regionInfo = regionInfo;
   }
-  
-  /** @return Approximate size in bytes of this key. */
-  public long getSize() {
-    return getRow().length + getColumn().length + Bytes.SIZEOF_LONG;
-  }
-  
+
   /**
    * Constructs a new HStoreKey from another
    * 
@@ -586,7 +590,13 @@
     this.column = Bytes.readByteArray(in);
     this.timestamp = in.readLong();
   }
-  
+
+  public long heapSize() {
+    return getRow().length + Bytes.ESTIMATED_HEAP_TAX +
+      getColumn().length + Bytes.ESTIMATED_HEAP_TAX +
+      ESTIMATED_HEAP_TAX;
+  }
+
   /**
    * Passed as comparator for memcache and for store files.  See HBASE-868.
    */
@@ -649,8 +659,8 @@
     }
 
     @Override
-    public long getSize() {
-      return this.beforeThisKey.getSize();
+    public long heapSize() {
+      return this.beforeThisKey.heapSize();
     }
 
     @Override
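
For orientation, the new heapSize() composes like this for a concrete key
(a sketch that just restates the method above; 16 is Bytes.ESTIMATED_HEAP_TAX
and 48 the HStoreKey tax):

    import org.apache.hadoop.hbase.HStoreKey;
    import org.apache.hadoop.hbase.util.Bytes;

    public class HStoreKeySizeDemo {
      public static void main(String[] args) {
        HStoreKey k = new HStoreKey(Bytes.toBytes("row"), Bytes.toBytes("info:a"));
        // 3 (row) + 16 + 6 (column) + 16 + 48 = 89
        System.out.println("heapSize=" + k.heapSize());
      }
    }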

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HTableDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HTableDescriptor.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HTableDescriptor.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/HTableDescriptor.java Sat Dec 13 04:59:50 2008
@@ -188,6 +188,7 @@
         desc.values.entrySet()) {
       this.values.put(e.getKey(), e.getValue());
     }
+    this.indexes.putAll(desc.indexes);
   }
 
   /*
@@ -494,6 +495,11 @@
     s.append(FAMILIES);
     s.append(" => ");
     s.append(families.values());
+
+    s.append(", ");
+    s.append("INDEXES");
+    s.append(" => ");
+    s.append(indexes.values());
     s.append('}');
     return s.toString();
   }

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HConnectionManager.java Sat Dec 13 04:59:50 2008
@@ -854,9 +854,6 @@
             throw new RetriesExhaustedException(callable.getServerName(),
                 callable.getRegionName(), callable.getRow(), tries, exceptions);
           }
-          if (LOG.isDebugEnabled()) {
-            LOG.debug("reloading table servers because: " + t.getMessage());
-          }
         }
         try {
           Thread.sleep(getPauseTime(tries));

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HTable.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HTable.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/HTable.java Sat Dec 13 04:59:50 2008
@@ -113,7 +113,7 @@
     this.connection.locateRegion(tableName, HConstants.EMPTY_START_ROW);
     this.writeBuffer = new ArrayList<BatchUpdate>();
     this.writeBufferSize = 
-      this.configuration.getLong("hbase.client.write.buffer", 10485760);
+      this.configuration.getLong("hbase.client.write.buffer", 2097152);
     this.autoFlush = true;
     this.currentWriteBufferSize = 0;
     this.scannerCaching = conf.getInt("hbase.client.scanner.caching", 30);
@@ -1233,8 +1233,8 @@
       batchUpdate.setRowLock(rl.getLockId());
     }
     writeBuffer.add(batchUpdate);
-    currentWriteBufferSize += batchUpdate.getSize();
-    if(autoFlush || currentWriteBufferSize > writeBufferSize) {
+    currentWriteBufferSize += batchUpdate.heapSize();
+    if (autoFlush || currentWriteBufferSize > writeBufferSize) {
       flushCommits();
     }
   }
@@ -1247,12 +1247,12 @@
    */ 
   public synchronized void commit(final List<BatchUpdate> batchUpdates)
       throws IOException {
-    for(BatchUpdate bu : batchUpdates) {
+    for (BatchUpdate bu : batchUpdates) {
       checkRowAndColumns(bu);
       writeBuffer.add(bu);
-      currentWriteBufferSize += bu.getSize();
+      currentWriteBufferSize += bu.heapSize();
     }
-    if(autoFlush || currentWriteBufferSize > writeBufferSize) {
+    if (autoFlush || currentWriteBufferSize > writeBufferSize) {
       flushCommits();
     }
   }
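
With these changes the write buffer fills by estimated heap cost rather than
raw payload length. A usage sketch against the client API of this branch
(table and column names are made up):

    import java.io.IOException;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.io.BatchUpdate;
    import org.apache.hadoop.hbase.util.Bytes;

    public class BufferedCommitDemo {
      public static void main(String[] args) throws IOException {
        HTable table = new HTable(new HBaseConfiguration(), "mytable");
        table.setAutoFlush(false);  // buffer commits client-side
        BatchUpdate bu = new BatchUpdate(Bytes.toBytes("row1"));
        bu.put(Bytes.toBytes("colfam:qual"), Bytes.toBytes("value"));
        table.commit(bu);           // held until heapSize() total > writeBufferSize
        table.flushCommits();       // or flush explicitly
      }
    }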

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java Sat Dec 13 04:59:50 2008
@@ -175,4 +175,14 @@
         .writeObject(out, keyGenerator, IndexKeyGenerator.class, conf);
   }
 
+  /** {@inheritDoc} */
+  @Override
+  public String toString() {
+    StringBuilder sb = new StringBuilder();
+    sb.append("ID => ");
+    sb.append(indexId);
+    return sb.toString();
+  }
+  
+  
 }

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java Sat Dec 13 04:59:50 2008
@@ -164,7 +164,10 @@
         if (columns != null && columns.length > 0) {
           LOG.debug("Going to base table for remaining columns");
           RowResult baseResult = IndexedTable.this.getRow(baseRow, columns);
-          colValues.putAll(baseResult);
+          
+          if (baseResult != null) {
+            colValues.putAll(baseResult);
+          }
         }
         for (Entry<byte[], Cell> entry : row.entrySet()) {
           byte[] col = entry.getKey();

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchOperation.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchOperation.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchOperation.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchOperation.java Sat Dec 13 04:59:50 2008
@@ -35,11 +35,17 @@
  * a class per type because it makes the serialization easier.
  * @see BatchUpdate 
  */
-public class BatchOperation implements Writable {
+public class BatchOperation implements Writable, HeapSize {
+  /**
+   * Estimated size of this object.
+   */
+  // JHat says this is 32 bytes.
+  public final int ESTIMATED_HEAP_TAX = 36;
+  
   private byte [] column = null;
   
   // A null value defines DELETE operations.
-  private byte[] value = null;
+  private byte [] value = null;
   
   /**
    * Default constructor
@@ -132,4 +138,9 @@
       out.write(value);
     }
   }
+  
+  public long heapSize() {
+    return Bytes.ESTIMATED_HEAP_TAX * 2 + this.column.length +
+      this.value.length + ESTIMATED_HEAP_TAX;
+  }
 }
\ No newline at end of file
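
Note that value is documented above as null for DELETE operations, yet
heapSize() reads this.value.length unconditionally. A null-safe variant would
look like this (a sketch, not what was committed):

    public long heapSize() {
      // Guard the DELETE case, where value is null by convention.
      return Bytes.ESTIMATED_HEAP_TAX * 2 + this.column.length +
        (this.value == null ? 0 : this.value.length) + ESTIMATED_HEAP_TAX;
    }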

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchUpdate.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchUpdate.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchUpdate.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/BatchUpdate.java Sat Dec 13 04:59:50 2008
@@ -22,12 +22,15 @@
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Iterator;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.client.RowLock;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.io.WritableComparable;
 
@@ -38,8 +41,15 @@
  * can result in multiple BatchUpdate objects if the batch contains rows that
  * are served by multiple region servers.
  */
-public class BatchUpdate implements WritableComparable<BatchUpdate>, 
-  Iterable<BatchOperation> {
+public class BatchUpdate
+implements WritableComparable<BatchUpdate>, Iterable<BatchOperation>, HeapSize {
+  private static final Log LOG = LogFactory.getLog(BatchUpdate.class);
+  
+  /**
+   * Estimated 'shallow size' of this object not counting payload.
+   */
+  // Shallow size is 56.  Add 32 for the arraylist below.
+  public static final int ESTIMATED_HEAP_TAX = 56 + 32;
   
   // the row being updated
   private byte [] row = null;
@@ -142,13 +152,6 @@
   }
 
   /**
-   * @return BatchUpdate size in bytes.
-   */
-  public long getSize() {
-    return size;
-  }
-
-  /**
    * @return the timestamp this BatchUpdate will be committed with.
    */
   public long getTimestamp() {
@@ -247,8 +250,9 @@
       // If null, the PUT becomes a DELETE operation.
       throw new IllegalArgumentException("Passed value cannot be null");
     }
-    size += val.length + column.length;
-    operations.add(new BatchOperation(column, val));
+    BatchOperation bo = new BatchOperation(column, val);
+    this.size += bo.heapSize();
+    operations.add(bo);
   }
 
   /** 
@@ -336,4 +340,48 @@
   public int compareTo(BatchUpdate o) {
     return Bytes.compareTo(this.row, o.getRow());
   }
-}
+
+  public long heapSize() {
+    return this.row.length + Bytes.ESTIMATED_HEAP_TAX + this.size +
+      ESTIMATED_HEAP_TAX;
+  }
+  
+  /**
+   * Code to test sizes of BatchUpdate arrays.
+   * @param args
+   * @throws InterruptedException
+   */
+  public static void main(String[] args) throws InterruptedException {
+    RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
+    LOG.info("vmName=" + runtime.getVmName() + ", vmVendor="
+        + runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
+    LOG.info("vmInputArguments=" + runtime.getInputArguments());
+    final int count = 10000;
+    BatchUpdate[] batch1 = new BatchUpdate[count];
+    // TODO: x32 vs x64
+    long size = 0;
+    for (int i = 0; i < count; i++) {
+      BatchUpdate bu = new BatchUpdate(HConstants.EMPTY_BYTE_ARRAY);
+      bu.put(HConstants.EMPTY_BYTE_ARRAY, HConstants.EMPTY_BYTE_ARRAY);
+      batch1[i] = bu;
+      size += bu.heapSize();
+    }
+    LOG.info("batch1 estimated size=" + size);
+    // Make a variably sized memcache.
+    size = 0;
+    BatchUpdate[] batch2 = new BatchUpdate[count];
+    for (int i = 0; i < count; i++) {
+      BatchUpdate bu = new BatchUpdate(Bytes.toBytes(i));
+      bu.put(Bytes.toBytes(i), new byte[i]);
+      batch2[i] = bu;
+      size += bu.heapSize();
+    }
+    LOG.info("batch2 estimated size=" + size);
+    final int seconds = 30;
+    LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
+    for (int i = 0; i < seconds; i++) {
+      Thread.sleep(1000);
+    }
+    LOG.info("Exiting.");
+  }
+}
\ No newline at end of file
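
For each entry of batch1 in main() above, the estimate works out as follows
(a sketch restating the two heapSize() methods; every array is empty):

    // BatchOperation: 16 * 2 (two byte[] taxes) + 0 + 0 + 36      = 68
    // BatchUpdate:    0 (row) + 16 (byte[] tax) + 68 (size) + 88  = 172
    // So "batch1 estimated size" should log roughly 172 * 10000 bytes.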

Added: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/HeapSize.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/HeapSize.java?rev=726195&view=auto
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/HeapSize.java (added)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/io/HeapSize.java Sat Dec 13 04:59:50 2008
@@ -0,0 +1,33 @@
+/**
+ * Copyright 2008 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.io;
+
+/**
+ * Implementations can be asked for an estimate of their size in bytes.
+ * Useful for sizing caches.  It's a given that implementation approximations
+ * probably do not account for 32 vs 64 bit nor for different VM implementations.
+ */
+public interface HeapSize {
+  /**
+   * @return Approximate 'exclusive deep size' of the implementing object.
+   * Includes the payload and the hosting object's own overhead.
+   */
+  public long heapSize();
+}
\ No newline at end of file
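
A hypothetical implementor, just to illustrate the contract (the class is
made up for this note):

    import org.apache.hadoop.hbase.util.Bytes;

    public class SizedBuffer implements HeapSize {
      private final byte [] payload;
      public SizedBuffer(final byte [] payload) { this.payload = payload; }
      public long heapSize() {
        // Payload bytes plus the estimated JVM overhead of one byte [].
        return this.payload.length + Bytes.ESTIMATED_HEAP_TAX;
      }
    }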

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Sat Dec 13 04:59:50 2008
@@ -24,6 +24,7 @@
 import java.lang.management.ManagementFactory;
 import java.lang.management.MemoryUsage;
 import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
 import java.net.InetSocketAddress;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -282,10 +283,6 @@
     for(int i = 0; i < nbBlocks; i++)  {
       reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]);
     }
-    
-    // Register shutdown hook for HRegionServer, runs an orderly shutdown
-    // when a kill signal is recieved
-    Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
   }
 
   /**
@@ -522,6 +519,15 @@
       this.hbaseMaster = null;
     }
     join();
+
+    LOG.info("Running hdfs shutdown thread");
+    hdfsShutdownThread.start();
+    try {
+      hdfsShutdownThread.join();
+      LOG.info("Hdfs shutdown thread completed.");
+    } catch (InterruptedException e) {
+      LOG.warn("hdfsShutdownThread.join() was interrupted", e);
+    }
     LOG.info(Thread.currentThread().getName() + " exiting");
   }
 
@@ -552,6 +558,13 @@
       // to defaults).
       this.conf.set("fs.default.name", this.conf.get("hbase.rootdir"));
       this.fs = FileSystem.get(this.conf);
+
+      // Register shutdown hook for HRegionServer, runs an orderly shutdown
+      // when a kill signal is received
+      Runtime.getRuntime().addShutdownHook(new ShutdownThread(this,
+          Thread.currentThread()));
+      this.hdfsShutdownThread = suppressHdfsShutdownHook();
+
       this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
       this.log = setupHLog();
       this.logFlusher.setHLog(log);
@@ -693,25 +706,34 @@
    */
   private static class ShutdownThread extends Thread {
     private final HRegionServer instance;
+    private final Thread mainThread;
     
     /**
      * @param instance
+     * @param mainThread
      */
-    public ShutdownThread(HRegionServer instance) {
+    public ShutdownThread(HRegionServer instance, Thread mainThread) {
       this.instance = instance;
+      this.mainThread = mainThread;
     }
 
     @Override
     public void run() {
       LOG.info("Starting shutdown thread.");
       
-      // tell the region server to stop and wait for it to complete
+      // tell the region server to stop
       instance.stop();
-      instance.join();
+
+      // Wait for main thread to exit.
+      Threads.shutdown(mainThread);
+
       LOG.info("Shutdown thread complete");
     }    
   }
 
+  // We need to call HDFS shutdown when we are done shutting down
+  private Thread hdfsShutdownThread;
+
   /*
    * Inner class that runs on a long period checking if regions need major
    * compaction.
@@ -745,6 +767,43 @@
   }
   
   /**
+   * HDFS caches FileSystem instances so that FileSystem.get is fast. In
+   * order to make sure things are cleaned up, it also creates a shutdown hook
+   * so that all filesystems can be closed when the process is terminated. This
+   * conveniently runs concurrently with our own shutdown handler, and
+   * therefore causes all the filesystems to be closed before the server can do
+   * all its necessary cleanup.
+   *
+   * The crazy dirty reflection in this method sneaks into the FileSystem cache
+   * and grabs the shutdown hook, removes it from the list of active shutdown
+   * hooks, and hangs onto it until later. Then, after we're properly done with
+   * our graceful shutdown, we can execute the hdfs hook manually to make sure
+   * loose ends are tied up.
+   *
+   * This seems quite fragile and susceptible to breaking if Hadoop changes
+   * anything about the way this cleanup is managed. Keep an eye on things.
+   */
+  private Thread suppressHdfsShutdownHook() {
+    try {
+      Field field = FileSystem.class.getDeclaredField("clientFinalizer");
+      field.setAccessible(true);
+      Thread hdfsClientFinalizer = (Thread)field.get(null);
+      if (hdfsClientFinalizer == null) {
+        throw new RuntimeException("client finalizer is null, can't suppress!");
+      }
+      Runtime.getRuntime().removeShutdownHook(hdfsClientFinalizer);
+      return hdfsClientFinalizer;
+      
+    } catch (NoSuchFieldException nsfe) {
+      LOG.fatal("Couldn't find field 'clientFinalizer' in FileSystem!", nsfe);
+      throw new RuntimeException("Failed to suppress HDFS shutdown hook");
+    } catch (IllegalAccessException iae) {
+      LOG.fatal("Couldn't access field 'clientFinalizer' in FileSystem!", iae);
+      throw new RuntimeException("Failed to suppress HDFS shutdown hook");
+    }
+  }
+
+  /**
    * Report the status of the server. A server is online once all the startup 
    * is completed (setting up filesystem, starting service threads, etc.). This
    * method is designed mostly to be useful in tests.
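
Stripped of the HDFS specifics, the JVM pattern the patch relies on is
detach-then-run-manually (a sketch; an anonymous Runnable keeps it
Java-5-friendly):

    public class HookOrderingDemo {
      public static void main(String[] args) throws InterruptedException {
        Thread theirs = new Thread(new Runnable() {
          public void run() { System.out.println("their cleanup"); }
        });
        Runtime.getRuntime().addShutdownHook(theirs);
        // Detach so the JVM no longer runs it concurrently with our hook.
        Runtime.getRuntime().removeShutdownHook(theirs);
        System.out.println("our orderly shutdown runs first");
        theirs.start();  // then run the suppressed hook manually, last
        theirs.join();
      }
    }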

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Sat Dec 13 04:59:50 2008
@@ -122,8 +122,10 @@
   private final Path compactionDir;
   private final Integer compactLock = new Integer(0);
   private final int compactionThreshold;
+  
+  // All access must be synchronized.
   private final Set<ChangedReadersObserver> changedReaderObservers =
-    Collections.synchronizedSet(new HashSet<ChangedReadersObserver>());
+    new HashSet<ChangedReadersObserver>();
 
   /**
    * An HStore is a set of zero or more MapFiles, which stretch backwards over 
@@ -632,8 +634,9 @@
     return compactionNeeded;
   }
   
-  private boolean internalFlushCache(SortedMap<HStoreKey, byte []> cache,
-      long logCacheFlushId) throws IOException {
+  private boolean internalFlushCache(final SortedMap<HStoreKey, byte []> cache,
+    final long logCacheFlushId)
+  throws IOException {
     long flushed = 0;
     // Don't flush if there are no entries.
     if (cache.size() == 0) {
@@ -672,7 +675,7 @@
             if (!isExpired(curkey, ttl, now)) {
               entries++;
               out.append(curkey, new ImmutableBytesWritable(bytes));
-              flushed += curkey.getSize() + (bytes == null ? 0 : bytes.length);
+              flushed += this.memcache.heapSize(curkey, bytes, null);
             }
           }
         }
@@ -691,7 +694,7 @@
       if(LOG.isDebugEnabled()) {
         LOG.debug("Added " + FSUtils.getPath(flushedFile.getMapFilePath()) +
           " with " + entries +
-          " entries, sequence id " + logCacheFlushId + ", data size " +
+          " entries, sequence id " + logCacheFlushId + ", data size ~" +
           StringUtils.humanReadableInt(flushed) + ", file size " +
           StringUtils.humanReadableInt(newStoreSize) + " to " +
           this.info.getRegionNameAsString());
@@ -740,15 +743,19 @@
    * @param o Observer who wants to know about changes in set of Readers
    */
   void addChangedReaderObserver(ChangedReadersObserver o) {
-    this.changedReaderObservers.add(o);
+    synchronized(this.changedReaderObservers) {
+      this.changedReaderObservers.add(o);
+    }
   }
   
   /*
    * @param o Observer no longer interested in changes in set of Readers.
    */
   void deleteChangedReaderObserver(ChangedReadersObserver o) {
-    if (!this.changedReaderObservers.remove(o)) {
-      LOG.warn("Not in set" + o);
+    synchronized (this.changedReaderObservers) {
+      if (!this.changedReaderObservers.remove(o)) {
+        LOG.warn("Not in set" + o);
+      }
     }
   }
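
The motivation for dropping Collections.synchronizedSet (HBASE-1059): the
wrapper guards individual calls but not iteration, so walking the set still
races with add/remove. With the plain HashSet every access, including the
notification loop, takes the same monitor. Roughly (the callback name is
assumed from the ChangedReadersObserver interface of this branch):

    synchronized (this.changedReaderObservers) {
      for (ChangedReadersObserver o : this.changedReaderObservers) {
        o.updateReaders();  // safe: add/remove above hold the same lock
      }
    }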
 

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/Memcache.java Sat Dec 13 04:59:50 2008
@@ -21,6 +21,8 @@
 package org.apache.hadoop.hbase.regionserver;
 
 import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.management.RuntimeMXBean;
 import java.rmi.UnexpectedException;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -52,7 +54,7 @@
  * this point we let the snapshot go.
  */
 class Memcache {
-  private final Log LOG = LogFactory.getLog(this.getClass().getName());
+  private static final Log LOG = LogFactory.getLog(Memcache.class);
   
   private final long ttl;
   
@@ -170,18 +172,48 @@
    * Write an update
    * @param key
    * @param value
-   * @return memcache size delta
+   * @return Approximate size of the passed key and value.  Includes
+   * cost of hosting HSK and byte arrays as well as the Map.Entry this addition
+   * costs when we insert into the backing TreeMap.
    */
   long add(final HStoreKey key, final byte[] value) {
+    long size = -1;
     this.lock.readLock().lock();
     try {
-      byte[] oldValue = this.memcache.remove(key);
+      byte [] oldValue = this.memcache.remove(key);
       this.memcache.put(key, value);
-      return key.getSize() + (value == null ? 0 : value.length) -
-          (oldValue == null ? 0 : oldValue.length);
+      size = heapSize(key, value, oldValue);
     } finally {
       this.lock.readLock().unlock();
     }
+    return size;
+  }
+  
+  /*
+   * Calculate how the memcache size has changed, approximately.
+   * Add in tax of TreeMap.Entry.
+   * @param key
+   * @param value
+   * @param oldValue
+   * @return
+   */
+  long heapSize(final HStoreKey key, final byte [] value,
+      final byte [] oldValue) {
+    // First add value length.
+    long keySize = key.heapSize();
+    // Add value.
+    long size = value == null? 0: value.length;
+    if (oldValue == null) {
+      size += keySize;
+      // Add overhead for value byte array and for Map.Entry -- 57 bytes
+      // on x64 according to jprofiler.
+      size += Bytes.ESTIMATED_HEAP_TAX + 57;
+    } else {
+      // If old value, don't add overhead again nor key size. Just add
+      // difference in value sizes.
+      size -= oldValue.length;
+    }
+    return size;
   }
 
   /**
@@ -835,4 +867,47 @@
       }
     }
   }
-}
+
+  /**
+   * Code to help figure if our approximation of object heap sizes is close
+   * enough.  See hbase-900.  Fills memcaches then waits so user can heap
+   * dump and bring up resultant hprof in something like jprofiler which
+   * allows you get 'deep size' on objects.
+   * @param args
+   * @throws InterruptedException
+   */
+  public static void main(String [] args) throws InterruptedException {
+    RuntimeMXBean runtime = ManagementFactory.getRuntimeMXBean();
+    LOG.info("vmName=" + runtime.getVmName() + ", vmVendor=" +
+      runtime.getVmVendor() + ", vmVersion=" + runtime.getVmVersion());
+    LOG.info("vmInputArguments=" + runtime.getInputArguments());
+    Memcache memcache1 = new Memcache();
+    // TODO: x32 vs x64
+    long size = 0;
+    final int count = 10000;
+    for (int i = 0; i < count; i++) {
+      size += memcache1.add(new HStoreKey(Bytes.toBytes(i)),
+        HConstants.EMPTY_BYTE_ARRAY);
+    }
+    LOG.info("memcache1 estimated size=" + size);
+    for (int i = 0; i < count; i++) {
+      size += memcache1.add(new HStoreKey(Bytes.toBytes(i)),
+        HConstants.EMPTY_BYTE_ARRAY);
+    }
+    LOG.info("memcache1 estimated size (2nd loading of same data)=" + size);
+    // Make a variably sized memcache.
+    Memcache memcache2 = new Memcache();
+    for (int i = 0; i < count; i++) {
+      byte [] b = Bytes.toBytes(i);
+      size += memcache2.add(new HStoreKey(b, b),
+        new byte [i]);
+    }
+    LOG.info("memcache2 estimated size=" + size);
+    final int seconds = 30;
+    LOG.info("Waiting " + seconds + " seconds while heap dump is taken");
+    for (int i = 0; i < seconds; i++) {
+      Thread.sleep(1000);
+    }
+    LOG.info("Exiting.");
+  }
+}
\ No newline at end of file
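
Tracing the accounting through main() above is a quick sanity check on
heapSize(key, value, oldValue) (a sketch; Bytes.toBytes(int) yields a 4-byte
array and the one-arg HStoreKey constructor leaves the column empty):

    // First loading of memcache1 -- every key is new, so per add():
    //   key.heapSize() = 4 (row) + 16 + 0 (column) + 16 + 48 = 84
    //   delta = 0 (value) + 84 + 16 (value byte[] tax) + 57 (Map.Entry) = 157
    // Second loading -- each key already present, oldValue is the same empty
    // array, so delta = 0 - 0 = 0 and the running total should not move.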

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/util/Bytes.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/util/Bytes.java?rev=726195&r1=726194&r2=726195&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/util/Bytes.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/util/Bytes.java Sat Dec 13 04:59:50 2008
@@ -37,6 +37,13 @@
    * Size of double in bytes
    */
   public static final int SIZEOF_DOUBLE = Double.SIZE/Byte.SIZE;
+  
+  /**
+   * Estimate of size cost to pay beyond payload in jvm for instance of byte [].
+   * Estimate based on study of jhat and jprofiler numbers.
+   */
+  // JHat says BU is 56 bytes.
+  public static final int ESTIMATED_HEAP_TAX = 16;
 
   /**
    * Pass this to TreeMaps where byte [] are keys.