Posted to commits@hbase.apache.org by ap...@apache.org on 2008/11/25 22:05:01 UTC

svn commit: r720595 - in /hadoop/hbase/branches/0.19_on_hadoop_0.18: ./ conf/ src/java/ src/java/org/apache/hadoop/hbase/client/tableindexed/ src/java/org/apache/hadoop/hbase/regionserver/ src/java/org/apache/hadoop/hbase/regionserver/metrics/

Author: apurtell
Date: Tue Nov 25 13:05:00 2008
New Revision: 720595

URL: http://svn.apache.org/viewvc?rev=720595&view=rev
Log:
merge up to trunk (revision 720575)

Modified:
    hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt
    hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hadoop-metrics.properties
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
    hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/overview.html

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/CHANGES.txt Tue Nov 25 13:05:00 2008
@@ -77,6 +77,12 @@
    HBASE-1003  If cell exceeds TTL but not VERSIONs, will not be removed during
                major compaction
    HBASE-1005  Regex and string comparison operators for ColumnValueFilter
+   HBASE-910   Scanner misses columns / rows when the scanner is obtained
+               during a memcache flush
+   HBASE-1009  Master stuck in loop wanting to assign but regions are closing
+   HBASE-1016  Fix example in javadoc overview
+   HBASE-1021  hbase metrics FileContext not working
+   HBASE-1023  Check global flusher
       
   IMPROVEMENTS
    HBASE-901   Add a limit to key length, check key and value length on client side
@@ -129,7 +135,12 @@
                the deleted cell
    HBASE-675   Report correct server hosting a table split for assignment to
                for MR Jobs
-        
+   HBASE-927   We don't recover if HRS hosting -ROOT-/.META. goes down
+   HBASE-1013  Add debugging around commit log cleanup
+   HBASE-972   Update hbase trunk to use released hadoop 0.19.0
+   HBASE-1022  Add storefile index size to hbase metrics
+   HBASE-1026  Tests in mapred are failing
+ 
   NEW FEATURES
    HBASE-875   Use MurmurHash instead of JenkinsHash [in bloomfilters]
                (Andrzej Bialecki via Stack)

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hadoop-metrics.properties
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hadoop-metrics.properties?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hadoop-metrics.properties (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/conf/hadoop-metrics.properties Tue Nov 25 13:05:00 2008
@@ -8,7 +8,7 @@
 hbase.class=org.apache.hadoop.metrics.spi.NullContext
 
 # Configuration of the "hbase" context for file
-# hbase.class=org.apache.hadoop.metrics.file.FileContext
+# hbase.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
 # hbase.period=10
 # hbase.fileName=/tmp/metrics_hbase.log
 
@@ -21,7 +21,7 @@
 jvm.class=org.apache.hadoop.metrics.spi.NullContext
 
 # Configuration of the "jvm" context for file
-# jvm.class=org.apache.hadoop.metrics.file.FileContext
+# jvm.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
 # jvm.period=10
 # jvm.fileName=/tmp/metrics_jvm.log
 
@@ -31,10 +31,10 @@
 # jvm.servers=GMETADHOST_IP:8649
 
 # Configuration of the "rpc" context for null
-hbase.class=org.apache.hadoop.metrics.spi.NullContext
+rpc.class=org.apache.hadoop.metrics.spi.NullContext
 
 # Configuration of the "rpc" context for file
-# rpc.class=org.apache.hadoop.metrics.file.FileContext
+# rpc.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
 # rpc.period=10
 # rpc.fileName=/tmp/metrics_rpc.log
 
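This swaps the stock FileContext, which HBASE-1021 found not to work for
hbase metrics, for the new TimeStampingFileContext. Enabling file output for,
say, the "hbase" context then amounts to uncommenting the block above; assembled,
it would look like this (period and file name are just the illustrative defaults
from the comments):

  hbase.class=org.apache.hadoop.hbase.metrics.file.TimeStampingFileContext
  hbase.period=10
  hbase.fileName=/tmp/metrics_hbase.log

The same substitution applies to the jvm and rpc contexts.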

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexSpecification.java Tue Nov 25 13:05:00 2008
@@ -59,7 +59,6 @@
    * @param indexedColumns
    * @param additionalColumns
    * @param keyGenerator
-   * @param keyComparator
    */
   public IndexSpecification(String indexId, byte[][] indexedColumns,
       byte[][] additionalColumns, IndexKeyGenerator keyGenerator) {

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/client/tableindexed/IndexedTable.java Tue Nov 25 13:05:00 2008
@@ -52,7 +52,6 @@
 
   private Map<String, HTable> indexIdToTable = new HashMap<String, HTable>();
 
-  /** {@inheritDoc} */
   public IndexedTable(final HBaseConfiguration conf, final byte[] tableName)
       throws IOException {
     super(conf, tableName);

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Tue Nov 25 13:05:00 2008
@@ -297,6 +297,7 @@
       // Now ask master what it wants us to do and tell it what we have done
       for (int tries = 0; !stopRequested.get() && isHealthy();) {
         // Try to get the root region location from the master.
+        if (!haveRootRegion.get()) {
           HServerAddress rootServer = hbaseMaster.getRootRegionLocation();
           if (rootServer != null) {
             // By setting the root region location, we bypass the wait imposed on
@@ -305,6 +306,7 @@
                 new HRegionLocation(HRegionInfo.ROOT_REGIONINFO, rootServer));
             haveRootRegion.set(true);
           }
+        }
           long now = System.currentTimeMillis();
         if (lastMsg != 0 && (now - lastMsg) >= serverLeaseTimeout) {
           // It has been way too long since we last reported to the master.
@@ -650,21 +652,33 @@
     // Is this too expensive every three seconds getting a lock on onlineRegions
     // and then per store carried?  Can I make metrics be sloppier and avoid
     // the synchronizations?
+    int stores = 0;
     int storefiles = 0;
     long memcacheSize = 0;
+    long storefileIndexSize = 0;
     synchronized (this.onlineRegions) {
       for (Map.Entry<Integer, HRegion> e: this.onlineRegions.entrySet()) {
         HRegion r = e.getValue();
         memcacheSize += r.memcacheSize.get();
         synchronized(r.stores) {
+          stores += r.stores.size();
           for(Map.Entry<Integer, HStore> ee: r.stores.entrySet()) {
-            storefiles += ee.getValue().getStorefilesCount();
+            HStore store = ee.getValue(); 
+            storefiles += store.getStorefilesCount();
+            try {
+              storefileIndexSize += store.getStorefilesIndexSize();
+            } catch (IOException ex) {
+              LOG.warn("error getting store file index size for " + store +
+                ": " + StringUtils.stringifyException(ex));  
+            }
           }
         }
       }
     }
+    this.metrics.stores.set(stores);
     this.metrics.storefiles.set(storefiles);
     this.metrics.memcacheSizeMB.set((int)(memcacheSize/(1024*1024)));
+    this.metrics.storefileIndexSizeMB.set((int)(storefileIndexSize/(1024*1024)));
   }
 
   /**

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStore.java Tue Nov 25 13:05:00 2008
@@ -2137,7 +2137,19 @@
   int getStorefilesCount() {
     return this.storefiles.size();
   }
-  
+
+  /**
+   * @return The size of the store file indexes, in bytes.
+   * @throws IOException if there was a problem getting file sizes from the
+   * filesystem
+   */
+  long getStorefilesIndexSize() throws IOException {
+    long size = 0;
+    for (HStoreFile s: storefiles.values())
+      size += s.indexLength();
+    return size;
+  }
+
   /*
    * Datastructure that holds size and key.
    */

Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/HStoreFile.java Tue Nov 25 13:05:00 2008
@@ -52,7 +52,7 @@
  * <p>An HStoreFile usually tracks 4 things: its parent dir, the region
  * identifier, the column family, and the file identifier.  If you know those
  * four things, you know how to obtain the right HStoreFile.  HStoreFiles may
- * also refernce store files in another region serving either from
+ * also reference store files in another region serving either from
  * the top-half of the remote file or from the bottom-half.  Such references
  * are made when splitting regions, to keep splits fast.
  * 
@@ -101,6 +101,7 @@
   /* If true, this file was product of a major compaction.
    */
   private boolean majorCompaction = false;
+  private long indexLength;
 
   /**
    * Constructor that fully initializes the object
@@ -381,7 +382,7 @@
       out.close();
     }
   }
-  
+
   /**
    * Delete store map files.
    * @throws IOException 
@@ -477,6 +478,18 @@
     return (isReference())? l / 2: l;
   }
 
+  /**
+   * @return Length of the store map file index.
+   * @throws IOException
+   */
+  public synchronized long indexLength() throws IOException {
+    if (indexLength == 0) {
+      Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
+      indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
+    }
+    return indexLength;
+  }
+
   @Override
   public String toString() {
     return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +

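The new indexLength() computes the index size lazily and caches it: a MapFile
is really a directory holding a data file and an index file, and the index's
on-disk length is read from the filesystem once, on first call. The cached
value never refreshes, which is fine because store files are immutable once
written. A minimal standalone sketch of the same measurement (the class name
is illustrative; the path comes from the command line):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.hadoop.fs.FileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.io.MapFile;

  public class MapFileIndexSize {
    public static void main(String[] args) throws Exception {
      Configuration conf = new Configuration();
      // A MapFile is a directory holding "data" and "index" files; the
      // index is what gets loaded into region server memory on open.
      Path mapFileDir = new Path(args[0]);
      Path index = new Path(mapFileDir, MapFile.INDEX_FILE_NAME);
      FileSystem fs = index.getFileSystem(conf);
      // Size in bytes of the on-disk index, per the filesystem's metadata.
      System.out.println("index length: " +
        fs.getFileStatus(index).getLen() + " bytes");
    }
  }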
Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/MemcacheFlusher.java Tue Nov 25 13:05:00 2008
@@ -220,17 +220,23 @@
    */
   private synchronized void flushSomeRegions() {
     // keep flushing until we hit the low water mark
+    long globalMemcacheSize = -1;
     for (SortedMap<Long, HRegion> m =
         this.server.getCopyOfOnlineRegionsSortedBySize();
-      server.getGlobalMemcacheSize() >= globalMemcacheLimitLowMark;) {
+      (globalMemcacheSize = server.getGlobalMemcacheSize()) >=
+        this.globalMemcacheLimitLowMark;) {
       // flush the region with the biggest memcache
       if (m.size() <= 0) {
         LOG.info("No online regions to flush though we've been asked to flush " +
-            "some; globalMemcacheSize=" + this.server.getGlobalMemcacheSize() +
+            "some; globalMemcacheSize=" + globalMemcacheSize +
             ", globalMemcacheLimitLowMark=" + this.globalMemcacheLimitLowMark);
         break;
       }
       HRegion biggestMemcacheRegion = m.remove(m.firstKey());
+      LOG.info("Forced flushing of " + biggestMemcacheRegion.toString() +
+        " because global memcache limit of " + this.globalMemcacheLimit +
+        " exceeded; currently " + globalMemcacheSize + " and flushing till " +
+        this.globalMemcacheLimitLowMark);
       if (!flushRegion(biggestMemcacheRegion, true)) {
         // Something bad happened - give up.
         break;

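The reworked loop reads the global memcache size once per iteration and reuses
it for both the loop test and the log message, and the new LOG.info makes
forced flushes visible (HBASE-1023). The control flow is a classic high/low
water mark scheme: flushing is triggered once the global memcache crosses
globalMemcacheLimit, and the biggest regions are flushed until usage falls
under globalMemcacheLimitLowMark. A stripped-down sketch of the pattern (names
and sizes are illustrative, not HBase API):

  import java.util.SortedMap;

  public class WaterMarkFlush {
    static final long LOW_MARK = 256L * 1024 * 1024;  // stop flushing here

    // regionsBySize maps memcache size -> region name; biggest goes first.
    static void flushSomeRegions(SortedMap<Long, String> regionsBySize,
        long globalSize) {
      while (globalSize >= LOW_MARK) {
        if (regionsBySize.isEmpty()) {
          System.out.println("nothing left to flush at " + globalSize);
          break;
        }
        // Flush the region with the biggest memcache first, so each pass
        // reclaims as much memory as possible.
        long biggest = regionsBySize.lastKey();
        System.out.println("flushing " + regionsBySize.remove(biggest));
        globalSize -= biggest;  // the real code re-reads the global size
      }
    }
  }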
Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/org/apache/hadoop/hbase/regionserver/metrics/RegionServerMetrics.java Tue Nov 25 13:05:00 2008
@@ -48,12 +48,23 @@
    * Count of requests to the regionservers since last call to metrics update
    */
   private final MetricsIntValue requests = new MetricsIntValue("requests");
-  
+
+  /**
+   * Count of stores open on the regionserver.
+   */
+  public final MetricsIntValue stores = new MetricsIntValue("stores");
+
   /**
    * Count of storefiles open on the regionserver.
    */
   public final MetricsIntValue storefiles = new MetricsIntValue("storefiles");
-  
+
+  /**
+   * Sum of all the storefile index sizes in this regionserver in MB
+   */
+  public final MetricsIntValue storefileIndexSizeMB =
+    new MetricsIntValue("storefileIndexSizeMB");
+
   /**
    * Sum of all the memcache sizes in this regionserver in MB
    */
@@ -80,7 +91,9 @@
    */
   public void doUpdates(@SuppressWarnings("unused") MetricsContext unused) {
     synchronized (this) {
+      this.stores.pushMetric(this.metricsRecord);
       this.storefiles.pushMetric(this.metricsRecord);
+      this.storefileIndexSizeMB.pushMetric(this.metricsRecord);
       this.memcacheSizeMB.pushMetric(this.metricsRecord);
       this.regions.pushMetric(this.metricsRecord);
       synchronized(this.requests) {
@@ -124,8 +137,13 @@
     sb.append(this.requests.get()/seconds);
     sb.append(", regions=");
     sb.append(this.regions.get());
+    sb.append(", stores=");
+    sb.append(this.stores.get());
     sb.append(", storefiles=");
     sb.append(this.storefiles.get());
+    sb.append(", storefileIndexSize=");
+    sb.append(this.storefileIndexSizeMB.get());
+    sb.append("MB");
     sb.append(", memcacheSize=");
     sb.append(this.memcacheSizeMB.get());
     sb.append("MB");

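The two new gauges follow the same three-step MetricsIntValue pattern as the
existing storefiles metric: declare the value, set it from freshly computed
data (here, in HRegionServer's metrics pass), and push it into the metrics
record in doUpdates(). Condensed into a self-contained sketch (the class and
record names are illustrative):

  import org.apache.hadoop.metrics.MetricsContext;
  import org.apache.hadoop.metrics.MetricsRecord;
  import org.apache.hadoop.metrics.MetricsUtil;
  import org.apache.hadoop.metrics.util.MetricsIntValue;

  public class GaugeSketch {
    // 1. Declare the gauge once.
    final MetricsIntValue storefileIndexSizeMB =
      new MetricsIntValue("storefileIndexSizeMB");
    final MetricsRecord record;

    GaugeSketch() {
      MetricsContext context = MetricsUtil.getContext("hbase");
      record = MetricsUtil.createRecord(context, "regionserver");
    }

    // 2. Refresh the value whenever new data is computed.
    void update(long indexSizeBytes) {
      storefileIndexSizeMB.set((int) (indexSizeBytes / (1024 * 1024)));
    }

    // 3. Push it into the record on each metrics period.
    void doUpdates() {
      storefileIndexSizeMB.pushMetric(record);
      record.update();
    }
  }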
Modified: hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/overview.html
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/overview.html?rev=720595&r1=720594&r2=720595&view=diff
==============================================================================
--- hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/overview.html (original)
+++ hadoop/hbase/branches/0.19_on_hadoop_0.18/src/java/overview.html Tue Nov 25 13:05:00 2008
@@ -195,6 +195,7 @@
 import org.apache.hadoop.hbase.io.BatchUpdate;
 import org.apache.hadoop.hbase.io.Cell;
 import org.apache.hadoop.hbase.io.RowResult;
+import org.apache.hadoop.hbase.util.Bytes;
 
 public class MyClient {
 
@@ -209,17 +210,20 @@
 
     // To do any sort of update on a row, you use an instance of the BatchUpdate
     // class. A BatchUpdate takes a row and optionally a timestamp which your
-    // updates will affect. 
+    // updates will affect.  If no timestamp, the server applies current time
+    // to the edits.
     BatchUpdate batchUpdate = new BatchUpdate("myRow");
 
-    // The BatchUpdate#put method takes a Text that describes what cell you want
-    // to put a value into, and a byte array that is the value you want to 
-    // store. Note that if you want to store strings, you have to getBytes() 
-    // from the string for HBase to understand how to store it. (The same goes
-    // for primitives like ints and longs and user-defined classes - you must 
-    // find a way to reduce it to bytes.)
+    // The BatchUpdate#put method takes a byte [] (or String) that designates
+    // what cell you want to put a value into, and a byte array that is the
+    // value you want to store. Note that if you want to store Strings, you
+    // have to getBytes() from the String for HBase to store it since HBase is
+    // all about byte arrays. The same goes for primitives like ints and longs
+    // and user-defined classes - you must find a way to reduce it to bytes.
+    // The Bytes class from the hbase util package has utilities for going
+    // from String to UTF-8 bytes and back, plus helpers for other base types.
     batchUpdate.put("myColumnFamily:columnQualifier1", 
-      "columnQualifier1 value!".getBytes());
+      Bytes.toBytes("columnQualifier1 value!"));
 
     // Deletes are batch operations in HBase as well. 
     batchUpdate.delete("myColumnFamily:cellIWantDeleted");
@@ -235,7 +239,9 @@
     // value contained is a string and want an actual string, then you must 
     // convert it yourself.
     Cell cell = table.get("myRow", "myColumnFamily:columnQualifier1");
-    String valueStr = new String(cell.getValue());
+    // This could throw a NullPointerException if there was no value at the cell
+    // location.
+    String valueStr = Bytes.toString(cell.getValue());
     
     // Sometimes, you won't know the row you're looking for. In this case, you
     // use a Scanner. This will give you cursor-like interface to the contents
@@ -245,30 +251,31 @@
       table.getScanner(new String[]{"myColumnFamily:columnQualifier1"});
     
     
-    // Scanners in HBase 0.2 return RowResult instances. A RowResult is like the
-    // row key and the columns all wrapped up in a single interface. 
+    // Scanners return RowResult instances. A RowResult is like the
+    // row key and the columns all wrapped up in a single Object. 
     // RowResult#getRow gives you the row key. RowResult also implements 
     // Map, so you can get to your column results easily. 
     
     // Now, for the actual iteration. One way is to use a while loop like so:
     RowResult rowResult = scanner.next();
     
-    while(rowResult != null) {
+    while (rowResult != null) {
       // print out the row we found and the columns we were looking for
-      System.out.println("Found row: " + new String(rowResult.getRow()) + " with value: " +
-       rowResult.get("myColumnFamily:columnQualifier1".getBytes()));
-      
+      System.out.println("Found row: " + Bytes.toString(rowResult.getRow()) +
+        " with value: " + rowResult.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
       rowResult = scanner.next();
     }
     
     // The other approach is to use a foreach loop. Scanners are iterable!
     for (RowResult result : scanner) {
       // print out the row we found and the columns we were looking for
-      System.out.println("Found row: " + new String(result.getRow()) + " with value: " +
-       result.get("myColumnFamily:columnQualifier1".getBytes()));
+      System.out.println("Found row: " + Bytes.toString(result.getRow()) +
+        " with value: " + result.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
     }
     
     // Make sure you close your scanners when you are done!
+    // It's probably best to put the iteration into a try/finally with the
+    // close inside the finally clause; see the sketch after this example.
     scanner.close();
   }
 }
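
Following that advice, the scan portion of the example could be arranged so
the scanner is released even when iteration fails; a sketch against the same
client API used above:

  Scanner scanner =
    table.getScanner(new String[] { "myColumnFamily:columnQualifier1" });
  try {
    for (RowResult result : scanner) {
      System.out.println("Found row: " + Bytes.toString(result.getRow()) +
        " with value: " +
        result.get(Bytes.toBytes("myColumnFamily:columnQualifier1")));
    }
  } finally {
    // Runs even if the iteration throws, so the server-side scanner
    // resources are always released.
    scanner.close();
  }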