You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by ap...@apache.org on 2010/01/09 22:10:05 UTC

svn commit: r897547 [3/10] - in /hadoop/hbase/branches/0.20_on_hadoop-0.18.3: ./ bin/ conf/ lib/ src/contrib/ src/contrib/ec2/ src/contrib/ec2/bin/ src/contrib/ec2/bin/image/ src/contrib/indexed/ src/contrib/indexed/lib/ src/contrib/indexed/lib/fmpp-0....

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html Sat Jan  9 21:09:59 2010
@@ -0,0 +1,339 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<!--
+   Copyright 2010 The Apache Software Foundation
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<head />
+<body bgcolor="white">
+<h2>Indexed HBase</h2>
+      <p>
+        This page gives the high levels for the indexed hbase contrib.
+        It is assumed that the reader has in-depth knowledge of HBase.
+      </p>
+<h2>Table Of Contents</h2>
+        <ol>
+          <li>
+            <a href=#IndexedHBase-IndexedHBase>Indexed HBase</a>
+            <ol>
+              <li>
+                <a href=#IndexedHBase-Purpose>Purpose</a>
+              </li>
+              <li>
+                <a href=#IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F>Why do we think IHbase outperforms ITHBase?</a>
+              </li>
+            </ol>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Usage>Usage</a>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Implementationnotes>Implementation notes</a>
+          </li>
+        </ol>
+
+      <h3>
+        <a name=IndexedHBase-Purpose></a>Purpose
+      </h3>
+      <p>
+        The goal of the indexed HBase contrib is to speed up scans by indexing HBase columns.
+        Indexed HBase (IHBase) is different from the indexed tables in transactional HBase (ITHBase):
+        while the indexes in ITHBase are, in fact, hbase tables using the indexed column's values
+        as row keys, IHBase creates indexes at the region level.
+        The differences are summarized in the table below.
+      </p>
+      <table >
+        <tbody>
+        <tr>
+          <th >
+            Feature
+          </th>
+          <th >
+            ITHBase
+          </th>
+          <th >
+            IHBase
+          </th>
+          <th >
+            Comment
+          </th>
+        </tr>
+        <tr>
+          <td >
+            global ordering
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            IHBase has an index for each region. The flip side of not having global ordering
+            is compatibility with the good old HRegion: results are coming back in row
+            order (and not value order as in ITHBase)
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Full table scan?
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            THbase does a partial scan on the index table. ITHBase supports specifying start/end rows to limit the number of scanned regions
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Multiple Index Usage<br clear=all>
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            IHBase can take advantage of multiple indexes in the same scan. IHBase IdxScan object accepts an Expression which allows intersection/unison of several indexed column criteria
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra disk storage
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            IHBase indexes are created when the region starts/flushes and do not require any extra storage
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra RAM
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            IHBase indexes are in memory and hence increase the memory overhead.
+            THBbase indexes increase the number of regions each region server has to support thus costing memory too
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Parallel scanning support
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            In ITHBase the index table needs to be consulted and then GETs are issued
+            for each matching row. The behavior of IHBase (as perceived by the client)
+            is no different than a regular scan and hence supports parallel
+            scanning seamlessly. <font color=darkgray>parallel GET can be implemented
+            to speedup THbase scans</font>
+          </td>
+        </tr>
+        </tbody>
+      </table>
+      <h3>
+        <a name=IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F></a>Why do we think IHbase outperforms THBase?
+      </h3>
+      <ol>
+        <li>
+          More flexible:
+          <ol>
+            <li>
+              Supports range queries and multi-index queries
+            </li>
+            <li>
+              Supports different types - not only byte arrays
+            </li>
+          </ol>
+        </li>
+        <li>
+          Less overhead: THBase pays at least two 'table roundtrips' - one for the index table and the other for the main table
+        </li>
+        <li>
+          Quicker index expression evaluation: IHBase is using dedicated index data structures while ITHBase is using the regular HRegion scan facilities
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Usage></a>Usage
+      </h2>
+      <p>
+        To use Indexed HBase do the following:
+      </p>
+      <ol>
+        <li>
+          Set the hbase.region.impl property to IdxRegion
+          <div class=panelMacro>
+            <table >
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>IdxRegion HBase configuration snippet</b><br>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java>&lt;property&gt;
+  &lt;name&gt;hbase.hregion.impl&lt;/name&gt;
+  &lt;value&gt;org.apache.hadoop.hbase.regionserver.IdxRegion&lt;/value&gt;
+&lt;/property&gt;</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
+          When creating a table define which columns to index using IdxColumnDescriptor.
+          The supported types are all the <a href="http://java.sun.com/docs/books/tutorial/java/nutsandbolts/datatypes.html"> java primitive data types</a>
+          except boolean, byte[], char[] and BigDecimal
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Creating an HTable with an index on family:qual column</b><br>
+                  <p>
+                    Note that this snippet assumes that all the values assigned to family:qual are exactly 8 bytes, preferrably created using Bytes.toBytes(long). The table may have rows in which family:qual is missing, those rows will not be included in the index.
+                  </p>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java><span class=code-object>byte</span>[] tableName = Bytes.toBytes(<span class=code-quote>"table"</span>);
+<span class=code-object>byte</span>[] familyName = Bytes.toBytes(<span class=code-quote>"family"</span>);
+<span class=code-object>byte</span>[] qualifier = Bytes.toBytes(<span class=code-quote>"qual"</span>);
+
+IdxColumnDescriptor idxColumnDescriptor = <span class=code-keyword>new</span> IdxColumnDescriptor(familyPairName);
+IdxIndexDescriptor indexDescriptor  = <span class=code-keyword>new</span> IdxIndexDescriptor(qualifier, IdxQualifierType.LONG);
+idxColumnDescriptor.addIndexDescriptor(indexDescriptor);
+HTableDescriptor htd = <span class=code-keyword>new</span> HTableDescriptor(tableName);
+htd.addFamily(idxColumnDescriptor);
+    
+HBaseConfiguration conf = <span class=code-keyword>new</span> HBaseConfiguration();
+conf.setClass(HConstants.REGION_IMPL, IdxRegion.class, IdxRegion.class);
+HBaseAdmin admin = <span class=code-keyword>new</span> HBaseAdmin(conf);
+admin.createTable(htd);
+HTable table = <span class=code-keyword>new</span> HTable(conf, desc.getName());
+     . . .</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
+          When scanning make sure you instantiate an IdxScan and that you set the Expression property
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Indexed scans</b><br>
+                  <p>
+                    Notes:
+                  </p>
+                  <ul>
+                    <li>
+                    <font color=brown><b>Setting an expression doesn't exclude setting a mathcing filter. This duplication is absolutely essential for getting correct scan results</b> </font>
+                    </li>
+                    <li>
+                    The index expression must accept any row accepted by the filter
+                    </li>
+                    <li>
+                    The filter may accept a subset of the rows accepted by the index expression (e.g. narrow down the results set)
+                    </li>
+                    <li>
+                    Setting a filter without setting an expression is supported and would revert to a 'good old scan'
+                    </li>
+                    <li>
+                    The supported expression types are comparison, and, or. Comparisons support GT, GTE, EQ, LTE, LT
+                    </li>
+                    <li>
+                    The caller may combine any number of index expressions using any of the existing indexes. Trying to add an expression for a non-indexed column would result in a runtime error
+                    <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java>. . .
+IdxScan idxScan = <span class=code-keyword>new</span> IdxScan();
+idxScan.setExpression(Expression.comparison(familyName, qualifier, Comparison.Operator.EQ, Bytes.toBytes(42L));
+idxScan.setFilter(<span class=code-keyword>new</span> SingleColumnValueFilter(familyName, qualifier, CompareFilter.CompareOp.EQUAL, Bytes.toBytes(42L)));
+idxScan.setCaching(1000);
+
+ResultScanner scanner = table.getScanner(idxScan);
+<span class=code-keyword>for</span> (Result res : scanner) {
+   <span class=code-comment>// Do stuff with res
+</span>}</pre>
+                    </div>
+                    </div>
+                    </li>
+                  </ul>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Implementationnotes></a>Implementation notes
+      </h2>
+      <ul>
+        <li>
+          We only index Store files. Every index scan performs a full memstore scan. Indexing the memstore will be implemented only if scanning the memstore will prove to be a performance bottleneck
+        </li>
+        <li>
+          Index expression evaluation is performed using bitsets. There are two types of bitsets: compressed and expanded. An index will typically store a compressed bitset while an expression evaluator will most probably use an expanded bitset
+        </li>
+        <li>
+          TODO
+        </li>
+      </ul>
+    </div>
+  </div>
+</div>
+<div id=footer>
+</div>
+<br></body>
+</html>

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import static org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder.calcHeapSize;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+/**
+ * A complete index implementation - all keys are put in the keystore, no skips.
+ */
+class CompleteIndex implements IdxIndex {
+  /**
+   * The fixed part in the heap size calcualtion.
+   */
+  static final long FIXED_SIZE = ClassSize.align(ClassSize.OBJECT +
+    ClassSize.REFERENCE + 3 * (ClassSize.ARRAY + ClassSize.REFERENCE) +
+    Bytes.SIZEOF_LONG + 2 * Bytes.SIZEOF_INT
+  );
+
+  /**
+   * The capacity of the sets.
+   */
+  private int numKeyValues;
+  /**
+   * The key store - holds the col:qual values.
+   */
+  private List<?> keyStore;
+  /**
+   * The value store - holds sets with {@link numKeyValues} capacity.
+   */
+  private IntSet[] valueStore;
+  /**
+   * Sets containing partial calculations of the tail operation.
+   */
+  private IntSet[] heads;
+  /**
+   * Sets containing partial calculations of the head operation.
+   */
+  private IntSet[] tails;
+  /**
+   * The partial calculation interval (used to determine up to which point
+   * to use the valueStore before grabbing a pre-calculated set.
+   */
+  private int precalcInterval;
+  /**
+   * The heap size.
+   */
+  private long heapSize;
+
+  /**
+   * Construct a new complete index.
+   *
+   * @param keyStore        the key store
+   * @param valueStore      the value store
+   * @param heads           a list of precalculated heads
+   * @param tails           a list of precalculated tails
+   * @param numKeyValues    the total number of KeyValues for this region
+   * @param precalcInterval the interval by which tails/heads are precalculated
+   */
+  CompleteIndex(List<?> keyStore, IntSet[] valueStore,
+    IntSet[] heads, IntSet[] tails,
+    int numKeyValues, int precalcInterval) {
+    this.keyStore = keyStore;
+    this.valueStore = valueStore;
+    this.heads = heads;
+    this.tails = tails;
+    this.numKeyValues = numKeyValues;
+    this.precalcInterval = precalcInterval;
+    heapSize = FIXED_SIZE + keyStore.heapSize() + calcHeapSize(valueStore) +
+      calcHeapSize(heads) + calcHeapSize(tails);
+  }
+
+  /**
+   * Looks up a key in the index.
+   *
+   * @param probe the probe to lookup
+   * @return the set of results, may be empty
+   */
+  @Override
+  public IntSet lookup(byte[] probe) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    return index >= 0 ? valueStore[index].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * Find all the results which are greater and perhaps equal to the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return a possibly empty set of results
+   */
+  @Override
+  public IntSet tail(byte[] probe, boolean inclusive) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    if (index < 0 || !inclusive) {
+      index++;
+    }
+    if (index < 0) {
+      index = -index;
+    }
+    int tailIndex = index / precalcInterval + 1;
+    IntSet result = tailIndex < tails.length ?
+      tails[tailIndex].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+    int stopIndex = Math.min(tailIndex * precalcInterval, valueStore.length);
+    for (int i = index; i < stopIndex; i++) {
+      result = result.unite(valueStore[i]);
+    }
+    return result;
+  }
+
+  /**
+   * Find all the results which are less than and perhaps equal to the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return a possibly empty set of results
+   */
+  @Override
+  public IntSet head(byte[] probe, boolean inclusive) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    if (index >= 0 && inclusive) {
+      index++;
+    }
+    if (index < 0) {
+      index = -(index + 1);
+    }
+
+    int headIndex = (index - 1) / precalcInterval;
+    IntSet result = headIndex > 0 ?
+      heads[headIndex].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+    int startIndex = Math.max(headIndex * precalcInterval, 0);
+    for (int i = startIndex; i < index; i++) {
+      result = result.unite(valueStore[i]);
+    }
+    return result;
+  }
+
+  @Override
+  public long heapSize() {
+    return heapSize;
+  }
+
+  public String probeToString(byte[] bytes) {
+    return ArrayUtils.toString(keyStore.fromBytes(bytes));
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.idx.IdxIndexDescriptor;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BigDecimalArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.CharArrayArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.CharArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.DoubleArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.FloatArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.IntegerArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.LongArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ShortArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A builder class used to create complete indexes.
+ */
+public class CompleteIndexBuilder {
+
+  private HColumnDescriptor columnDescriptor;
+  private IdxIndexDescriptor indexDescriptor;
+
+  /**
+   * The target keystore.
+   */
+  private List<?> keyStore;
+  /**
+   * The value store set builders.
+   */
+  private ObjectArrayList<IntSetBuilder> valueStoreBuilders;
+
+  /**
+   * Construct a new complete index builder.
+   *
+   * @param columnDescriptor the column descriptor
+   * @param indexDescriptor  the index descriptor
+   */
+  public CompleteIndexBuilder(HColumnDescriptor columnDescriptor,
+    IdxIndexDescriptor indexDescriptor) {
+    this.columnDescriptor = columnDescriptor;
+    this.indexDescriptor = indexDescriptor;
+
+    switch (this.indexDescriptor.getQualifierType()) {
+      case BYTE_ARRAY:
+        keyStore = new ByteArrayArrayList();
+        break;
+      case LONG:
+        keyStore = new LongArrayList();
+        break;
+      case DOUBLE:
+        keyStore = new DoubleArrayList();
+        break;
+      case BYTE:
+        keyStore = new ByteArrayList();
+        break;
+      case CHAR:
+        keyStore = new CharArrayList();
+        break;
+      case SHORT:
+        keyStore = new ShortArrayList();
+        break;
+      case INT:
+        keyStore = new IntegerArrayList();
+        break;
+      case FLOAT:
+        keyStore = new FloatArrayList();
+        break;
+      case BIG_DECIMAL:
+        keyStore = new BigDecimalArrayList();
+        break;
+      case CHAR_ARRAY:
+        keyStore = new CharArrayArrayList();
+        break;
+      default:
+        throw new IllegalStateException("Unsupported type " +
+          this.indexDescriptor.getQualifierType());
+    }
+    valueStoreBuilders = new ObjectArrayList<IntSetBuilder>();
+
+  }
+
+  /**
+   * Add a new key value to the index. The keyvalues are added in 'key' order.
+   *
+   * @param kv the keyvalue.
+   * @param id the id of the keyvalue (e.g. its place in the sorted order)
+   */
+  public void addKeyValue(KeyValue kv, int id) {
+    assert Bytes.equals(indexDescriptor.getQualifierName(), kv.getQualifier())
+      && Bytes.equals(columnDescriptor.getName(), kv.getFamily());
+    byte[] key = kv.getValue();
+    int index = BinarySearch.search(keyStore, keyStore.size(), key);
+    IntSetBuilder intsetBuilder;
+    if (index < 0) {
+      intsetBuilder = new IntSetBuilder().start();
+      index = -(index + 1);
+      keyStore.insert(index, key);
+      valueStoreBuilders.insert(index, intsetBuilder);
+    } else {
+      intsetBuilder = valueStoreBuilders.get(index);
+    }
+    intsetBuilder.addNext(id);
+  }
+
+  /**
+   * Finalized the index creation and creates the new index.
+   *
+   * @param numKeyValues the total number of keyvalues in the region
+   * @return a new complete index
+   */
+  @SuppressWarnings({"unchecked"})
+  public IdxIndex finalizeIndex(int numKeyValues) {
+    if (valueStoreBuilders.size() > 0) {
+      assert numKeyValues > 0;
+      int indexSize = keyStore.size();
+
+      IntSet[] valueStore = new IntSet[indexSize];
+      for (int i = 0; i < indexSize; i++) {
+        valueStore[i] = valueStoreBuilders.get(i).finish(numKeyValues);
+      }
+      int interval = (int) Math.round(Math.sqrt(indexSize));
+      int precalcSize = indexSize / interval +
+        Integer.signum(indexSize % interval);
+
+      IntSet[] tails = new IntSet[precalcSize];
+      IntSet currentTail = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      for (int i = indexSize - 1; i >= 0; i--) {
+        currentTail = currentTail.unite(valueStore[i]);
+        if (i % interval == 0) {
+          tails[i / interval] = currentTail;
+          currentTail = currentTail.clone();
+        }
+      }
+
+      IntSet[] heads = new IntSet[precalcSize];
+      IntSet currentHead = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      for (int i = 0; i < indexSize; i++) {
+        currentHead = currentHead.unite(valueStore[i]);
+        if (i % interval == 0) {
+          heads[i / interval] = currentHead;
+          currentHead = currentHead.clone();
+        }
+      }
+
+      return new CompleteIndex(keyStore, valueStore, heads, tails,
+        numKeyValues, interval);
+    } else {
+      return new EmptyIndex(keyStore, numKeyValues);
+    }
+  }
+
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,91 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+/**
+ * An empty index.
+ */
+public class EmptyIndex implements IdxIndex {
+
+  private static final int HEAP_SIZE = ClassSize.align(ClassSize.OBJECT +
+    ClassSize.REFERENCE + Bytes.SIZEOF_INT);
+
+  private List<?> keyStore;
+  private int numKeyValues;
+
+  /**
+   * Construct a new empty index with a given capacity. All sets genreated by
+   * this empty index will be initiazlized using the provided capacity.
+   *
+   * @param keyStore the key store
+   * @param capacity the capacity
+   */
+  EmptyIndex(List<?> keyStore, int capacity) {
+    this.keyStore = keyStore;
+    this.numKeyValues = capacity;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet lookup(byte[] probe) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet tail(byte[] probe, boolean inclusive) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet head(byte[] probe, boolean inclusive) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  @Override
+  public String probeToString(byte[] bytes) {
+    return ArrayUtils.toString(keyStore.fromBytes(bytes));
+  }
+
+  @Override
+  public long heapSize() {
+    return HEAP_SIZE;
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.client.idx.exp.And;
+import org.apache.hadoop.hbase.client.idx.exp.Comparison;
+import org.apache.hadoop.hbase.client.idx.exp.Expression;
+import org.apache.hadoop.hbase.client.idx.exp.Or;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Evaluates an {@link Expression}.
+ */
+public class IdxExpressionEvaluator implements HeapSize {
+  private static final Log LOG = LogFactory.getLog(IdxExpressionEvaluator.class);
+  /**
+   * Evaluates the expression using the provided search context.
+   *
+   * @param searchContext the search context to use whe evaluating the
+   *                      exrpession
+   * @param expression    the expression to evaluate.
+   * @return a set which contains ids of rows matching the expression provided
+   */
+  public IntSet evaluate(IdxSearchContext searchContext, Expression expression) {
+    if (expression == null) return null;
+
+    if (expression instanceof And) {
+      return evaluate(searchContext, (And) expression);
+    } else if (expression instanceof Or) {
+      return evaluate(searchContext, (Or) expression);
+    } else if (expression instanceof Comparison) {
+      return evaluate(searchContext, (Comparison) expression);
+    } else {
+      throw new IllegalArgumentException("Could not evaluate expression type " +
+        expression.getClass().getName());
+    }
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, And and) {
+    IntSet result = null;
+    for (Expression expression : and.getChildren()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Intersecting expression:");
+      }
+      IntSet childResult = evaluate(searchContext, expression);
+      if (result == null) {
+        result = childResult;
+      } else if (childResult != null) {
+        result = result.intersect(childResult);
+      }
+    }
+    return result;
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, Or or) {
+    IntSet result = null;
+    for (Expression expression : or.getChildren()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Uniting expression:");
+      }
+      IntSet childResult = evaluate(searchContext, expression);
+      if (result == null) {
+        result = childResult;
+      } else if (childResult != null) {
+        result = result.unite(childResult);
+      }
+    }
+    return result;
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, Comparison comparison) {
+    IdxIndex index = searchContext.getIndex(comparison.getColumnName(), comparison.getQualifier());
+    if (index == null) throw new IllegalStateException(
+            String.format("Could not find an index for column: '%s', qualifier: '%s'",
+                    Bytes.toString(comparison.getColumnName()),
+                    Bytes.toString(comparison.getQualifier())));
+
+    IntSet matched = null;
+    switch (comparison.getOperator()) {
+      case EQ:
+        matched = index.lookup(comparison.getValue());
+        break;
+      case GT:
+        matched = index.tail(comparison.getValue(), false);
+        break;
+      case GTE:
+        matched = index.tail(comparison.getValue(), true);
+        break;
+      case LT:
+        matched = index.head(comparison.getValue(), false);
+        break;
+      case LTE:
+        matched = index.head(comparison.getValue(), true);
+        break;
+    }
+
+    if (LOG.isDebugEnabled() && matched != null) {
+      LOG.debug(String.format("Evaluation of comparison on column: '%s', " +
+          "qualifier: '%s', operator: %s, value: '%s' yielded %s matches",
+          Bytes.toString(comparison.getColumnName()),
+          Bytes.toString(comparison.getQualifier()), 
+          comparison.getOperator(),
+          index.probeToString(comparison.getValue()), matched.size()));
+    }
+
+    return matched != null ? matched : null;
+  }
+
+  @Override
+  public long heapSize() {
+    return ClassSize.OBJECT;
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+
+/**
+ * An index interface.
+ */
+public interface IdxIndex extends HeapSize {
+
+  /**
+   * Looks up an object. returns only exact matches.
+   *
+   * @param probe the probe to lookup
+   * @return the result set
+   */
+  IntSet lookup(byte[] probe);
+
+  /**
+   * Gets all hte objects which are greater (or greater equal) than the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return the result set
+   */
+  IntSet tail(byte[] probe, boolean inclusive);
+
+  /**
+   * Gets all hte objects which are lesser (or lesser equal) than the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return the result set
+   */
+  IntSet head(byte[] probe, boolean inclusive);
+
+  /**
+   * Returns a string representation of the provided bytes probe.
+   * @param bytes the bytes
+   * @return the string representation
+   */
+  String probeToString(byte[] bytes);
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegion.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,482 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.JmxHelper;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.idx.IdxScan;
+import org.apache.hadoop.hbase.client.idx.exp.Expression;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.metrics.util.MBeanUtil;
+import org.apache.hadoop.util.Progressable;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * An indexed region - has the capability to index a subset of the columns and speedup scans which specify a
+ * filter expression.
+ */
+public class IdxRegion extends HRegion {
+  static final Log LOG = LogFactory.getLog(IdxRegion.class);
+
+  private static final int INDEX_BUILD_TIME_HISTORY_SIZE = 10;
+
+  private static final long FIXED_OVERHEAD = ClassSize.REFERENCE * 2 +
+    2 * (ClassSize.REFERENCE + ClassSize.ATOMIC_INTEGER) +
+    ClassSize.REFERENCE + ClassSize.ATOMIC_LONG +
+    ClassSize.REFERENCE + ClassSize.ARRAY +
+    INDEX_BUILD_TIME_HISTORY_SIZE * Bytes.SIZEOF_LONG +
+    Bytes.SIZEOF_INT;
+
+  private IdxRegionIndexManager indexManager;
+  private IdxExpressionEvaluator expressionEvaluator;
+
+  // todo add a total number of ongoing scans to the HRegion
+  private AtomicInteger numberOfOngoingIndexedScans;
+  // todo add a a resetable total scan count when HRegion has jmx support
+  private AtomicLong totalIndexedScans;
+  private AtomicLong totalNonIndexedScans;
+  // the index build time history
+  private volatile long[] buildTimes;
+  private volatile int currentBuildTimesIndex;
+
+  /**
+   * A default constructor matching the default region constructor.
+   */
+  public IdxRegion() {
+    super();
+  }
+
+  /**
+   * See {@link HRegion#HRegion(org.apache.hadoop.fs.Path, HLog, org.apache.hadoop.fs.FileSystem, org.apache.hadoop.hbase.HBaseConfiguration, org.apache.hadoop.hbase.HRegionInfo, FlushRequester)}.
+   * <p/>
+   * Initializes the index manager and the expression evaluator.
+   */
+  public IdxRegion(Path basedir, HLog log, FileSystem fs, HBaseConfiguration
+    conf, HRegionInfo regionInfo, FlushRequester flushListener) {
+    super(basedir, log, fs, conf, regionInfo, flushListener);
+    indexManager = new IdxRegionIndexManager(this);
+    expressionEvaluator = new IdxExpressionEvaluator();
+    // monitoring parameters
+    numberOfOngoingIndexedScans = new AtomicInteger(0);
+    totalIndexedScans = new AtomicLong(0);
+    totalNonIndexedScans = new AtomicLong(0);
+    buildTimes = new long[INDEX_BUILD_TIME_HISTORY_SIZE];
+    resetIndexBuildTimes();
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * calls super and then initialized the index manager.
+   */
+  @Override
+  public void initialize(Path initialFiles, Progressable reporter)
+    throws IOException {
+    super.initialize(initialFiles, reporter);
+    rebuildIndexes();
+    JmxHelper.registerMBean(
+      IdxRegionMBeanImpl.generateObjectName(getRegionInfo()),
+      IdxRegionMBeanImpl.newIdxRegionMBeanImpl(this));
+  }
+
+  /**
+   * {@inheritDoc}
+   * </p>
+   * Rebuilds the index.
+   */
+  @Override
+  protected void internalPreFlashcacheCommit() throws IOException {
+    rebuildIndexes();
+    super.internalPreFlashcacheCommit();
+  }
+
+  private void rebuildIndexes() throws IOException {
+    long time = indexManager.rebuildIndexes();
+    buildTimes[currentBuildTimesIndex] = time;
+    currentBuildTimesIndex = (currentBuildTimesIndex + 1) % buildTimes.length;
+  }
+
+
+  @Override
+  public List<StoreFile> close(boolean abort) throws IOException {
+    MBeanUtil.unregisterMBean(
+      IdxRegionMBeanImpl.generateObjectName(getRegionInfo()));
+    return super.close(abort);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Constructs an internal scanner based on the scan expression. Reverts
+   * to default region scan in case an expression was not provided.
+   */
+  @Override
+  protected InternalScanner instantiateInternalScanner(Scan scan,
+    List<KeyValueScanner> additionalScanners) throws IOException {
+    Expression expression = IdxScan.getExpression(scan);
+    if (scan == null || expression == null) {
+      totalNonIndexedScans.incrementAndGet();
+      return super.instantiateInternalScanner(scan, additionalScanners);
+    } else {
+      totalIndexedScans.incrementAndGet();
+      // Grab a new search context
+      IdxSearchContext searchContext = indexManager.newSearchContext();
+      // use the expression evaluator to determine the final set of ints
+      IntSet matchedExpression = expressionEvaluator.evaluate(searchContext,
+        expression);
+      if (LOG.isDebugEnabled()) {
+        LOG.debug(String.format("%s rows matched the index expression",
+          matchedExpression.size()));
+      }
+      return new IdxRegionScanner(scan, searchContext, matchedExpression);
+    }
+  }
+
+  /**
+   * A monitoring operation which exposes the number of indexed keys.
+   *
+   * @return the number of indexed keys.
+   */
+  public int getNumberOfIndexedKeys() {
+    return indexManager.getNumberOfKeys();
+  }
+
+  /**
+   * The total heap size consumed by all indexes.
+   *
+   * @return the index heap size.
+   */
+  public long getIndexesTotalHeapSize() {
+    return indexManager.heapSize();
+  }
+
+  /**
+   * Gets the total number of indexed scan since the last reset.
+   *
+   * @return the total number of indexed scans.
+   */
+  public long getTotalIndexedScans() {
+    return totalIndexedScans.get();
+  }
+
+  /**
+   * Resets the total number of indexed scans.
+   *
+   * @return the number of indexed scans just before the reset
+   */
+  public long resetTotalIndexedScans() {
+    return totalIndexedScans.getAndSet(0);
+  }
+
+  /**
+   * Gets the total number of non indexed scan since the last reset.
+   *
+   * @return the total number of indexed scans.
+   */
+  public long getTotalNonIndexedScans() {
+    return totalNonIndexedScans.get();
+  }
+
+  /**
+   * Resets the total number of non indexed scans.
+   *
+   * @return the number of indexed scans just before the reset
+   */
+  public long resetTotalNonIndexedScans() {
+    return totalNonIndexedScans.getAndSet(0);
+  }
+
+  /**
+   * Exposes the number of indexed scans currently ongoing in the system.
+   *
+   * @return the number of ongoing indexed scans
+   */
+  public long getNumberOfOngoingIndexedScans() {
+    return numberOfOngoingIndexedScans.get();
+  }
+
+  /**
+   * Gets the index build times buffer.
+   *
+   * @return a rolling buffer of index build times
+   */
+  public long[] getIndexBuildTimes() {
+    return buildTimes;
+  }
+
+  /**
+   * Resets the index build times array.
+   *
+   * @return the previous build times array
+   */
+  public long[] resetIndexBuildTimes() {
+    // note: this may 'corrupt' the array if ran in parallel with a build time
+    // array modification and that's ok. we are not changing pointers so
+    // no catastrophy should happen - worse case the manager would reset again
+    long[] prevTimes = buildTimes.clone();
+    Arrays.fill(buildTimes, -1L);
+    currentBuildTimesIndex = 0;
+    return prevTimes;
+  }
+
+  /**
+   * The size of the index on the specified column in bytes.
+   *
+   * @param columnName the column to check.
+   * @return the size fo the requesed index
+   */
+  public long getIndexHeapSize(String columnName) {
+    return indexManager.getIndexHeapSize(columnName);
+  }
+
+  class IdxRegionScanner extends RegionScanner {
+    private final KeyProvider keyProvider;
+    private KeyValue lastKeyValue;
+
+    IdxRegionScanner(Scan scan, IdxSearchContext idxSearchContext,
+      IntSet matchedExpression) throws IOException {
+      super(scan);
+      numberOfOngoingIndexedScans.incrementAndGet();
+      keyProvider = new KeyProvider(idxSearchContext, matchedExpression, scan);
+    }
+
+    @Override
+    public boolean next(List<KeyValue> outResults) throws IOException {
+      // Seek to the next key value
+      seekNext();
+      boolean result = super.next(outResults);
+
+      // if there are results we need to key track of the key to ensure that the
+      // nextInternal method doesn't seek backwards on it's next invocation
+      if (!outResults.isEmpty()) {
+        lastKeyValue = outResults.get(0);
+      }
+
+      return result;
+    }
+
+    protected void seekNext() throws IOException {
+      KeyValue keyValue;
+      do {
+        keyValue = keyProvider.next();
+
+        if (keyValue == null) {
+          // out of results keys, nothing more to process
+          super.getStoreHeap().close();
+          return;
+        } else if (lastKeyValue == null) {
+          // first key returned from the key provider
+          break;
+        } else {
+          // it's possible that the super nextInternal method progressed past the
+          // ketProvider's next key.  We need to keep calling next on the keyProvider
+          // until the key returned is after the last key returned from the
+          // next(List<KeyValue>) method.
+
+          // determine which of the two keys is less than the other
+          // when the keyValue is greater than the lastKeyValue then we're good
+          int comparisonResult = comparator.compareRows(keyValue, lastKeyValue);
+          if (comparisonResult > 0) {
+            break;
+          }
+        }
+      } while (true);
+
+      // seek the store heap to the next key
+      // (this is what makes the scanner faster)
+      getStoreHeap().seek(keyValue);
+    }
+
+    @Override
+    public void close() {
+      numberOfOngoingIndexedScans.decrementAndGet();
+      keyProvider.close();
+      super.close();
+    }
+  }
+
+  class KeyProvider {
+    private final KeyValueHeap memstoreHeap;
+    private final IdxSearchContext indexSearchContext;
+    private final IntSet.IntSetIterator matchedExpressionIterator;
+
+    private KeyValue currentMemstoreKey = null;
+    private KeyValue currentExpressionKey = null;
+    private boolean exhausted = false;
+    private byte[] startRow;
+    private boolean isStartRowSatisfied;
+
+    KeyProvider(IdxSearchContext idxSearchContext,
+      IntSet matchedExpression, Scan scan) {
+      this.indexSearchContext = idxSearchContext;
+      this.matchedExpressionIterator = matchedExpression.iterator();
+
+      memstoreHeap = initMemstoreHeap(scan);
+
+      startRow = scan.getStartRow();
+      isStartRowSatisfied = startRow == null;
+    }
+
+    private KeyValueHeap initMemstoreHeap(Scan scan) {
+      List<KeyValueScanner> scanners = new ArrayList<KeyValueScanner>();
+      // todo: can we reduce the cost of scanning the memory stores by
+      // only using one entry from the family map
+
+      for (byte[] family : regionInfo.getTableDesc().getFamiliesKeys()) {
+        Store store = stores.get(family);
+        scanners.addAll(getMemstoreScanners(store, scan.getStartRow()));
+        break;  // we only need one
+      }
+      return new KeyValueHeap(scanners.toArray(new KeyValueScanner[scanners.size()]), comparator);
+    }
+
+    /*
+     * @return List of scanners ordered properly.
+     */
+    private List<KeyValueScanner> getMemstoreScanners(Store store, byte[] startRow) {
+      List<KeyValueScanner> scanners = new ArrayList<KeyValueScanner>();
+      // this seems a bit pointless because the memstore only ever returns an
+      // array with only one element, but just incase...
+      KeyValueScanner[] memstorescanners = store.memstore.getScanners();
+      // to make sure we don't provide rows that the scan is not interested in
+      // we seekTo the scan's startRow
+      KeyValue seekTo = KeyValue.createFirstOnRow(startRow);
+      for (int i = memstorescanners.length - 1; i >= 0; i--) {
+        memstorescanners[i].seek(seekTo);
+        scanners.add(memstorescanners[i]);
+      }
+
+      return scanners;
+    }
+
+    public KeyValue next() throws IOException {
+      if (exhausted) return null;
+
+      KeyValue currentKey;
+      /*
+         If either the current memstore or expression row is null get the next.
+         Note: the row will be nulled when it's consumed.
+       */
+      if (currentMemstoreKey == null)
+        currentMemstoreKey = nextMemstoreRow();
+      if (currentExpressionKey == null)
+        currentExpressionKey = nextExpressionRow();
+
+      if (currentMemstoreKey == null && currentExpressionKey == null) {
+        exhausted = true;
+        // if both rows are null then the scanner is done
+        return null;
+      } else if (currentMemstoreKey == null) {
+        currentKey = currentExpressionKey;
+        currentExpressionKey = null;
+      } else if (currentExpressionKey == null) {
+        currentKey = currentMemstoreKey;
+        currentMemstoreKey = null;
+      } else {
+        // determine which of the two keys is smaller (before the other) so that
+        // the scan is processed in-order
+        int comparisonResult = comparator.compareRows(currentMemstoreKey, currentExpressionKey);
+
+        if (comparisonResult == 0) {
+          // if the two rows are equal then we'll use the memstore and clear the
+          // current expression row so that the next invocation progresses...
+          currentExpressionKey = null;
+          currentKey = currentMemstoreKey;
+        } else if (comparisonResult < 0) {
+          currentKey = currentMemstoreKey;
+          currentMemstoreKey = null;
+        } else { // if (comparisonResult > 0)
+          currentKey = currentExpressionKey;
+          currentExpressionKey = null;
+        }
+      }
+
+      return currentKey;
+    }
+
+    private KeyValue nextExpressionRow() {
+      KeyValue nextExpressionKey = null;
+      while (matchedExpressionIterator.hasNext()) {
+        int index = matchedExpressionIterator.next();
+        nextExpressionKey = indexSearchContext.lookupRow(index);
+
+        // if the scan has specified a startRow we need to keep looping
+        // over the keys returned from the index until it's satisfied
+        if (!isStartRowSatisfied) {
+          if (comparator.compareRows(nextExpressionKey, startRow) >= 0) {
+            isStartRowSatisfied = true;
+            break;
+          }
+        } else {
+          break;
+        }
+      }
+
+      return nextExpressionKey;
+    }
+
+    private KeyValue nextMemstoreRow() {
+      /*
+      This may appear a little expensive but the initial version is not concerned
+      with the performance of the memstore.
+       */
+      final KeyValue firstOnNextRow = this.memstoreHeap.next();
+      KeyValue nextKeyValue = this.memstoreHeap.peek();
+      while (firstOnNextRow != null && nextKeyValue != null &&
+        comparator.compareRows(firstOnNextRow, nextKeyValue) == 0) {
+        // progress to the next row
+        // todo: is there a faster way of doing this?
+        this.memstoreHeap.next();
+        nextKeyValue = this.memstoreHeap.peek();
+      }
+
+      return firstOnNextRow;
+    }
+
+    /**
+     * Close this key provider - delegate close and free memory.
+     */
+    public void close() {
+      this.indexSearchContext.close();
+      this.memstoreHeap.close();
+    }
+  }
+
+  @Override
+  public long heapSize() {
+    return FIXED_OVERHEAD + super.heapSize() +
+      indexManager.heapSize() + expressionEvaluator.heapSize();
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionIndexManager.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,290 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.lang.time.StopWatch;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.NotServingRegionException;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.client.idx.IdxColumnDescriptor;
+import org.apache.hadoop.hbase.client.idx.IdxIndexDescriptor;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.IdxClassSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.hadoop.hbase.util.Pair;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+/**
+ * Manages the indexes for a single region.
+ */
+public class IdxRegionIndexManager implements HeapSize {
+  private static final Log LOG = LogFactory.getLog(IdxRegionIndexManager.class);
+
+  static final long FIXED_SIZE =
+    ClassSize.align(ClassSize.OBJECT + 4 * ClassSize.REFERENCE +
+      IdxClassSize.HASHMAP + IdxClassSize.OBJECT_ARRAY_LIST +
+      Bytes.SIZEOF_LONG + ClassSize.REENTRANT_LOCK);
+
+
+  /**
+   * The wrapping region.
+   */
+  private IdxRegion region;
+  /**
+   * The index map. Each pair holds the column and qualifier.
+   */
+  private volatile Map<Pair<byte[], byte[]>, IdxIndex> indexMap;
+  /**
+   * The keys ordered by their id. The IntSet in the {@link IdxIndex} have
+   * members corresponding to the indices of this array.
+   */
+  private volatile ObjectArrayList<KeyValue> keys;
+  /**
+   * The heap size.
+   */
+  private long heapSize;
+
+  private ReadWriteLock indexSwitchLock;
+
+  /**
+   * Create and initialize a new index manager.
+   *
+   * @param region the region to connect to
+   */
+  public IdxRegionIndexManager(IdxRegion region) {
+    this.region = region;
+    indexSwitchLock = new ReentrantReadWriteLock();
+    heapSize = FIXED_SIZE;
+  }
+
+  /**
+   * Creates and populates all indexes. Bruteforce scan fetching everything
+   * into memory, creating indexes out of that.
+   *
+   * @return total time in millis to rebuild the indexes
+   * @throws IOException in case scan throws
+   */
+  public long rebuildIndexes() throws IOException {
+    long startMillis = System.currentTimeMillis();
+    if (LOG.isInfoEnabled()) {
+      LOG.info(String.format("Initializing index manager for region: %s",
+        region.toString()));
+    }
+    heapSize = FIXED_SIZE;
+    Map<Pair<byte[], byte[]>, CompleteIndexBuilder>
+      builderTable = initIndexTable();
+    // if the region is closing/closed then a fillIndex method will throw a
+    // NotServingRegion exection when an attempt to obtain a scanner is made
+    // NOTE: when the region is being created isClosing() returns true
+    if (!(region.isClosing() || region.isClosed()) && !builderTable.isEmpty()) {
+      try {
+        ObjectArrayList<KeyValue> newKeys = fillIndex(builderTable);
+        Map<Pair<byte[], byte[]>, IdxIndex> newIndexMap =
+          finalizeIndex(builderTable, newKeys);
+        switchIndex(newKeys, newIndexMap);
+      } catch (NotServingRegionException e) {
+        // the not serving exception may also be thrown during the scan if
+        // the region was closed during the scan
+        LOG.warn("Aborted index initialization", e);
+      }
+    } else {
+    switchIndex(new ObjectArrayList<KeyValue>(),
+      Collections.<Pair<byte[], byte[]>, IdxIndex>emptyMap());
+    }
+    return System.currentTimeMillis() - startMillis;
+  }
+
+  private void switchIndex(ObjectArrayList<KeyValue> newKeys,
+    Map<Pair<byte[], byte[]>, IdxIndex> newIndexMap) {
+    indexSwitchLock.writeLock().lock();
+    try {
+      this.keys = newKeys;
+      this.indexMap = newIndexMap;
+    } finally {
+      indexSwitchLock.writeLock().unlock();
+    }
+  }
+
+  /**
+   * Initiate the index table. Read the column desciprtors, extract the index
+   * descriptors from them and instantiate index builders for those columns.
+   *
+   * @return the initiated map of builders keyed by column:qualifer pair
+   * @throws IOException thrown by {@link IdxColumnDescriptor#getIndexDescriptors(org.apache.hadoop.hbase.HColumnDescriptor)}
+   */
+  private Map<Pair<byte[], byte[]>,
+    CompleteIndexBuilder> initIndexTable() throws IOException {
+    Map<Pair<byte[], byte[]>, CompleteIndexBuilder> indexBuilders =
+      new HashMap<Pair<byte[], byte[]>, CompleteIndexBuilder>();
+    for (HColumnDescriptor columnDescriptor :
+      region.getRegionInfo().getTableDesc().getColumnFamilies()) {
+      Collection<IdxIndexDescriptor> indexDescriptors =
+        IdxColumnDescriptor.getIndexDescriptors(columnDescriptor).values();
+      for (IdxIndexDescriptor indexDescriptor : indexDescriptors) {
+        LOG.info(String.format("Adding index for region: '%s' index: %s",
+          region.getRegionNameAsString(), indexDescriptor.toString()));
+        indexBuilders.put(Pair.of(columnDescriptor.getName(),
+          indexDescriptor.getQualifierName()),
+          new CompleteIndexBuilder(columnDescriptor, indexDescriptor));
+      }
+    }
+    return indexBuilders;
+  }
+
+  /**
+   * Fills the index. Scans the region for latest rows and sends key values
+   * to the matching index builder
+   *
+   * @param builders the map of builders keyed by column:qualifer pair
+   * @return the keyset (a fresh set)
+   * @throws IOException may be thrown by the scan
+   */
+  private ObjectArrayList<KeyValue> fillIndex(Map<Pair<byte[], byte[]>,
+    CompleteIndexBuilder> builders) throws IOException {
+    ObjectArrayList<KeyValue> newKeys = this.keys == null ?
+      new ObjectArrayList<KeyValue>() :
+      new ObjectArrayList<KeyValue>(this.keys.size());
+
+    StopWatch stopWatch = new StopWatch();
+    stopWatch.start();
+
+    InternalScanner scanner = region.getScanner(new Scan());
+    boolean moreRows;
+    int id = 0;
+    do {
+      List<KeyValue> nextRow = new ArrayList<KeyValue>();
+      moreRows = scanner.next(nextRow);
+      if (nextRow.size() > 0) {
+        KeyValue
+          firstOnRow = KeyValue.createFirstOnRow(nextRow.get(0).getRow());
+        newKeys.add(firstOnRow);
+        // add keyvalue to the heapsize
+        heapSize += firstOnRow.heapSize();
+        for (KeyValue keyValue : nextRow) {
+          CompleteIndexBuilder idx = builders.get(Pair.of(keyValue.getFamily(),
+            keyValue.getQualifier()));
+          if (idx != null) {
+            idx.addKeyValue(keyValue, id);
+          }
+        }
+        id++;
+      }
+    } while (moreRows);
+
+    stopWatch.stop();
+    LOG.info("Filled indices for region: '" + region.getRegionNameAsString()
+      + "' with " + id + " entries in " + stopWatch.toString());
+    return newKeys;
+  }
+
+  /**
+   * Converts the map of builders into complete indexes, calling
+   * {@link CompleteIndexBuilder#finalizeIndex(int)} on each builder.
+   *
+   * @param builders the map of builders
+   * @param newKeys  the set of keys for the new index to be finalized
+   * @return the new index map
+   */
+  private Map<Pair<byte[], byte[]>, IdxIndex>
+  finalizeIndex(Map<Pair<byte[], byte[]>,
+    CompleteIndexBuilder> builders, ObjectArrayList<KeyValue> newKeys) {
+    Map<Pair<byte[], byte[]>, IdxIndex>
+      newIndexes = new HashMap<Pair<byte[], byte[]>, IdxIndex>();
+    for (Map.Entry<Pair<byte[], byte[]>, CompleteIndexBuilder> indexEntry :
+      builders.entrySet()) {
+      IdxIndex index = indexEntry.getValue().finalizeIndex(newKeys.size());
+      newIndexes.put(indexEntry.getKey(), index);
+      // adjust the heapsize
+      heapSize += ClassSize.align(ClassSize.MAP_ENTRY +
+        ClassSize.align(ClassSize.OBJECT + 2 * ClassSize.ARRAY +
+          indexEntry.getKey().getFirst().length +
+          indexEntry.getKey().getSecond().length) + index.heapSize()
+      );
+    }
+    return newIndexes;
+  }
+
+  public IdxSearchContext newSearchContext() {
+    indexSwitchLock.readLock().lock();
+    try {
+      return new IdxSearchContext(keys, indexMap);
+    } finally {
+      indexSwitchLock.readLock().unlock();
+    }
+  }
+
+  @Override
+  public long heapSize() {
+    return heapSize;
+  }
+
+  /**
+   * Exposes the number of keys in the index manager.
+   *
+   * @return the number of keys.
+   */
+  public int getNumberOfKeys() {
+    indexSwitchLock.readLock().lock();
+    try {
+      return keys.size();
+    } finally {
+      indexSwitchLock.readLock().unlock();
+    }
+  }
+
+  /**
+   * A monitoring operation which returns the byte size of a given index.
+   *
+   * @param columnName in [family]:[qualifier] format
+   * @return the byte size of the index
+   */
+  public long getIndexHeapSize(String columnName) {
+    String[] familyAndQualifier = columnName.split(":");
+    if (familyAndQualifier != null && familyAndQualifier.length == 2) {
+      Pair fqPair = Pair.of(Bytes.toBytes(familyAndQualifier[0]),
+        Bytes.toBytes(familyAndQualifier[1]));
+      indexSwitchLock.readLock().lock();
+      IdxIndex idx = null;
+      try {
+        idx = indexMap.get(fqPair);
+      } finally {
+        indexSwitchLock.readLock().unlock();
+      }
+      if (idx != null) {
+        return idx.heapSize();
+      }
+    }
+    throw new IllegalArgumentException("No index for " + columnName);
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBean.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBean.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBean.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBean.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+/**
+ * An MBean exposing various region ops and info.
+ */
+public interface IdxRegionMBean {
+
+  /**
+   * Checks whether the region being exposed by this MBean is still alive.
+   *
+   * @return whether the region being exposed by this MBean is still alive.
+   */
+  boolean isValid();
+
+  /**
+   * The number of keys in the index which is equivalent to the number of top
+   * level rows in this region.
+   *
+   * @return the number of keys in the index.
+   */
+  int getNumberOfIndexedKeys();
+
+  /**
+   * The total heap size, in bytes, used by the indexes and their overhead.
+   *
+   * @return the total index heap size in bytes.
+   */
+  long getIndexesTotalHeapSize();
+
+  /**
+   * Gets the total number of indexed scan since the last reset.
+   *
+   * @return the total number of indexed scans.
+   */
+  public long getTotalIndexedScans();
+
+  /**
+   * Resets the total number of indexed scans.
+   *
+   * @return the number of indexed scans just before the reset
+   */
+  public long resetTotalIndexedScans();
+
+  /**
+   * Gets the total number of non indexed scan since the last reset.
+   *
+   * @return the total number of indexed scans.
+   */
+  public long getTotalNonIndexedScans();
+
+  /**
+   * Resets the total number of non indexed scans.
+   *
+   * @return the number of indexed scans just before the reset
+   */
+  public long resetTotalNonIndexedScans();
+
+  /**
+   * Exposes the number of indexed scans currently ongoing in the system.
+   *
+   * @return the number of ongoing indexed scans
+   */
+  public long getNumberOfOngoingIndexedScans();
+
+  /**
+   * Gets the index build times buffer as a comma delimited string
+   * where each item is the milliseconds required to rebuild the indexes.
+   *
+   * @return a rolling buffer of index build times
+   */
+  public String getIndexBuildTimes();
+
+  /**
+   * Resets the index build times buffer.
+   *
+   * @return the previous build times buffer
+   */
+  public String resetIndexBuildTimes();
+
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxRegionMBeanImpl.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,353 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.client.idx.IdxColumnDescriptor;
+import org.apache.hadoop.hbase.client.idx.IdxIndexDescriptor;
+import org.apache.hadoop.hbase.util.Bytes;
+
+import javax.management.AttributeNotFoundException;
+import javax.management.MBeanAttributeInfo;
+import javax.management.MBeanException;
+import javax.management.MBeanInfo;
+import javax.management.MalformedObjectNameException;
+import javax.management.NotCompliantMBeanException;
+import javax.management.ObjectName;
+import javax.management.ReflectionException;
+import javax.management.StandardMBean;
+import java.io.IOException;
+import java.lang.ref.WeakReference;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+/**
+ * A delegate MBean to get stats / poke an idx region.
+ */
+public class IdxRegionMBeanImpl extends StandardMBean
+  implements IdxRegionMBean {
+  static final Log LOG = LogFactory.getLog(IdxRegionMBeanImpl.class);
+
+  private static final HashMap<String, String> ATTRIBUTE_DESCRIPTIONS =
+    new HashMap<String, String>() {{
+      put("Valid", "Indicates whether the region being exposed by " +
+        "this MBean is still alive");
+      put("NumberOfIndexedKeys", "The number of keys in the index " +
+        "which is equivalent to the number of top-level rows in this region");
+      put("IndexesTotalHeapSize", "The total heap size, in bytes, used by " +
+        "the indexes and their overhead");
+    }};
+
+
+  /**
+   * Instantiate an new IdxRegion MBean. this is a convenience method which
+   * allows the caller to avoid catching the
+   * {@link javax.management.NotCompliantMBeanException}.
+   *
+   * @param idxRegion the region to wrap
+   * @return a new instance of IdxRegionMBeanImpl
+   */
+  static IdxRegionMBeanImpl newIdxRegionMBeanImpl(IdxRegion idxRegion) {
+    try {
+      return new IdxRegionMBeanImpl(idxRegion);
+    } catch (NotCompliantMBeanException e) {
+      throw new IllegalStateException("Could not instantiate mbean", e);
+    }
+  }
+
+  /**
+   * Generate object name from the hregion info.
+   *
+   * @param regionInfo the region info to create the object name from.
+   * @return an valid object name.
+   */
+  static ObjectName generateObjectName(HRegionInfo regionInfo) {
+    StringBuilder builder =
+      new StringBuilder(IdxRegionMBeanImpl.class.getPackage().getName());
+    builder.append(':');
+    builder.append("table=");
+    builder.append(regionInfo.getTableDesc().getNameAsString());
+    builder.append(',');
+
+    builder.append("id=");
+    builder.append(regionInfo.getRegionId());
+    builder.append(',');
+
+    if (regionInfo.getStartKey() != null &&
+      regionInfo.getStartKey().length > 0) {
+      builder.append("startKey=");
+      builder.append(Bytes.toString(regionInfo.getStartKey()));
+      builder.append(',');
+    }
+
+    if (regionInfo.getEndKey() != null &&
+      regionInfo.getEndKey().length > 0) {
+      builder.append("endKey=");
+      builder.append(Bytes.toString(regionInfo.getEndKey()));
+      builder.append(',');
+    }
+
+    builder.append("type=IdxRegion");
+    try {
+      return ObjectName.getInstance(builder.toString());
+    } catch (MalformedObjectNameException e) {
+      throw new IllegalStateException("Failed to create a legal object name",
+        e);
+    }
+  }
+
+  /**
+   * Using a weak reference to the idx region.
+   * This way a failure to clear this MBean from the mbean server won't prevent
+   * the idx region to be garbage collected.
+   */
+  private WeakReference<IdxRegion> idxRegionRef;
+
+  private IdxRegionMBeanImpl(IdxRegion idxRegion)
+    throws NotCompliantMBeanException {
+    super(IdxRegionMBean.class);
+    idxRegionRef = new WeakReference<IdxRegion>(idxRegion);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int getNumberOfIndexedKeys() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getNumberOfIndexedKeys();
+    } else {
+      return -1;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long getIndexesTotalHeapSize() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getIndexesTotalHeapSize();
+    } else {
+      return -1;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean isValid() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return !(region.isClosed() || region.isClosing());
+    } else {
+      return false;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long getTotalIndexedScans() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getTotalIndexedScans();
+    } else {
+      return -1L;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long resetTotalIndexedScans() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.resetTotalIndexedScans();
+    } else {
+      return -1L;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long getTotalNonIndexedScans() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getTotalNonIndexedScans();
+    } else {
+      return -1L;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long resetTotalNonIndexedScans() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.resetTotalNonIndexedScans();
+    } else {
+      return -1L;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public long getNumberOfOngoingIndexedScans() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getNumberOfOngoingIndexedScans();
+    } else {
+      return -1L;
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String getIndexBuildTimes() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return toIndexBuildTimesString(region.getIndexBuildTimes());
+    } else {
+      return "";
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String resetIndexBuildTimes() {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return toIndexBuildTimesString(region.resetIndexBuildTimes());
+    } else {
+      return "";
+    }
+  }
+
+  private String toIndexBuildTimesString(long[] buildTimes) {
+    StringBuilder builder = new StringBuilder();
+    for (long indexBuildTime : buildTimes) {
+      if (indexBuildTime >= 0) {
+        builder.append(indexBuildTime);
+        builder.append(", ");
+      }
+    }
+    return builder.length() > 2 ? builder.substring(0, builder.length() - 2) :
+      "";
+  }
+
+/* StandardMBean hooks and overrides */
+
+  @Override
+  protected String getDescription(MBeanAttributeInfo info) {
+    String description = ATTRIBUTE_DESCRIPTIONS.get(info.getName());
+    return description == null ? super.getDescription(info) : description;
+  }
+
+  @Override
+  public Object getAttribute(String attribute)
+    throws AttributeNotFoundException, MBeanException, ReflectionException {
+    if (attribute.endsWith(".heapSize")) {
+      String columnName = attribute.substring(0, attribute.indexOf('.'));
+      return getIndexHeapSize(columnName);
+    }
+    return super.getAttribute(attribute);
+  }
+
+  @Override
+  protected void cacheMBeanInfo(MBeanInfo info) {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      Set<String> columnNames;
+      try {
+        columnNames = extractIndexedColumnNames(region.getRegionInfo());
+      } catch (IOException e) {
+        throw new IllegalStateException("Invalid region info for " + region);
+      }
+      final MBeanAttributeInfo[] existingInfos = info.getAttributes();
+      for (MBeanAttributeInfo attributeInfo : existingInfos) {
+        String name = attributeInfo.getName();
+        if (name.indexOf('.') >= 0) {
+          columnNames.remove(name.substring(0, name.indexOf('.')));
+        }
+      }
+      MBeanAttributeInfo[] attributeInfos = new
+        MBeanAttributeInfo[columnNames.size() + existingInfos.length];
+      System.arraycopy(existingInfos, 0, attributeInfos, 0,
+        existingInfos.length);
+      Iterator<String> columnNameIterator = columnNames.iterator();
+      for (int i = existingInfos.length; i < attributeInfos.length; i++) {
+        String name = columnNameIterator.next() + ".heapSize";
+        attributeInfos[i] = new MBeanAttributeInfo(name,
+          "long", "The amount of heap space occupied by this index", true,
+          false, false);
+      }
+      info = new MBeanInfo(info.getClassName(), info.getDescription(),
+        attributeInfos, info.getConstructors(), info.getOperations(),
+        info.getNotifications(), info.getDescriptor());
+    }
+    super.cacheMBeanInfo(info);
+  }
+
+  private static Set<String> extractIndexedColumnNames(HRegionInfo regionInfo)
+    throws IOException {
+    Set<String> idxColumns = new HashSet<String>();
+    for (HColumnDescriptor columnDescriptor :
+      regionInfo.getTableDesc().getColumnFamilies()) {
+      Collection<IdxIndexDescriptor> indexDescriptors =
+        IdxColumnDescriptor.getIndexDescriptors(columnDescriptor).values();
+      for (IdxIndexDescriptor indexDescriptor : indexDescriptors) {
+        idxColumns.add(columnDescriptor.getNameAsString() + ":" +
+          Bytes.toString(indexDescriptor.getQualifierName()));
+      }
+    }
+    return idxColumns;
+  }
+
+  private long getIndexHeapSize(String columnName) {
+    IdxRegion region = idxRegionRef.get();
+    if (region != null) {
+      return region.getIndexHeapSize(columnName);
+    } else {
+      return -1L;
+    }
+  }
+}

Added: hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxSearchContext.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxSearchContext.java?rev=897547&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxSearchContext.java (added)
+++ hadoop/hbase/branches/0.20_on_hadoop-0.18.3/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxSearchContext.java Sat Jan  9 21:09:59 2010
@@ -0,0 +1,107 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.regionserver.idx.support.Callback;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.util.Pair;
+
+import java.util.Map;
+
+/**
+ * The search context holds the context for a specific search request.
+ * It takes a snapshot of the indexes at the time the search was requested.
+ * <p/>
+ * The search context is a transient object whose life spans only while the
+ * search (typically a region scan) is in progress.
+ */
+public class IdxSearchContext {
+
+  private ObjectArrayList<KeyValue> keys;
+  private Map<Pair<byte[], byte[]>, IdxIndex> indexMap;
+
+  /**
+   * Construct a new search context.
+   *
+   * @param keys     the keys to use when searching.
+   * @param indexMap the matching index map
+   */
+  public IdxSearchContext(ObjectArrayList<KeyValue> keys,
+    Map<Pair<byte[], byte[]>, IdxIndex> indexMap) {
+    this.keys = keys;
+    this.indexMap = indexMap;
+  }
+
+  /**
+   * Looks up an index based on the column and the qualifier. May return null if
+   * no such index is found.
+   *
+   * @param column    the column
+   * @param qualifier the column qualifier
+   * @return the index for the column/qualifer
+   */
+  public IdxIndex getIndex(byte[] column, byte[] qualifier) {
+    return indexMap.get(Pair.of(column, qualifier));
+  }
+
+  /**
+   * Process a set of rows, typically to convert a query to a scan. Rows are
+   * processed in sorted order.
+   *
+   * @param rowSet   the row set to process
+   * @param callback the callback to use to process those rows
+   */
+  public void processRows(IntSet rowSet, Callback<KeyValue> callback) {
+    IntSet.IntSetIterator iterator = rowSet.iterator();
+    while (iterator.hasNext()) {
+      int i = iterator.next();
+      callback.call(keys.get(i));
+    }
+  }
+
+  /**
+   * Unmap a index key to a actual key.
+   *
+   * @param index the index offset
+   * @return the byte
+   */
+  public KeyValue lookupRow(int index) {
+    return keys.get(index);
+  }
+
+  /**
+   * The number of rows indexed in this search context.
+   *
+   * @return the number of indexed rows.
+   */
+  public int rowCount() {
+    return keys.size();
+  }
+
+  /**
+   * close this search context
+   */
+  public void close(){
+    keys = null;
+    indexMap = null;
+  }
+}