You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by st...@apache.org on 2010/01/05 18:26:53 UTC

svn commit: r896138 [2/9] - in /hadoop/hbase/branches/0.20: ./ src/contrib/ src/contrib/indexed/ src/contrib/indexed/lib/ src/contrib/indexed/lib/fmpp-0.19.14/ src/contrib/indexed/src/ src/contrib/indexed/src/fmpp/ src/contrib/indexed/src/fmpp/src/ src...

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxIndexDescriptor.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,154 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.WritableUtils;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * The description of an indexed column family qualifier.
+ */
+public class IdxIndexDescriptor implements Writable {
+  /**
+   * Qualifier name;
+   */
+  private byte[] qualifierName;
+
+  /**
+   * The qualifier type - affects the translation of bytes into indexed
+   * properties. 
+   */
+  private IdxQualifierType qualifierType;
+
+  /**
+   * Empty constructor to support the writable interface - DO NOT USE.
+   */
+  public IdxIndexDescriptor() {
+  }
+
+  /**
+   * Construct a new index descriptor.
+   * @param qualifierName the qualifier name
+   * @param qualifierType the qualifier type
+   */
+  public IdxIndexDescriptor(byte[] qualifierName,
+    IdxQualifierType qualifierType) {
+    this.qualifierName = qualifierName;
+    this.qualifierType = qualifierType;
+  }
+
+  /**
+   * The column family qualifier name.
+   * @return column family qualifier name
+   */
+  public byte[] getQualifierName() {
+    return qualifierName;
+  }
+
+  /**
+   * The column family qualifier name.
+   * @param qualifierName column family qualifier name
+   */
+  public void setQualifierName(byte[] qualifierName) {
+    this.qualifierName = qualifierName;
+  }
+
+  /**
+   * The data type that the column family qualifier contains.
+   * @return data type that the column family qualifier contains
+   */
+  public IdxQualifierType getQualifierType() {
+    return qualifierType;
+  }
+
+  /**
+   * The data type that the column family qualifier contains.
+   * @param qualifierType data type that the column family qualifier contains
+   */
+  public void setQualifierType(IdxQualifierType qualifierType) {
+    this.qualifierType = qualifierType;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void write(DataOutput dataOutput) throws IOException {
+    Bytes.writeByteArray(dataOutput, qualifierName);
+    WritableUtils.writeEnum(dataOutput, qualifierType);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void readFields(DataInput dataInput) throws IOException {
+    qualifierName = Bytes.readByteArray(dataInput);
+    qualifierType = WritableUtils.readEnum(dataInput, IdxQualifierType.class);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+
+    IdxIndexDescriptor that = (IdxIndexDescriptor) o;
+
+    if (!Arrays.equals(qualifierName, that.qualifierName)) return false;
+
+    return true;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode() {
+    return Arrays.hashCode(qualifierName);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public String toString() {
+    StringBuffer s = new StringBuffer();
+    s.append('{');
+    s.append("QUALIFIER");
+    s.append(" => '");
+    s.append(Bytes.toString(qualifierName));
+    s.append("',");
+    s.append("TYPE");
+    s.append(" => '");
+    s.append(qualifierType);
+    s.append("'}");
+    return s.toString();
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxQualifierType.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxQualifierType.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxQualifierType.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxQualifierType.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,88 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx;
+
+/**
+ * Indicates the data type contained in the column family qualifier.
+ * This type affects index construction and value ordering in the index
+ */
+public enum IdxQualifierType {
+  /**
+   * Values qualified by this qualifier are bytes.
+   * Each entry is a byte array of size 1 which should be treated as numerical
+   * byte.
+   */
+  BYTE,
+  /**
+   * Values qualified by this qualifier are characters.
+   * Each entry is a byte array of size 2 which should be treated as
+   * a character.
+   */
+  CHAR,
+  /**
+   * Values qualified by this qualifier are short integers.
+   * Each entry is a byte array of size 2 which should be treated as
+   * a short integer.
+   */
+  SHORT,
+  /**
+   * Values qualified by this qualifier are integers.
+   * Each entry is a byte array of size 4 which should be treated as
+   * a an integer.
+   */
+  INT,
+  /**
+   * Values qualified by this qualifier are long integers.
+   * Each entry is a byte array of size 8 which should be treated as
+   * a long integer.
+   */
+  LONG,
+  /**
+   * Values qualified by this qualifier are floats.
+   * Each entry is a byte array of size 4 which should be treated as
+   * a float.
+   */
+  FLOAT,
+  /**
+   * Values qualified by this qualifier are doubles.
+   * Each entry is a byte array of size 8 which should be treated as
+   * a double.
+   */
+  DOUBLE,
+  /**
+   * Values qualified by this qualifier are big decimals.
+   * Each entry is a byte array of variable size which should be treated as
+   * a big decimal. See also conversion methods in
+   * {@link org.apache.hadoop.hbase.util.Bytes}
+   */
+  BIG_DECIMAL,
+  /**
+   * Values qualified by this qualifier are byte arrays.
+   * Each entry is a byte array of variable size which should be compared
+   * based on the byte array's bytes numerical order.
+   */
+  BYTE_ARRAY,
+  /**
+   * Values qualified by this qualifier are character arrays.
+   * Each entry is a byte array of variable size which should be compared
+   * based on the char array's characters lexicographical order.
+   */
+  CHAR_ARRAY,
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxScan.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxScan.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxScan.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/IdxScan.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx;
+
+import org.apache.hadoop.hbase.WritableHelper;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import org.apache.hadoop.hbase.client.Scan;
+import org.apache.hadoop.hbase.filter.Filter;
+import org.apache.hadoop.hbase.client.idx.exp.Expression;
+import org.apache.hadoop.io.DataInputBuffer;
+import org.apache.hadoop.io.DataOutputBuffer;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Map;
+import java.util.Collection;
+
+/**
+ * Extends the {@link Scan} class to provide an {@link Expression}
+ * that is used to quickly reduce the scope of the scan.
+ */
+public class IdxScan extends Scan {
+  /**
+   * The key used to store and retrieve the scan index expression.
+   */
+  public static final ImmutableBytesWritable EXPRESSION =
+      new ImmutableBytesWritable(Bytes.toBytes("EXPRESSION"));
+
+  private Expression expression;
+
+  /**
+   * No-args constructor.
+   */
+  public IdxScan() {
+  }
+
+  /**
+   * Constructs a scan.
+   * @param expression the index expression
+   */
+  public IdxScan(Expression expression) {
+    this.expression = expression;
+  }
+
+  /**
+   * Constructs a scan.
+   * @param startRow   row to start scanner at or after (inclusive)
+   * @param filter     the filter that will applied to the scan
+   * @param expression the index expression
+   */
+  public IdxScan(byte[] startRow, Filter filter, Expression expression) {
+    super(startRow, filter);
+    this.expression = expression;
+  }
+
+  /**
+   * Constructs a scan.
+   * @param startRow   row to start scanner at or after (inclusive)
+   * @param expression the index expression
+   */
+  public IdxScan(byte[] startRow, Expression expression) {
+    super(startRow);
+    this.expression = expression;
+  }
+
+  /**
+   * Constructs a scan.
+   * @param startRow   row to start scanner at or after (inclusive)
+   * @param stopRow    row to stop scanner before (exclusive)
+   * @param expression the index expression
+   */
+  public IdxScan(byte[] startRow, byte[] stopRow, Expression expression) {
+    super(startRow, stopRow);
+    this.expression = expression;
+  }
+
+  /**
+   * Constructs a scan from the provided scan with the expression.
+   * @param scan       the scan to copy from
+   * @param expression the index expression
+   * @throws IOException if thrown by {@link Scan#Scan(org.apache.hadoop.hbase.client.Scan)}
+   */
+  public IdxScan(Scan scan, Expression expression) throws IOException {
+    super(scan);
+    this.expression = expression;
+  }
+
+  /**
+   * Returns the index expression used by the scan.
+   * @return the index expression
+   */
+  public Expression getExpression() {
+    return expression;
+  }
+
+  /**
+   * Sets the index expression used by the scan.
+   * @param expression the index expression
+   */
+  public void setExpression(Expression expression) {
+    this.expression = expression;
+  }
+
+  /**
+   * Scanning all versions is not currently supported.
+   * @return never
+   * @throws IllegalStateException if this method is called
+   */
+  @Override
+  public Scan setMaxVersions() {
+    throw new IllegalStateException("Scanning all versions is not currently supported.");
+  }
+
+  /**
+   * Scanning all versions is not currently supported.
+   * @param maxVersions maximum versions for each column
+   * @return never
+   * @throws IllegalStateException if this method is called
+   */
+  @Override
+  public Scan setMaxVersions(int maxVersions) {
+    throw new IllegalStateException("Scanning all versions is not currently supported.");
+  }
+
+  /**
+   * {@inheritDoc}.
+   * <p/>
+   * Also writes the optional {@link #getExpression()}.
+   */
+  @Override
+  public void write(DataOutput out) throws IOException {
+    if (expression != null) {
+      values.put(EXPRESSION, writeExpression(expression));
+    } else {
+      values.remove(EXPRESSION);
+    }
+    super.write(out);
+  }
+
+  private static ImmutableBytesWritable writeExpression(Expression expression) throws IOException {
+    DataOutputBuffer out = new DataOutputBuffer();
+
+    WritableHelper.writeInstanceNullable(out, expression);
+
+    return new ImmutableBytesWritable(out.getData());
+  }
+
+  /**
+   * {@inheritDoc}.
+   * <p/>
+   * Also reads the optional {@link #getExpression()}.
+   */
+  @Override
+  public void readFields(DataInput in) throws IOException {
+    super.readFields(in);
+    this.expression = getExpression(this);
+  }
+
+  public static Expression getExpression(Scan scan) throws IOException {
+    if (scan instanceof IdxScan && ((IdxScan) scan).getExpression() != null) {
+      return ((IdxScan) scan).getExpression();
+    }
+
+    Map<ImmutableBytesWritable,ImmutableBytesWritable> values = scan.getValues();
+    if (values.containsKey(EXPRESSION)) {
+      DataInputBuffer in = new DataInputBuffer();
+      byte[] bytes = values.get(EXPRESSION).get();
+      in.reset(bytes, bytes.length);
+
+      return WritableHelper.readInstanceNullable(in, Expression.class);
+    } else {
+      return null;
+    }
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/And.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/And.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/And.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/And.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx.exp;
+
+import java.util.Collection;
+
+/**
+ * This class implements boolean AND - all sub-expressions must be true in order
+ * for it to be true.
+ */
+public class And extends Compound {
+  /**
+   * Internal constructor.
+   */
+  public And() {
+    super();
+  }
+
+  /**
+   * Constructs an and expression with provided expression.
+   * @param expressions the expression
+   */
+  public And(Expression... expressions) {
+    super(expressions);
+  }
+
+  /**
+   * Constructs an and expression with provided expression.
+   * @param expressions the expression
+   */
+  public And(Collection<Expression> expressions) {
+    super(expressions);
+  }
+
+  /**
+   * Adds the expression to the set of expression.
+   * @param expression the expression
+   * @return this
+   * @see Compound#add(Expression)
+   */
+  public And and(Expression expression) {
+    return (And) super.add(expression);
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Comparison.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,186 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx.exp;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+/**
+ * The comparison expression.
+ */
+public class Comparison extends Expression {
+  private byte[] columnName;
+  private byte[] qualifier;
+  private Operator operator;
+  private byte[] value;
+
+  /**
+   * No args constructor.
+   */
+  public Comparison() {
+  }
+
+  /**
+   * Convenience constrcutor that takes strings and converts from to byte[].
+   * @param columnName the column name
+   * @param qualifier  the column qualifier
+   * @param operator   the operator
+   * @param value      the value
+   */
+  public Comparison(String columnName, String qualifier, Operator operator, byte[] value) {
+    this(Bytes.toBytes(columnName), Bytes.toBytes(qualifier), operator, value);
+  }
+
+  /**
+   * Full constructor with all required fields.
+   * @param columnName the column name
+   * @param qualifier  the column qualifier
+   * @param operator   the operator
+   * @param value      the value
+   */
+  public Comparison(byte[] columnName, byte[] qualifier, Operator operator,
+                    byte[] value) {
+    assert columnName != null : "The columnName must not be null";
+    assert qualifier != null : "The qualifier must not be null";
+    assert operator != null : "The operator must not be null";
+    assert value != null : "The value must not be null";
+
+    this.columnName = columnName;
+    this.qualifier = qualifier;
+    this.operator = operator;
+    this.value = value;
+  }
+
+  /**
+   * The {@link org.apache.hadoop.hbase.HColumnDescriptor#getName() column
+   * family name} that the {@link #getQualifier() qualifier} is a member of.
+   * @return the column family name
+   */
+  public byte[] getColumnName() {
+    return columnName;
+  }
+
+  /**
+   * The column qualifier.
+   * @return the qualifier
+   */
+  public byte[] getQualifier() {
+    return qualifier;
+  }
+
+  /**
+   * The operator.
+   * @return the operator
+   */
+  public Operator getOperator() {
+    return operator;
+  }
+
+  /**
+   * The value.
+   * @return the value
+   */
+  public byte[] getValue() {
+    return value;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void write(DataOutput dataOutput) throws IOException {
+    Bytes.writeByteArray(dataOutput, columnName);
+    Bytes.writeByteArray(dataOutput, qualifier);
+    Bytes.writeByteArray(dataOutput, Bytes.toBytes(operator.toString()));
+    Bytes.writeByteArray(dataOutput, value);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void readFields(DataInput dataInput) throws IOException {
+    columnName = Bytes.readByteArray(dataInput);
+    qualifier = Bytes.readByteArray(dataInput);
+    operator = Operator.valueOf(Bytes.toString(Bytes.readByteArray(dataInput)));
+    value = Bytes.readByteArray(dataInput);
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+
+    Comparison that = (Comparison) o;
+
+    if (!Arrays.equals(columnName, that.columnName)) return false;
+    if (operator != that.operator) return false;
+    if (!Arrays.equals(qualifier, that.qualifier)) return false;
+    if (!Arrays.equals(value, that.value)) return false;
+
+    return true;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode() {
+    int result = Arrays.hashCode(columnName);
+    result = 31 * result + Arrays.hashCode(qualifier);
+    result = 31 * result + operator.hashCode();
+    result = 31 * result + Arrays.hashCode(value);
+    return result;
+  }
+
+  /**
+   * The enum for specifying the function we're performing in a {@link
+   * Comparison}.
+   */
+  public enum Operator {
+    /**
+     * The equals function.
+     */
+    EQ,
+    /**
+     * The greater than function.
+     */
+    GT,
+    /**
+     * The greater than or equals function.
+     */
+    GTE,
+    /**
+     * The less than function.
+     */
+    LT,
+    /**
+     * The less than or equals function.
+     */
+    LTE
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Compound.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Compound.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Compound.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Compound.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,130 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hbase.client.idx.exp;
+
+import org.apache.hadoop.hbase.WritableHelper;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ * A compound expression has no built-in tests but aggregates a set of child
+ * expressions into a logical group such as boolean logic.
+ */
+public abstract class Compound extends Expression {
+  /**
+   * The set of child expressions.
+   */
+  protected Set<Expression> children;
+
+  /**
+   * Class constructor.
+   * @param expressions the expressions to be evaluated
+   */
+  public Compound(Expression... expressions) {
+    assert expressions != null : "expressions cannot be null or empty";
+    this.children = new HashSet<Expression>(Arrays.asList(expressions));
+  }
+
+  /**
+   * Class constructor.
+   * @param expressions the expressions to be evaluated
+   */
+  public Compound(Collection<Expression> expressions) {
+    this.children = new HashSet<Expression>(expressions);
+  }
+
+  /**
+   * Add an expression to the child set.
+   * @param expression the expression to add
+   * @return this
+   */
+  public Compound add(Expression expression) {
+    this.children.add(expression);
+    return this;
+  }
+
+  /**
+   * Returns the set of child expressions.
+   * @return the expression set
+   */
+  public Set<Expression> getChildren() {
+    return children;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) {
+      return true;
+    }
+    if (o == null || getClass() != o.getClass()) {
+      return false;
+    }
+
+    Compound that = (Compound) o;
+
+    if (!children.equals(that.children)) {
+      return false;
+    }
+
+    return true;
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public int hashCode() {
+    return children.hashCode();
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void write(DataOutput dataOutput) throws IOException {
+    dataOutput.writeInt(children.size());
+    for (Expression child : children) {
+      WritableHelper.writeInstance(dataOutput, child);
+    }
+  }
+
+  /**
+   * {@inheritDoc}
+   */
+  @Override
+  public void readFields(DataInput dataInput) throws IOException {
+    int size = dataInput.readInt();
+    children = new HashSet<Expression>(size);
+    for (int i = 0; i < size; i++) {
+      Expression expression = WritableHelper.readInstance(dataInput, Expression.class);
+      children.add(expression);
+    }
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Expression.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,81 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx.exp;
+
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Class representing an expression.
+ */
+public abstract class Expression implements Writable {
+  /**
+   * {@inheritDoc}
+   */
+  public abstract int hashCode();
+
+  /**
+   * {@inheritDoc}
+   */
+  public abstract boolean equals(Object o);
+
+  /**
+   * Creates and returns an {@link Or} instance.
+   * @param expressions the expressions
+   * @return an instance
+   */
+  public static Or or(Expression... expressions) {
+    return new Or(expressions);
+  }
+
+  /**
+   * Creates and returns an {@link And} instance.
+   * @param expressions the expressions
+   * @return an instance
+   */
+  public static And and(Expression... expressions) {
+    return new And(expressions);
+  }
+
+  /**
+   * Creates and returns an {@link Comparison}
+   * instance.
+   * @param family the column family name
+   * @param qualifier  the qualifier
+   * @param operator   the operator
+   * @param value      the value
+   * @return the instance
+   */
+  public static Comparison comparison(byte[] family, byte[] qualifier, Comparison.Operator operator, byte[] value) {
+    return new Comparison(family, qualifier, operator, value);
+  }
+
+  /**
+   * Creates and returns an {@link Comparison}
+   * instance.
+   * @param family the column family name
+   * @param qualifier  the qualifier
+   * @param operator   the operator
+   * @param value      the value
+   * @return the instance
+   */
+  public static Comparison comparison(String family, String qualifier, Comparison.Operator operator, byte[] value) {
+    return new Comparison(family, qualifier, operator, value);
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Or.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Or.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Or.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/exp/Or.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.client.idx.exp;
+
+import java.util.Collection;
+
+/**
+ * This class implements boolean OR - any sub-expressions can be true in order
+ * for it to be true.
+ */
+public class Or extends Compound {
+  /**
+   * Internal constructor.
+   */
+  public Or() {
+    super();
+  }
+
+  /**
+   * Constructs an or expression with provided expression.
+   * @param expressions the expression
+   */
+  public Or(Expression... expressions) {
+    super(expressions);
+  }
+
+  /**
+   * Constructs an or expression with provided expression.
+   * @param expressions the expression
+   */
+  public Or(Collection<Expression> expressions) {
+    super(expressions);
+  }
+
+  /**
+   * Adds the expression to the set of expression.
+   * @param expression the expression
+   * @return this
+   * @see Compound#add(Expression)
+   */
+  public Or or(Expression expression) {
+    return (Or) super.add(expression);
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/client/idx/package.html Tue Jan  5 17:26:49 2010
@@ -0,0 +1,339 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<!--
+   Copyright 2010 The Apache Software Foundation
+
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+<head />
+<body bgcolor="white">
+<h2>Indexed HBase</h2>
+      <p>
+        This page gives the high levels for the indexed hbase contrib.
+        It is assumed that the reader has in-depth knowledge of HBase.
+      </p>
+<h2>Table Of Contents</h2>
+        <ol>
+          <li>
+            <a href=#IndexedHBase-IndexedHBase>Indexed HBase</a>
+            <ol>
+              <li>
+                <a href=#IndexedHBase-Purpose>Purpose</a>
+              </li>
+              <li>
+                <a href=#IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F>Why do we think IHbase outperforms ITHBase?</a>
+              </li>
+            </ol>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Usage>Usage</a>
+          </li>
+          <li>
+            <a href=#IndexedHBase-Implementationnotes>Implementation notes</a>
+          </li>
+        </ol>
+
+      <h3>
+        <a name=IndexedHBase-Purpose></a>Purpose
+      </h3>
+      <p>
+        The goal of the indexed HBase contrib is to speed up scans by indexing HBase columns.
+        Indexed HBase (IHBase) is different from the indexed tables in transactional HBase (ITHBase):
+        while the indexes in ITHBase are, in fact, hbase tables using the indexed column's values
+        as row keys, IHBase creates indexes at the region level.
+        The differences are summarized in the table below.
+      </p>
+      <table >
+        <tbody>
+        <tr>
+          <th >
+            Feature
+          </th>
+          <th >
+            ITHBase
+          </th>
+          <th >
+            IHBase
+          </th>
+          <th >
+            Comment
+          </th>
+        </tr>
+        <tr>
+          <td >
+            global ordering
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            IHBase has an index for each region. The flip side of not having global ordering
+            is compatibility with the good old HRegion: results are coming back in row
+            order (and not value order as in ITHBase)
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Full table scan?
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            THbase does a partial scan on the index table. ITHBase supports specifying start/end rows to limit the number of scanned regions
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Multiple Index Usage<br clear=all>
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            IHBase can take advantage of multiple indexes in the same scan. IHBase IdxScan object accepts an Expression which allows intersection/unison of several indexed column criteria
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra disk storage
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            IHBase indexes are created when the region starts/flushes and do not require any extra storage
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Extra RAM
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            IHBase indexes are in memory and hence increase the memory overhead.
+            THBbase indexes increase the number of regions each region server has to support thus costing memory too
+          </td>
+        </tr>
+        <tr>
+          <td >
+            Parallel scanning support
+          </td>
+          <td >
+            no
+          </td>
+          <td >
+            yes
+          </td>
+          <td >
+            In ITHBase the index table needs to be consulted and then GETs are issued
+            for each matching row. The behavior of IHBase (as perceived by the client)
+            is no different than a regular scan and hence supports parallel
+            scanning seamlessly. <font color=darkgray>parallel GET can be implemented
+            to speedup THbase scans</font>
+          </td>
+        </tr>
+        </tbody>
+      </table>
+      <h3>
+        <a name=IndexedHBase-WhydowethinkIHbaseoutperformsTHBase%3F></a>Why do we think IHbase outperforms THBase?
+      </h3>
+      <ol>
+        <li>
+          More flexible:
+          <ol>
+            <li>
+              Supports range queries and multi-index queries
+            </li>
+            <li>
+              Supports different types - not only byte arrays
+            </li>
+          </ol>
+        </li>
+        <li>
+          Less overhead: THBase pays at least two 'table roundtrips' - one for the index table and the other for the main table
+        </li>
+        <li>
+          Quicker index expression evaluation: IHBase is using dedicated index data structures while ITHBase is using the regular HRegion scan facilities
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Usage></a>Usage
+      </h2>
+      <p>
+        To use Indexed HBase do the following:
+      </p>
+      <ol>
+        <li>
+          Set the hbase.region.impl property to IdxRegion
+          <div class=panelMacro>
+            <table >
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>IdxRegion HBase configuration snippet</b><br>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java>&lt;property&gt;
+  &lt;name&gt;hbase.hregion.impl&lt;/name&gt;
+  &lt;value&gt;org.apache.hadoop.hbase.regionserver.IdxRegion&lt;/value&gt;
+&lt;/property&gt;</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
+          When creating a table define which columns to index using IdxColumnDescriptor.
+          The supported types are all the <a href="http://java.sun.com/docs/books/tutorial/java/nutsandbolts/datatypes.html"> java primitive data types</a>
+          except boolean, byte[], char[] and BigDecimal
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Creating an HTable with an index on family:qual column</b><br>
+                  <p>
+                    Note that this snippet assumes that all the values assigned to family:qual are exactly 8 bytes, preferrably created using Bytes.toBytes(long). The table may have rows in which family:qual is missing, those rows will not be included in the index.
+                  </p>
+                  <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java><span class=code-object>byte</span>[] tableName = Bytes.toBytes(<span class=code-quote>"table"</span>);
+<span class=code-object>byte</span>[] familyName = Bytes.toBytes(<span class=code-quote>"family"</span>);
+<span class=code-object>byte</span>[] qualifier = Bytes.toBytes(<span class=code-quote>"qual"</span>);
+
+IdxColumnDescriptor idxColumnDescriptor = <span class=code-keyword>new</span> IdxColumnDescriptor(familyPairName);
+IdxIndexDescriptor indexDescriptor  = <span class=code-keyword>new</span> IdxIndexDescriptor(qualifier, IdxQualifierType.LONG);
+idxColumnDescriptor.addIndexDescriptor(indexDescriptor);
+HTableDescriptor htd = <span class=code-keyword>new</span> HTableDescriptor(tableName);
+htd.addFamily(idxColumnDescriptor);
+    
+HBaseConfiguration conf = <span class=code-keyword>new</span> HBaseConfiguration();
+conf.setClass(HConstants.REGION_IMPL, IdxRegion.class, IdxRegion.class);
+HBaseAdmin admin = <span class=code-keyword>new</span> HBaseAdmin(conf);
+admin.createTable(htd);
+HTable table = <span class=code-keyword>new</span> HTable(conf, desc.getName());
+     . . .</pre>
+                    </div>
+                  </div>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+        <li>
+          When scanning make sure you instantiate an IdxScan and that you set the Expression property
+          <div class=panelMacro>
+            <table class="infoMacro zeroBorder">
+              <tbody>
+              <tr>
+                <td valign=top>
+                  <img align=absmiddle alt="" border=0 height=16 width=16>
+                </td>
+                <td>
+                  <b>Indexed scans</b><br>
+                  <p>
+                    Notes:
+                  </p>
+                  <ul>
+                    <li>
+                    <font color=brown><b>Setting an expression doesn't exclude setting a mathcing filter. This duplication is absolutely essential for getting correct scan results</b> </font>
+                    </li>
+                    <li>
+                    The index expression must accept any row accepted by the filter
+                    </li>
+                    <li>
+                    The filter may accept a subset of the rows accepted by the index expression (e.g. narrow down the results set)
+                    </li>
+                    <li>
+                    Setting a filter without setting an expression is supported and would revert to a 'good old scan'
+                    </li>
+                    <li>
+                    The supported expression types are comparison, and, or. Comparisons support GT, GTE, EQ, LTE, LT
+                    </li>
+                    <li>
+                    The caller may combine any number of index expressions using any of the existing indexes. Trying to add an expression for a non-indexed column would result in a runtime error
+                    <div class="code panel" style=BORDER-WIDTH:1px>
+                    <div class="codeContent panelContent">
+                    <pre class=code-java>. . .
+IdxScan idxScan = <span class=code-keyword>new</span> IdxScan();
+idxScan.setExpression(Expression.comparison(familyName, qualifier, Comparison.Operator.EQ, Bytes.toBytes(42L));
+idxScan.setFilter(<span class=code-keyword>new</span> SingleColumnValueFilter(familyName, qualifier, CompareFilter.CompareOp.EQUAL, Bytes.toBytes(42L)));
+idxScan.setCaching(1000);
+
+ResultScanner scanner = table.getScanner(idxScan);
+<span class=code-keyword>for</span> (Result res : scanner) {
+   <span class=code-comment>// Do stuff with res
+</span>}</pre>
+                    </div>
+                    </div>
+                    </li>
+                  </ul>
+                </td>
+              </tr>
+              </tbody>
+            </table>
+          </div>
+        </li>
+      </ol>
+      <h2>
+        <a name=IndexedHBase-Implementationnotes></a>Implementation notes
+      </h2>
+      <ul>
+        <li>
+          We only index Store files. Every index scan performs a full memstore scan. Indexing the memstore will be implemented only if scanning the memstore will prove to be a performance bottleneck
+        </li>
+        <li>
+          Index expression evaluation is performed using bitsets. There are two types of bitsets: compressed and expanded. An index will typically store a compressed bitset while an expression evaluator will most probably use an expanded bitset
+        </li>
+        <li>
+          TODO
+        </li>
+      </ul>
+    </div>
+  </div>
+</div>
+<div id=footer>
+</div>
+<br></body>
+</html>

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndex.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,172 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import static org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder.calcHeapSize;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+/**
+ * A complete index implementation - all keys are put in the keystore, no skips.
+ */
+class CompleteIndex implements IdxIndex {
+  /**
+   * The fixed part in the heap size calcualtion.
+   */
+  static final long FIXED_SIZE = ClassSize.align(ClassSize.OBJECT +
+    ClassSize.REFERENCE + 3 * (ClassSize.ARRAY + ClassSize.REFERENCE) +
+    Bytes.SIZEOF_LONG + 2 * Bytes.SIZEOF_INT
+  );
+
+  /**
+   * The capacity of the sets.
+   */
+  private int numKeyValues;
+  /**
+   * The key store - holds the col:qual values.
+   */
+  private List<?> keyStore;
+  /**
+   * The value store - holds sets with {@link numKeyValues} capacity.
+   */
+  private IntSet[] valueStore;
+  /**
+   * Sets containing partial calculations of the tail operation.
+   */
+  private IntSet[] heads;
+  /**
+   * Sets containing partial calculations of the head operation.
+   */
+  private IntSet[] tails;
+  /**
+   * The partial calculation interval (used to determine up to which point
+   * to use the valueStore before grabbing a pre-calculated set.
+   */
+  private int precalcInterval;
+  /**
+   * The heap size.
+   */
+  private long heapSize;
+
+  /**
+   * Construct a new complete index.
+   *
+   * @param keyStore        the key store
+   * @param valueStore      the value store
+   * @param heads           a list of precalculated heads
+   * @param tails           a list of precalculated tails
+   * @param numKeyValues    the total number of KeyValues for this region
+   * @param precalcInterval the interval by which tails/heads are precalculated
+   */
+  CompleteIndex(List<?> keyStore, IntSet[] valueStore,
+    IntSet[] heads, IntSet[] tails,
+    int numKeyValues, int precalcInterval) {
+    this.keyStore = keyStore;
+    this.valueStore = valueStore;
+    this.heads = heads;
+    this.tails = tails;
+    this.numKeyValues = numKeyValues;
+    this.precalcInterval = precalcInterval;
+    heapSize = FIXED_SIZE + keyStore.heapSize() + calcHeapSize(valueStore) +
+      calcHeapSize(heads) + calcHeapSize(tails);
+  }
+
+  /**
+   * Looks up a key in the index.
+   *
+   * @param probe the probe to lookup
+   * @return the set of results, may be empty
+   */
+  @Override
+  public IntSet lookup(byte[] probe) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    return index >= 0 ? valueStore[index].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * Find all the results which are greater and perhaps equal to the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return a possibly empty set of results
+   */
+  @Override
+  public IntSet tail(byte[] probe, boolean inclusive) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    if (index < 0 || !inclusive) {
+      index++;
+    }
+    if (index < 0) {
+      index = -index;
+    }
+    int tailIndex = index / precalcInterval + 1;
+    IntSet result = tailIndex < tails.length ?
+      tails[tailIndex].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+    int stopIndex = Math.min(tailIndex * precalcInterval, valueStore.length);
+    for (int i = index; i < stopIndex; i++) {
+      result = result.unite(valueStore[i]);
+    }
+    return result;
+  }
+
+  /**
+   * Find all the results which are less than and perhaps equal to the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return a possibly empty set of results
+   */
+  @Override
+  public IntSet head(byte[] probe, boolean inclusive) {
+    int index = BinarySearch.search(keyStore, keyStore.size(), probe);
+    if (index >= 0 && inclusive) {
+      index++;
+    }
+    if (index < 0) {
+      index = -(index + 1);
+    }
+
+    int headIndex = (index - 1) / precalcInterval;
+    IntSet result = headIndex > 0 ?
+      heads[headIndex].clone() :
+      IntSetBuilder.newEmptyIntSet(numKeyValues);
+    int startIndex = Math.max(headIndex * precalcInterval, 0);
+    for (int i = startIndex; i < index; i++) {
+      result = result.unite(valueStore[i]);
+    }
+    return result;
+  }
+
+  @Override
+  public long heapSize() {
+    return heapSize;
+  }
+
+  public String probeToString(byte[] bytes) {
+    return ArrayUtils.toString(keyStore.fromBytes(bytes));
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/CompleteIndexBuilder.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,179 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.HColumnDescriptor;
+import org.apache.hadoop.hbase.KeyValue;
+import org.apache.hadoop.hbase.client.idx.IdxIndexDescriptor;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BinarySearch;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ObjectArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.BigDecimalArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ByteArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.CharArrayArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.CharArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.DoubleArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.FloatArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.IntegerArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.LongArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.ShortArrayList;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import org.apache.hadoop.hbase.util.Bytes;
+
+/**
+ * A builder class used to create complete indexes.
+ */
+public class CompleteIndexBuilder {
+
+  private HColumnDescriptor columnDescriptor;
+  private IdxIndexDescriptor indexDescriptor;
+
+  /**
+   * The target keystore.
+   */
+  private List<?> keyStore;
+  /**
+   * The value store set builders.
+   */
+  private ObjectArrayList<IntSetBuilder> valueStoreBuilders;
+
+  /**
+   * Construct a new complete index builder.
+   *
+   * @param columnDescriptor the column descriptor
+   * @param indexDescriptor  the index descriptor
+   */
+  public CompleteIndexBuilder(HColumnDescriptor columnDescriptor,
+    IdxIndexDescriptor indexDescriptor) {
+    this.columnDescriptor = columnDescriptor;
+    this.indexDescriptor = indexDescriptor;
+
+    switch (this.indexDescriptor.getQualifierType()) {
+      case BYTE_ARRAY:
+        keyStore = new ByteArrayArrayList();
+        break;
+      case LONG:
+        keyStore = new LongArrayList();
+        break;
+      case DOUBLE:
+        keyStore = new DoubleArrayList();
+        break;
+      case BYTE:
+        keyStore = new ByteArrayList();
+        break;
+      case CHAR:
+        keyStore = new CharArrayList();
+        break;
+      case SHORT:
+        keyStore = new ShortArrayList();
+        break;
+      case INT:
+        keyStore = new IntegerArrayList();
+        break;
+      case FLOAT:
+        keyStore = new FloatArrayList();
+        break;
+      case BIG_DECIMAL:
+        keyStore = new BigDecimalArrayList();
+        break;
+      case CHAR_ARRAY:
+        keyStore = new CharArrayArrayList();
+        break;
+      default:
+        throw new IllegalStateException("Unsupported type " +
+          this.indexDescriptor.getQualifierType());
+    }
+    valueStoreBuilders = new ObjectArrayList<IntSetBuilder>();
+
+  }
+
+  /**
+   * Add a new key value to the index. The keyvalues are added in 'key' order.
+   *
+   * @param kv the keyvalue.
+   * @param id the id of the keyvalue (e.g. its place in the sorted order)
+   */
+  public void addKeyValue(KeyValue kv, int id) {
+    assert Bytes.equals(indexDescriptor.getQualifierName(), kv.getQualifier())
+      && Bytes.equals(columnDescriptor.getName(), kv.getFamily());
+    byte[] key = kv.getValue();
+    int index = BinarySearch.search(keyStore, keyStore.size(), key);
+    IntSetBuilder intsetBuilder;
+    if (index < 0) {
+      intsetBuilder = new IntSetBuilder().start();
+      index = -(index + 1);
+      keyStore.insert(index, key);
+      valueStoreBuilders.insert(index, intsetBuilder);
+    } else {
+      intsetBuilder = valueStoreBuilders.get(index);
+    }
+    intsetBuilder.addNext(id);
+  }
+
+  /**
+   * Finalized the index creation and creates the new index.
+   *
+   * @param numKeyValues the total number of keyvalues in the region
+   * @return a new complete index
+   */
+  @SuppressWarnings({"unchecked"})
+  public IdxIndex finalizeIndex(int numKeyValues) {
+    if (valueStoreBuilders.size() > 0) {
+      assert numKeyValues > 0;
+      int indexSize = keyStore.size();
+
+      IntSet[] valueStore = new IntSet[indexSize];
+      for (int i = 0; i < indexSize; i++) {
+        valueStore[i] = valueStoreBuilders.get(i).finish(numKeyValues);
+      }
+      int interval = (int) Math.round(Math.sqrt(indexSize));
+      int precalcSize = indexSize / interval +
+        Integer.signum(indexSize % interval);
+
+      IntSet[] tails = new IntSet[precalcSize];
+      IntSet currentTail = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      for (int i = indexSize - 1; i >= 0; i--) {
+        currentTail = currentTail.unite(valueStore[i]);
+        if (i % interval == 0) {
+          tails[i / interval] = currentTail;
+          currentTail = currentTail.clone();
+        }
+      }
+
+      IntSet[] heads = new IntSet[precalcSize];
+      IntSet currentHead = IntSetBuilder.newEmptyIntSet(numKeyValues);
+      for (int i = 0; i < indexSize; i++) {
+        currentHead = currentHead.unite(valueStore[i]);
+        if (i % interval == 0) {
+          heads[i / interval] = currentHead;
+          currentHead = currentHead.clone();
+        }
+      }
+
+      return new CompleteIndex(keyStore, valueStore, heads, tails,
+        numKeyValues, interval);
+    } else {
+      return new EmptyIndex(keyStore, numKeyValues);
+    }
+  }
+
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/EmptyIndex.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,91 @@
+/**
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hbase.regionserver.idx.support.arrays.List;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+
+/**
+ * An empty index.
+ */
+public class EmptyIndex implements IdxIndex {
+
+  private static final int HEAP_SIZE = ClassSize.align(ClassSize.OBJECT +
+    ClassSize.REFERENCE + Bytes.SIZEOF_INT);
+
+  private List<?> keyStore;
+  private int numKeyValues;
+
+  /**
+   * Construct a new empty index with a given capacity. All sets genreated by
+   * this empty index will be initiazlized using the provided capacity.
+   *
+   * @param keyStore the key store
+   * @param capacity the capacity
+   */
+  EmptyIndex(List<?> keyStore, int capacity) {
+    this.keyStore = keyStore;
+    this.numKeyValues = capacity;
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet lookup(byte[] probe) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet tail(byte[] probe, boolean inclusive) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  /**
+   * {@inheritDoc}
+   * <p/>
+   * Returns an empty set.
+   */
+  @Override
+  public IntSet head(byte[] probe, boolean inclusive) {
+    return IntSetBuilder.newEmptyIntSet(numKeyValues);
+  }
+
+  @Override
+  public String probeToString(byte[] bytes) {
+    return ArrayUtils.toString(keyStore.fromBytes(bytes));
+  }
+
+  @Override
+  public long heapSize() {
+    return HEAP_SIZE;
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxExpressionEvaluator.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.client.idx.exp.And;
+import org.apache.hadoop.hbase.client.idx.exp.Comparison;
+import org.apache.hadoop.hbase.client.idx.exp.Expression;
+import org.apache.hadoop.hbase.client.idx.exp.Or;
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.ClassSize;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
+/**
+ * Evaluates an {@link Expression}.
+ */
+public class IdxExpressionEvaluator implements HeapSize {
+  private static final Log LOG = LogFactory.getLog(IdxExpressionEvaluator.class);
+  /**
+   * Evaluates the expression using the provided search context.
+   *
+   * @param searchContext the search context to use whe evaluating the
+   *                      exrpession
+   * @param expression    the expression to evaluate.
+   * @return a set which contains ids of rows matching the expression provided
+   */
+  public IntSet evaluate(IdxSearchContext searchContext, Expression expression) {
+    if (expression == null) return null;
+
+    if (expression instanceof And) {
+      return evaluate(searchContext, (And) expression);
+    } else if (expression instanceof Or) {
+      return evaluate(searchContext, (Or) expression);
+    } else if (expression instanceof Comparison) {
+      return evaluate(searchContext, (Comparison) expression);
+    } else {
+      throw new IllegalArgumentException("Could not evaluate expression type " +
+        expression.getClass().getName());
+    }
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, And and) {
+    IntSet result = null;
+    for (Expression expression : and.getChildren()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Intersecting expression:");
+      }
+      IntSet childResult = evaluate(searchContext, expression);
+      if (result == null) {
+        result = childResult;
+      } else if (childResult != null) {
+        result = result.intersect(childResult);
+      }
+    }
+    return result;
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, Or or) {
+    IntSet result = null;
+    for (Expression expression : or.getChildren()) {
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Uniting expression:");
+      }
+      IntSet childResult = evaluate(searchContext, expression);
+      if (result == null) {
+        result = childResult;
+      } else if (childResult != null) {
+        result = result.unite(childResult);
+      }
+    }
+    return result;
+  }
+
+  protected IntSet evaluate(IdxSearchContext searchContext, Comparison comparison) {
+    IdxIndex index = searchContext.getIndex(comparison.getColumnName(), comparison.getQualifier());
+    if (index == null) throw new IllegalStateException(
+            String.format("Could not find an index for column: '%s', qualifier: '%s'",
+                    Bytes.toString(comparison.getColumnName()),
+                    Bytes.toString(comparison.getQualifier())));
+
+    IntSet matched = null;
+    switch (comparison.getOperator()) {
+      case EQ:
+        matched = index.lookup(comparison.getValue());
+        break;
+      case GT:
+        matched = index.tail(comparison.getValue(), false);
+        break;
+      case GTE:
+        matched = index.tail(comparison.getValue(), true);
+        break;
+      case LT:
+        matched = index.head(comparison.getValue(), false);
+        break;
+      case LTE:
+        matched = index.head(comparison.getValue(), true);
+        break;
+    }
+
+    if (LOG.isDebugEnabled() && matched != null) {
+      LOG.debug(String.format("Evaluation of comparison on column: '%s', " +
+          "qualifier: '%s', operator: %s, value: '%s' yielded %s matches",
+          Bytes.toString(comparison.getColumnName()),
+          Bytes.toString(comparison.getQualifier()), 
+          comparison.getOperator(),
+          index.probeToString(comparison.getValue()), matched.size()));
+    }
+
+    return matched != null ? matched : null;
+  }
+
+  @Override
+  public long heapSize() {
+    return ClassSize.OBJECT;
+  }
+}

Added: hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java?rev=896138&view=auto
==============================================================================
--- hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java (added)
+++ hadoop/hbase/branches/0.20/src/contrib/indexed/src/java/org/apache/hadoop/hbase/regionserver/IdxIndex.java Tue Jan  5 17:26:49 2010
@@ -0,0 +1,63 @@
+/*
+ * Copyright 2010 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.regionserver;
+
+import org.apache.hadoop.hbase.io.HeapSize;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSet;
+import org.apache.hadoop.hbase.regionserver.idx.support.sets.IntSetBuilder;
+
+/**
+ * An index interface.
+ */
+public interface IdxIndex extends HeapSize {
+
+  /**
+   * Looks up an object. returns only exact matches.
+   *
+   * @param probe the probe to lookup
+   * @return the result set
+   */
+  IntSet lookup(byte[] probe);
+
+  /**
+   * Gets all hte objects which are greater (or greater equal) than the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return the result set
+   */
+  IntSet tail(byte[] probe, boolean inclusive);
+
+  /**
+   * Gets all hte objects which are lesser (or lesser equal) than the probe.
+   *
+   * @param probe     the probe to lookup
+   * @param inclusive if greater equal
+   * @return the result set
+   */
+  IntSet head(byte[] probe, boolean inclusive);
+
+  /**
+   * Returns a string representation of the provided bytes probe.
+   * @param bytes the bytes
+   * @return the string representation
+   */
+  String probeToString(byte[] bytes);
+}