Posted to commits@hbase.apache.org by ap...@apache.org on 2009/07/20 08:22:21 UTC
svn commit: r795701 [2/3] - in /hadoop/hbase/trunk_on_hadoop-0.18.3: ./
src/contrib/transactional/src/java/org/apache/hadoop/hbase/client/transactional/
src/java/org/apache/hadoop/hbase/ src/java/org/apache/hadoop/hbase/client/
src/java/org/apache/hado...
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/CountingBloomFilter.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,314 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays; //TODO: remove
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>counting Bloom filter</i>, as defined by Fan et al. in a ToN
+ * 2000 paper.
+ * <p>
+ * A counting Bloom filter is an improvement to a standard Bloom filter as it
+ * allows dynamic additions and deletions of set membership information. This
+ * is achieved through the use of a counting vector instead of a bit vector.
+ *
+ * Developed under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.1 - 19 Jan. 08
+ *
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ *
+ * @see <a href="http://portal.acm.org/citation.cfm?id=343571.343572">Summary cache: a scalable wide-area web cache sharing protocol</a>
+ */
+public final class CountingBloomFilter extends Filter {
+ /** Storage for the counting buckets */
+ private long[] buckets;
+
+  /** We are using 4-bit buckets, so each bucket can count to 15 */
+ private final static long BUCKET_MAX_VALUE = 15;
+
+ /** Default constructor - use with readFields */
+ public CountingBloomFilter() {}
+
+ /**
+ * Constructor
+ * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash functions to consider.
+ * @param hashType type of the hashing function (see {@link Hash}).
+ */
+ public CountingBloomFilter(int vectorSize, int nbHash, int hashType){
+ super(vectorSize, nbHash, hashType);
+ buckets = new long[buckets2words(vectorSize)];
+ }//end constructor
+
+  /** Returns the number of 64-bit words needed to hold vectorSize buckets (sixteen 4-bit buckets per word) */
+ private static int buckets2words(int vectorSize) {
+ return ((vectorSize - 1) >>> 4) + 1;
+ }
+
+
+ @Override
+ public void add(Key key) {
+ if(key == null) {
+ throw new NullPointerException("key can not be null");
+ }
+
+ int[] h = hash.hash(key);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ // find the bucket
+ int wordNum = h[i] >> 4; // div 16
+ int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
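+      // e.g. for h[i] = 37: wordNum = 37 >> 4 = 2 and bucketShift = 20
+      // ((37 & 0x0f) = 5, times 4), so bucket 37 occupies bits 20..23 of
+      // buckets[2]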
+
+ long bucketMask = 15L << bucketShift;
+ long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+
+ // only increment if the count in the bucket is less than BUCKET_MAX_VALUE
+ if(bucketValue < BUCKET_MAX_VALUE) {
+ // increment by 1
+ buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue + 1) << bucketShift);
+ }
+ }
+ }//end add()
+
+ /**
+ * Removes a specified key from <i>this</i> counting Bloom filter.
+ * <p>
+   * <b>Invariant</b>: the key must belong to <i>this</i> counting Bloom filter; an IllegalArgumentException is thrown otherwise.
+ * @param key The key to remove.
+ */
+ public void delete(Key key) {
+ if(key == null) {
+ throw new NullPointerException("Key may not be null");
+ }
+ if(!membershipTest(key)) {
+ throw new IllegalArgumentException("Key is not a member");
+ }
+
+ int[] h = hash.hash(key);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ // find the bucket
+ int wordNum = h[i] >> 4; // div 16
+ int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
+
+ long bucketMask = 15L << bucketShift;
+ long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+
+      // only decrement if the count is between 1 and BUCKET_MAX_VALUE - 1;
+      // a saturated bucket may have overflowed, so it is left untouched
+ if(bucketValue >= 1 && bucketValue < BUCKET_MAX_VALUE) {
+ // decrement by 1
+ buckets[wordNum] = (buckets[wordNum] & ~bucketMask) | ((bucketValue - 1) << bucketShift);
+ }
+ }
+ }//end delete
+
+ @Override
+ public void and(Filter filter){
+ if(filter == null
+ || !(filter instanceof CountingBloomFilter)
+ || filter.vectorSize != this.vectorSize
+ || filter.nbHash != this.nbHash) {
+ throw new IllegalArgumentException("filters cannot be and-ed");
+ }
+ CountingBloomFilter cbf = (CountingBloomFilter)filter;
+
+ int sizeInWords = buckets2words(vectorSize);
+ for(int i = 0; i < sizeInWords; i++) {
+ this.buckets[i] &= cbf.buckets[i];
+ }
+ }//end and()
+
+ @Override
+ public boolean membershipTest(Key key){
+ if(key == null) {
+ throw new NullPointerException("Key may not be null");
+ }
+
+ int[] h = hash.hash(key);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ // find the bucket
+ int wordNum = h[i] >> 4; // div 16
+ int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
+
+ long bucketMask = 15L << bucketShift;
+
+ if((buckets[wordNum] & bucketMask) == 0) {
+ return false;
+ }
+ }
+
+ return true;
+ }//end membershipTest()
+
+ /**
+ * This method calculates an approximate count of the key, i.e. how many
+ * times the key was added to the filter. This allows the filter to be
+ * used as an approximate <code>key -> count</code> map.
+ * <p>NOTE: due to the bucket size of this filter, inserting the same
+ * key more than 15 times will cause an overflow at all filter positions
+ * associated with this key, and it will significantly increase the error
+ * rate for this and other keys. For this reason the filter can only be
+ * used to store small count values <code>0 <= N << 15</code>.
+ * @param key key to be tested
+ * @return 0 if the key is not present. Otherwise, a positive value v will
+ * be returned such that <code>v == count</code> with probability equal to the
+ * error rate of this filter, and <code>v > count</code> otherwise.
+ * Additionally, if the filter experienced an underflow as a result of
+ * {@link #delete(Key)} operation, the return value may be lower than the
+ * <code>count</code> with the probability of the false negative rate of such
+ * filter.
+ */
+ public int approximateCount(Key key) {
+ int res = Integer.MAX_VALUE;
+ int[] h = hash.hash(key);
+ hash.clear();
+ for (int i = 0; i < nbHash; i++) {
+ // find the bucket
+ int wordNum = h[i] >> 4; // div 16
+ int bucketShift = (h[i] & 0x0f) << 2; // (mod 16) * 4
+
+ long bucketMask = 15L << bucketShift;
+ long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+ if (bucketValue < res) res = (int)bucketValue;
+ }
+ if (res != Integer.MAX_VALUE) {
+ return res;
+ } else {
+ return 0;
+ }
+ }
+
+ @Override
+ public void not(){
+ throw new UnsupportedOperationException("not() is undefined for "
+ + this.getClass().getName());
+ }//end not()
+
+ @Override
+ public void or(Filter filter){
+ if(filter == null
+ || !(filter instanceof CountingBloomFilter)
+ || filter.vectorSize != this.vectorSize
+ || filter.nbHash != this.nbHash) {
+ throw new IllegalArgumentException("filters cannot be or-ed");
+ }
+
+ CountingBloomFilter cbf = (CountingBloomFilter)filter;
+
+ int sizeInWords = buckets2words(vectorSize);
+ for(int i = 0; i < sizeInWords; i++) {
+ this.buckets[i] |= cbf.buckets[i];
+ }
+ }//end or()
+
+ @Override
+ @SuppressWarnings("unused")
+ public void xor(Filter filter){
+ throw new UnsupportedOperationException("xor() is undefined for "
+ + this.getClass().getName());
+ }//end xor()
+
+ @Override
+ public String toString(){
+ StringBuilder res = new StringBuilder();
+
+ for(int i = 0; i < vectorSize; i++) {
+ if(i > 0) {
+ res.append(" ");
+ }
+
+ int wordNum = i >> 4; // div 16
+ int bucketShift = (i & 0x0f) << 2; // (mod 16) * 4
+
+ long bucketMask = 15L << bucketShift;
+ long bucketValue = (buckets[wordNum] & bucketMask) >>> bucketShift;
+
+ res.append(bucketValue);
+ }
+
+ return res.toString();
+ }//end toString()
+
+ @Override
+ public Object clone(){
+ CountingBloomFilter cbf = new CountingBloomFilter(vectorSize, nbHash, hashType);
+ cbf.buckets = this.buckets.clone();
+ return cbf;
+ }//end clone()
+
+ // Writable
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ int sizeInWords = buckets2words(vectorSize);
+ for(int i = 0; i < sizeInWords; i++) {
+ out.writeLong(buckets[i]);
+ }
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+ int sizeInWords = buckets2words(vectorSize);
+ buckets = new long[sizeInWords];
+ for(int i = 0; i < sizeInWords; i++) {
+ buckets[i] = in.readLong();
+ }
+ }
+}//end class
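For context, a minimal usage sketch of the class above (illustrative only, not
part of this commit; Key and Hash are the classes shipped with this changeset):

    // counting buckets make deletion possible, unlike a plain Bloom filter
    CountingBloomFilter cbf = new CountingBloomFilter(100, 2, Hash.JENKINS_HASH);
    Key k = new Key("row-1".getBytes());
    cbf.add(k);
    cbf.add(k);
    // approximateCount() returns the minimum bucket value over the key's
    // positions, so collisions can inflate it but never deflate it
    System.out.println(cbf.approximateCount(k));  // >= 2
    cbf.delete(k);
    cbf.delete(k);
    System.out.println(cbf.membershipTest(k));    // false once counts reach 0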
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/DynamicBloomFilter.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,303 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>dynamic Bloom filter</i>, as defined in the INFOCOM 2006 paper.
+ * <p>
+ * A dynamic Bloom filter (DBF) makes use of a <code>s * m</code> bit matrix but
+ * each of the <code>s</code> rows is a standard Bloom filter. The creation
+ * process of a DBF is iterative. At the start, the DBF is a <code>1 * m</code>
+ * bit matrix, i.e., it is composed of a single standard Bloom filter.
+ * It assumes that <code>n<sub>r</sub></code> elements are recorded in the
+ * initial bit vector, where <code>n<sub>r</sub> <= n</code> (<code>n</code> is
+ * the cardinality of the set <code>A</code> to record in the filter).
+ * <p>
+ * As the size of <code>A</code> grows during the execution of the application,
+ * several keys must be inserted in the DBF. When inserting a key into the DBF,
+ * one must first get an active Bloom filter in the matrix. A Bloom filter is
+ * active when the number of recorded keys, <code>n<sub>r</sub></code>, is
+ * strictly less than the current cardinality of <code>A</code>, <code>n</code>.
+ * If an active Bloom filter is found, the key is inserted and
+ * <code>n<sub>r</sub></code> is incremented by one. On the other hand, if there
+ * is no active Bloom filter, a new one is created (i.e., a new row is added to
+ * the matrix) according to the current size of <code>A</code> and the element
+ * is added in this new Bloom filter and the <code>n<sub>r</sub></code> value of
+ * this new Bloom filter is set to one. A given key is said to belong to the
+ * DBF if the <code>k</code> positions are set to one in one of the matrix rows.
+ *
+ * Developed under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 6 Feb. 07
+ *
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * @see org.onelab.filter.BloomFilter A Bloom filter
+ *
+ * @see <a href="http://www.cse.fau.edu/~jie/research/publications/Publication_files/infocom2006.pdf">Theory and Network Applications of Dynamic Bloom Filters</a>
+ */
+public class DynamicBloomFilter extends Filter {
+ /**
+   * Threshold for the maximum number of keys to record in a dynamic Bloom filter row.
+ */
+ private int nr;
+
+ /**
+ * The number of keys recorded in the current standard active Bloom filter.
+ */
+ private int currentNbRecord;
+
+ /**
+   * The matrix of Bloom filters.
+ */
+ private BloomFilter[] matrix;
+
+ /**
+   * Zero-argument constructor, for serialization.
+ */
+ public DynamicBloomFilter() { }
+
+ /**
+ * Constructor.
+ * <p>
+ * Builds an empty Dynamic Bloom filter.
+ * @param vectorSize The number of bits in the vector.
+   * @param nbHash The number of hash functions to consider.
+ * @param hashType type of the hashing function (see {@link Hash}).
+ * @param nr The threshold for the maximum number of keys to record in a dynamic Bloom filter row.
+ */
+ public DynamicBloomFilter(int vectorSize, int nbHash, int hashType, int nr) {
+ super(vectorSize, nbHash, hashType);
+
+ this.nr = nr;
+ this.currentNbRecord = 0;
+
+ matrix = new BloomFilter[1];
+ matrix[0] = new BloomFilter(this.vectorSize, this.nbHash, this.hashType);
+ }//end constructor
+
+ @Override
+ public void add(Key key){
+ if(key == null) {
+ throw new NullPointerException("Key can not be null");
+ }
+
+ BloomFilter bf = getActiveStandardBF();
+
+ if(bf == null){
+ addRow();
+ bf = matrix[matrix.length - 1];
+ currentNbRecord = 0;
+ }
+
+ bf.add(key);
+
+ currentNbRecord++;
+ }//end add()
+
+ @Override
+ public void and(Filter filter) {
+ if(filter == null
+ || !(filter instanceof DynamicBloomFilter)
+ || filter.vectorSize != this.vectorSize
+ || filter.nbHash != this.nbHash) {
+ throw new IllegalArgumentException("filters cannot be and-ed");
+ }
+
+ DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+ if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+ throw new IllegalArgumentException("filters cannot be and-ed");
+ }
+
+ for(int i = 0; i < matrix.length; i++) {
+ matrix[i].and(dbf.matrix[i]);
+ }
+ }//end and()
+
+ @Override
+ public boolean membershipTest(Key key){
+    if(key == null) {
+      // reject null keys, consistent with the other Filter implementations
+      throw new NullPointerException("Key may not be null");
+    }
+
+ for(int i = 0; i < matrix.length; i++) {
+ if(matrix[i].membershipTest(key)) {
+ return true;
+ }
+ }
+
+ return false;
+ }//end membershipTest()
+
+ @Override
+ public void not(){
+ for(int i = 0; i < matrix.length; i++) {
+ matrix[i].not();
+ }
+ }//end not()
+
+ @Override
+ public void or(Filter filter){
+ if(filter == null
+ || !(filter instanceof DynamicBloomFilter)
+ || filter.vectorSize != this.vectorSize
+ || filter.nbHash != this.nbHash) {
+ throw new IllegalArgumentException("filters cannot be or-ed");
+ }
+
+ DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+ if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+ throw new IllegalArgumentException("filters cannot be or-ed");
+ }
+ for(int i = 0; i < matrix.length; i++) {
+ matrix[i].or(dbf.matrix[i]);
+ }
+ }//end or()
+
+ @Override
+ public void xor(Filter filter){
+ if(filter == null
+ || !(filter instanceof DynamicBloomFilter)
+ || filter.vectorSize != this.vectorSize
+ || filter.nbHash != this.nbHash) {
+ throw new IllegalArgumentException("filters cannot be xor-ed");
+ }
+ DynamicBloomFilter dbf = (DynamicBloomFilter)filter;
+
+ if(dbf.matrix.length != this.matrix.length || dbf.nr != this.nr) {
+ throw new IllegalArgumentException("filters cannot be xor-ed");
+ }
+
+ for(int i = 0; i<matrix.length; i++) {
+ matrix[i].xor(dbf.matrix[i]);
+ }
+ }//end xor()
+
+ @Override
+ public String toString(){
+ StringBuilder res = new StringBuilder();
+
+ for(int i=0; i<matrix.length; i++) {
+ res.append(matrix[i]);
+      // Character.LINE_SEPARATOR is a Unicode category constant (13), not a
+      // line terminator; append a real newline instead
+      res.append('\n');
+ }
+ return res.toString();
+ }//end toString()
+
+ @Override
+ public Object clone(){
+ DynamicBloomFilter dbf = new DynamicBloomFilter(vectorSize, nbHash, hashType, nr);
+ dbf.currentNbRecord = this.currentNbRecord;
+ dbf.matrix = new BloomFilter[this.matrix.length];
+ for(int i = 0; i < this.matrix.length; i++) {
+ dbf.matrix[i] = (BloomFilter)this.matrix[i].clone();
+ }
+ return dbf;
+ }//end clone()
+
+ // Writable
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ out.writeInt(nr);
+ out.writeInt(currentNbRecord);
+ out.writeInt(matrix.length);
+ for (int i = 0; i < matrix.length; i++) {
+ matrix[i].write(out);
+ }
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+ nr = in.readInt();
+ currentNbRecord = in.readInt();
+ int len = in.readInt();
+ matrix = new BloomFilter[len];
+ for (int i = 0; i < matrix.length; i++) {
+ matrix[i] = new BloomFilter();
+ matrix[i].readFields(in);
+ }
+ }
+
+ /**
+ * Adds a new row to <i>this</i> dynamic Bloom filter.
+ */
+ private void addRow(){
+ BloomFilter[] tmp = new BloomFilter[matrix.length + 1];
+
+ for(int i = 0; i < matrix.length; i++) {
+ tmp[i] = (BloomFilter)matrix[i].clone();
+ }
+
+ tmp[tmp.length-1] = new BloomFilter(vectorSize, nbHash, hashType);
+
+ matrix = tmp;
+ }//end addRow()
+
+ /**
+ * Returns the active standard Bloom filter in <i>this</i> dynamic Bloom filter.
+   * @return The active standard Bloom filter, or <code>null</code> if the
+   * current filter already holds <code>nr</code> keys.
+ */
+ private BloomFilter getActiveStandardBF() {
+ if(currentNbRecord >= nr) {
+ return null;
+ }
+
+ return matrix[matrix.length - 1];
+ }//end getActiveStandardBF()
+}//end class
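A brief sketch of the growth behavior described in the class comment above
(illustrative only, not part of this commit): with the row threshold nr set to
2, the third insertion triggers addRow().

    DynamicBloomFilter dbf = new DynamicBloomFilter(64, 3, Hash.JENKINS_HASH, 2);
    dbf.add(new Key("a".getBytes()));
    dbf.add(new Key("b".getBytes()));  // the single starting row is now full
    dbf.add(new Key("c".getBytes()));  // no active row left, so a row is added
    // membershipTest() scans every row and matches if any one of them hits
    System.out.println(dbf.membershipTest(new Key("a".getBytes())));  // true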
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Filter.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,216 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819
+ * (http://www.one-lab.org)
+ *
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+
+import org.apache.hadoop.hbase.util.Hash;
+import org.apache.hadoop.io.Writable;
+
+/**
+ * Defines the general behavior of a filter.
+ * <p>
+ * A filter is a data structure that offers a lossy summary of a set <code>A</code>. The
+ * key idea is to map entries of <code>A</code> (also called <i>keys</i>) into several positions
+ * in a vector through the use of several hash functions.
+ * <p>
+ * Typically, a filter will be implemented as a Bloom filter (or a Bloom filter extension).
+ * <p>
+ * It must be extended in order to define the real behavior.
+ *
+ * @version 1.0 - 2 Feb. 07
+ *
+ * @see org.onelab.filter.Key The general behavior of a key
+ * @see org.onelab.filter.HashFunction A hash function
+ */
+public abstract class Filter implements Writable {
+  private static final int VERSION = -1; // negative, so it can be told apart from the old format's positive nbHash
+ /** The vector size of <i>this</i> filter. */
+ protected int vectorSize;
+
+ /** The hash function used to map a key to several positions in the vector. */
+ protected HashFunction hash;
+
+  /** The number of hash functions to consider. */
+ protected int nbHash;
+
+ /** Type of hashing function to use. */
+ protected int hashType;
+
+ protected Filter() {}
+
+ /**
+ * Constructor.
+ * @param vectorSize The vector size of <i>this</i> filter.
+ * @param nbHash The number of hash functions to consider.
+ * @param hashType type of the hashing function (see {@link Hash}).
+ */
+ protected Filter(int vectorSize, int nbHash, int hashType) {
+ this.vectorSize = vectorSize;
+ this.nbHash = nbHash;
+ this.hashType = hashType;
+ this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
+ }//end constructor
+
+ /**
+ * Adds a key to <i>this</i> filter.
+ * @param key The key to add.
+ */
+ public abstract void add(Key key);
+
+ /**
+   * Determines whether a specified key belongs to <i>this</i> filter.
+ * @param key The key to test.
+ * @return boolean True if the specified key belongs to <i>this</i> filter.
+ * False otherwise.
+ */
+ public abstract boolean membershipTest(Key key);
+
+ /**
+   * Performs a logical AND between <i>this</i> filter and a specified filter.
+ * <p>
+ * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+ * @param filter The filter to AND with.
+ */
+ public abstract void and(Filter filter);
+
+ /**
+   * Performs a logical OR between <i>this</i> filter and a specified filter.
+ * <p>
+ * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+ * @param filter The filter to OR with.
+ */
+ public abstract void or(Filter filter);
+
+ /**
+   * Performs a logical XOR between <i>this</i> filter and a specified filter.
+ * <p>
+ * <b>Invariant</b>: The result is assigned to <i>this</i> filter.
+ * @param filter The filter to XOR with.
+ */
+ public abstract void xor(Filter filter);
+
+ /**
+ * Performs a logical NOT on <i>this</i> filter.
+ * <p>
+ * The result is assigned to <i>this</i> filter.
+ */
+ public abstract void not();
+
+ /**
+ * Adds a list of keys to <i>this</i> filter.
+ * @param keys The list of keys.
+ */
+ public void add(List<Key> keys){
+ if(keys == null) {
+ throw new IllegalArgumentException("ArrayList<Key> may not be null");
+ }
+
+ for(Key key: keys) {
+ add(key);
+ }
+ }//end add()
+
+ /**
+ * Adds a collection of keys to <i>this</i> filter.
+ * @param keys The collection of keys.
+ */
+ public void add(Collection<Key> keys){
+ if(keys == null) {
+ throw new IllegalArgumentException("Collection<Key> may not be null");
+ }
+ for(Key key: keys) {
+ add(key);
+ }
+ }//end add()
+
+ /**
+ * Adds an array of keys to <i>this</i> filter.
+ * @param keys The array of keys.
+ */
+ public void add(Key[] keys){
+ if(keys == null) {
+ throw new IllegalArgumentException("Key[] may not be null");
+ }
+ for(int i = 0; i < keys.length; i++) {
+ add(keys[i]);
+ }
+ }//end add()
+
+ // Writable interface
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(VERSION);
+ out.writeInt(this.nbHash);
+ out.writeByte(this.hashType);
+ out.writeInt(this.vectorSize);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ int ver = in.readInt();
+ if (ver > 0) { // old unversioned format
+ this.nbHash = ver;
+ this.hashType = Hash.JENKINS_HASH;
+ } else if (ver == VERSION) {
+ this.nbHash = in.readInt();
+ this.hashType = in.readByte();
+ } else {
+ throw new IOException("Unsupported version: " + ver);
+ }
+ this.vectorSize = in.readInt();
+ this.hash = new HashFunction(this.vectorSize, this.nbHash, this.hashType);
+ }
+}//end class
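A short round-trip sketch of the versioned serialization handled by write()
and readFields() above (illustrative only; CountingBloomFilter stands in for
any concrete subclass, and the java.io stream classes are assumed imported):

    ByteArrayOutputStream buf = new ByteArrayOutputStream();
    Filter out = new CountingBloomFilter(64, 4, Hash.JENKINS_HASH);
    out.write(new DataOutputStream(buf));  // VERSION (-1), nbHash, hashType,
                                           // vectorSize, then subclass payload
    Filter in = new CountingBloomFilter();
    in.readFields(new DataInputStream(
        new ByteArrayInputStream(buf.toByteArray())));
    // readFields() also accepts the pre-versioned layout, where the leading
    // int is a positive nbHash and the hash type defaults to JENKINS_HASH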
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/HashFunction.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,127 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819
+ * (http://www.one-lab.org)
+ *
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a hash object that returns a certain number of hashed values.
+ * <p>
+ * It chains a configurable {@link org.apache.hadoop.hbase.util.Hash}
+ * implementation (such as the Jenkins or Murmur hash), rather than the
+ * SHA-1 algorithm the original OneLab version was based on.
+ *
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ *
+ * @version 1.0 - 2 Feb. 07
+ *
+ * @see org.onelab.filter.Key The general behavior of a key being stored in a filter
+ */
+public final class HashFunction {
+ /** The number of hashed values. */
+ private int nbHash;
+
+  /** The exclusive upper bound on returned hash values. */
+ private int maxValue;
+
+ /** Hashing algorithm to use. */
+ private Hash hashFunction;
+
+ /**
+ * Constructor.
+ * <p>
+   * Builds a hash function that produces a given number of hashed values,
+   * each strictly below a given maximum.
+   * @param maxValue The exclusive upper bound on returned hash values.
+ * @param nbHash The number of resulting hashed values.
+ * @param hashType type of the hashing function (see {@link Hash}).
+ */
+ public HashFunction(int maxValue, int nbHash, int hashType) {
+ if(maxValue <= 0) {
+ throw new IllegalArgumentException("maxValue must be > 0");
+ }
+
+ if(nbHash <= 0) {
+ throw new IllegalArgumentException("nbHash must be > 0");
+ }
+
+ this.maxValue = maxValue;
+ this.nbHash = nbHash;
+ this.hashFunction = Hash.getInstance(hashType);
+ if (this.hashFunction == null)
+ throw new IllegalArgumentException("hashType must be known");
+ }//end constructor
+
+ /** Clears <i>this</i> hash function. A NOOP */
+ public void clear() {
+ }
+
+ /**
+ * Hashes a specified key into several integers.
+ * @param k The specified key.
+ * @return The array of hashed values.
+ */
+ public int[] hash(Key k){
+ byte[] b = k.getBytes();
+ if(b == null) {
+ throw new NullPointerException("buffer reference is null");
+ }
+ if(b.length == 0) {
+ throw new IllegalArgumentException("key length must be > 0");
+ }
+ int[] result = new int[nbHash];
+ for (int i = 0, initval = 0; i < nbHash; i++) {
+      initval = hashFunction.hash(b, initval);
+      // Math.abs(Integer.MIN_VALUE) is itself negative; map that rare case
+      // to 0 so the resulting index always falls in [0, maxValue)
+      int abs = (initval == Integer.MIN_VALUE) ? 0 : Math.abs(initval);
+      result[i] = abs % maxValue;
+ }
+ return result;
+ }//end hash()
+
+}//end class
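A small sketch of the contract above (illustrative only): the returned indexes
are deterministic per key and always fall in [0, maxValue), since each round
feeds the previous hash back in as the seed.

    HashFunction hf = new HashFunction(1024, 4, Hash.MURMUR_HASH);
    int[] positions = hf.hash(new Key("row-1".getBytes()));
    // positions.length == 4, each value is in [0, 1024), and hashing the
    // same key again yields the same four positions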
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/Key.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,176 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+/**
+ * The general behavior of a key that must be stored in a filter.
+ *
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ */
+public class Key implements WritableComparable {
+ /** Byte value of key */
+ byte[] bytes;
+
+ /**
+ * The weight associated to <i>this</i> key.
+ * <p>
+ * <b>Invariant</b>: if it is not specified, each instance of
+ * <code>Key</code> will have a default weight of 1.0
+ */
+ double weight;
+
+ /** default constructor - use with readFields */
+ public Key() {}
+
+ /**
+ * Constructor.
+ * <p>
+ * Builds a key with a default weight.
+ * @param value The byte value of <i>this</i> key.
+ */
+ public Key(byte[] value) {
+ this(value, 1.0);
+ }//end constructor
+
+ /**
+ * Constructor.
+ * <p>
+ * Builds a key with a specified weight.
+ * @param value The value of <i>this</i> key.
+ * @param weight The weight associated to <i>this</i> key.
+ */
+ public Key(byte[] value, double weight) {
+ set(value, weight);
+ }//end constructor
+
+  /**
+   * Sets the value and weight of <i>this</i> key.
+   * @param value The byte value of <i>this</i> key.
+   * @param weight The weight associated to <i>this</i> key.
+   */
+ public void set(byte[] value, double weight) {
+ if(value == null) {
+ throw new IllegalArgumentException("value can not be null");
+ }
+ this.bytes = value;
+ this.weight = weight;
+ }
+
+ /** @return byte[] The value of <i>this</i> key. */
+ public byte[] getBytes() {
+ return this.bytes;
+ }
+
+ /** @return Returns the weight associated to <i>this</i> key. */
+ public double getWeight(){
+ return weight;
+ }//end getWeight()
+
+ /**
+ * Increments the weight of <i>this</i> key with a specified value.
+ * @param weight The increment.
+ */
+ public void incrementWeight(double weight){
+ this.weight += weight;
+ }//end incrementWeight()
+
+ /** Increments the weight of <i>this</i> key by one. */
+ public void incrementWeight(){
+ this.weight++;
+ }//end incrementWeight()
+
+ @Override
+ public boolean equals(Object o) {
+    // guard against non-Key arguments; compareTo would otherwise throw
+    return o instanceof Key && this.compareTo(o) == 0;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = 0;
+ for(int i = 0; i < bytes.length; i++) {
+ result ^= Byte.valueOf(bytes[i]).hashCode();
+ }
+ result ^= Double.valueOf(weight).hashCode();
+ return result;
+ }
+
+ // Writable
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(bytes.length);
+ out.write(bytes);
+ out.writeDouble(weight);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ this.bytes = new byte[in.readInt()];
+ in.readFully(this.bytes);
+ weight = in.readDouble();
+ }
+
+ // Comparable
+
+ public int compareTo(Object o) {
+ Key other = (Key)o;
+
+ int result = this.bytes.length - other.getBytes().length;
+ for(int i = 0; result == 0 && i < bytes.length; i++) {
+ result = this.bytes[i] - other.bytes[i];
+ }
+
+ if(result == 0) {
+      // Double.compare avoids truncating sub-integer weight differences to 0
+      result = Double.compare(this.weight, other.weight);
+ }
+ return result;
+ }
+}//end class
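A brief sketch of the ordering semantics implemented above (illustrative
only): keys compare by byte length, then byte content, then weight, and both
equals() and hashCode() take the weight into account.

    Key a = new Key("abc".getBytes());       // weight defaults to 1.0
    Key b = new Key("abc".getBytes(), 1.0);
    System.out.println(a.equals(b));         // true
    b.incrementWeight();                     // b's weight is now 2.0
    System.out.println(a.equals(b));         // false: same bytes, new weight
    System.out.println(a.compareTo(b) < 0);  // true: lighter key sorts first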
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RemoveScheme.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,91 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819
+ * (http://www.one-lab.org)
+ *
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+/**
+ * Defines the different remove schemes for retouched Bloom filters.
+ *
+ * Developed under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 7 Feb. 07
+ */
+public interface RemoveScheme {
+ /**
+ * Random selection.
+ * <p>
+ * The idea is to randomly select a bit to reset.
+ */
+ public final static short RANDOM = 0;
+
+ /**
+ * MinimumFN Selection.
+ * <p>
+ * The idea is to select the bit to reset that will generate the minimum
+   * number of false negatives.
+ */
+ public final static short MINIMUM_FN = 1;
+
+ /**
+ * MaximumFP Selection.
+ * <p>
+ * The idea is to select the bit to reset that will remove the maximum number
+   * of false positives.
+ */
+ public final static short MAXIMUM_FP = 2;
+
+ /**
+ * Ratio Selection.
+ * <p>
+ * The idea is to select the bit to reset that will, at the same time, remove
+   * the maximum number of false positives while minimizing the number of
+   * false negatives generated.
+ */
+ public final static short RATIO = 3;
+}//end interface
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/onelab/filter/RetouchedBloomFilter.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,450 @@
+/**
+ *
+ * Copyright (c) 2005, European Commission project OneLab under contract 034819 (http://www.one-lab.org)
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the distribution.
+ * - Neither the name of the University Catholique de Louvain - UCL
+ * nor the names of its contributors may be used to endorse or
+ * promote products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+ * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+ * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
+ * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
+ * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.onelab.filter;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.hadoop.hbase.util.Hash;
+
+/**
+ * Implements a <i>retouched Bloom filter</i>, as defined in the CoNEXT 2006 paper.
+ * <p>
+ * It allows the removal of selected false positives at the cost of introducing
+ * random false negatives, and with the benefit of eliminating some random false
+ * positives at the same time.
+ *
+ * Developed under contract for the <a href="http://www.one-lab.org">European Commission One-Lab Project 034819</a>.
+ *
+ * @version 1.0 - 7 Feb. 07
+ *
+ * @see org.onelab.filter.Filter The general behavior of a filter
+ * @see org.onelab.filter.BloomFilter A Bloom filter
+ * @see org.onelab.filter.RemoveScheme The different selective clearing algorithms
+ *
+ * @see <a href="http://www-rp.lip6.fr/site_npa/site_rp/_publications/740-rbf_cameraready.pdf">Retouched Bloom Filters: Allowing Networked Applications to Trade Off Selected False Positives Against False Negatives</a>
+ */
+public final class RetouchedBloomFilter extends BloomFilter
+implements RemoveScheme {
+ /**
+ * KeyList vector (or ElementList Vector, as defined in the paper) of false positives.
+ */
+ List<Key>[] fpVector;
+
+ /**
+ * KeyList vector of keys recorded in the filter.
+ */
+ List<Key>[] keyVector;
+
+ /**
+ * Ratio vector.
+ */
+ double[] ratio;
+
+ private Random rand;
+
+ /** Default constructor - use with readFields */
+ public RetouchedBloomFilter() {}
+
+ /**
+ * Constructor
+ * @param vectorSize The vector size of <i>this</i> filter.
+   * @param nbHash The number of hash functions to consider.
+ * @param hashType type of the hashing function (see {@link Hash}).
+ */
+ public RetouchedBloomFilter(int vectorSize, int nbHash, int hashType) {
+ super(vectorSize, nbHash, hashType);
+
+ this.rand = null;
+ createVector();
+ }//end constructor
+
+ @Override
+ public void add(Key key){
+ if(key == null) {
+ throw new NullPointerException("key can not be null");
+ }
+
+ int[] h = hash.hash(key);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ bits.set(h[i]);
+ keyVector[h[i]].add(key);
+ }//end for - i
+ }//end add()
+
+ /**
+   * Adds false positive information to <i>this</i> retouched Bloom filter.
+   * <p>
+   * <b>Invariant</b>: the key must not be <code>null</code>; a
+   * <code>NullPointerException</code> is thrown otherwise.
+ * @param key The false positive key to add.
+ */
+ public void addFalsePositive(Key key){
+ if(key == null) {
+ throw new NullPointerException("key can not be null");
+ }
+
+ int[] h = hash.hash(key);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ fpVector[h[i]].add(key);
+ }
+ }//end addFalsePositive()
+
+ /**
+ * Adds a collection of false positive information to <i>this</i> retouched Bloom filter.
+   * @param coll The collection of false positives.
+ */
+ public void addFalsePositive(Collection<Key> coll) {
+ if(coll == null) {
+ throw new NullPointerException("Collection<Key> can not be null");
+ }
+
+ for(Key k: coll) {
+ addFalsePositive(k);
+ }
+ }//end addFalsePositive()
+
+ /**
+ * Adds a list of false positive information to <i>this</i> retouched Bloom filter.
+   * @param keys The list of false positives.
+ */
+ public void addFalsePositive(List<Key> keys){
+ if(keys == null) {
+ throw new NullPointerException("ArrayList<Key> can not be null");
+ }
+
+ for(Key k: keys) {
+ addFalsePositive(k);
+ }
+ }//end addFalsePositive()
+
+ /**
+ * Adds an array of false positive information to <i>this</i> retouched Bloom filter.
+   * @param keys The array of false positives.
+ */
+ public void addFalsePositive(Key[] keys){
+ if(keys == null) {
+ throw new NullPointerException("Key[] can not be null");
+ }
+
+ for(int i = 0; i < keys.length; i++) {
+ addFalsePositive(keys[i]);
+ }
+ }//end addFalsePositive()
+
+ /**
+ * Performs the selective clearing for a given key.
+ * @param k The false positive key to remove from <i>this</i> retouched Bloom filter.
+ * @param scheme The selective clearing scheme to apply.
+ */
+ public void selectiveClearing(Key k, short scheme) {
+ if(k == null) {
+ throw new NullPointerException("Key can not be null");
+ }
+
+ if(!membershipTest(k)) {
+ throw new IllegalArgumentException("Key is not a member");
+ }
+
+ int index = 0;
+ int[] h = hash.hash(k);
+
+ switch(scheme) {
+
+ case RANDOM:
+      index = randomRemove(h);
+ break;
+
+ case MINIMUM_FN:
+ index = minimumFnRemove(h);
+ break;
+
+ case MAXIMUM_FP:
+ index = maximumFpRemove(h);
+ break;
+
+ case RATIO:
+ index = ratioRemove(h);
+ break;
+
+ default:
+ throw new AssertionError("Undefined selective clearing scheme");
+
+ }//end switch
+
+ clearBit(index);
+ }//end selectiveClearing()
+
+  private int randomRemove(int[] h) {
+    if(rand == null) {
+      rand = new Random();
+    }
+
+    // pick one of the key's own bit positions; the previous version returned
+    // a raw index in [0, nbHash), which is almost never a bit of this key
+    return h[rand.nextInt(nbHash)];
+  }//end randomRemove()
+
+ /**
+   * Chooses the bit position that minimizes the number of false negatives generated.
+   * @param h The different bit positions.
+   * @return int The position that minimizes the number of false negatives generated.
+ */
+ private int minimumFnRemove(int[] h) {
+ int minIndex = Integer.MAX_VALUE;
+ double minValue = Double.MAX_VALUE;
+
+ for(int i = 0; i < nbHash; i++) {
+ double keyWeight = getWeight(keyVector[h[i]]);
+
+ if(keyWeight < minValue) {
+ minIndex = h[i];
+ minValue = keyWeight;
+ }
+
+ }//end for - i
+
+ return minIndex;
+ }//end minimumFnRemove()
+
+ /**
+   * Chooses the bit position that maximizes the number of false positives removed.
+   * @param h The different bit positions.
+   * @return int The position that maximizes the number of false positives removed.
+ */
+ private int maximumFpRemove(int[] h){
+ int maxIndex = Integer.MIN_VALUE;
+    // Double.MIN_VALUE is the smallest positive double, so an all-zero
+    // weight vector would never update maxIndex; use a true lower bound
+    double maxValue = -1.0;
+
+ for(int i = 0; i < nbHash; i++) {
+ double fpWeight = getWeight(fpVector[h[i]]);
+
+ if(fpWeight > maxValue) {
+ maxValue = fpWeight;
+ maxIndex = h[i];
+ }
+ }
+
+ return maxIndex;
+ }//end maximumFpRemove()
+
+ /**
+   * Chooses the bit position that minimizes the number of false negatives
+   * generated while maximizing the number of false positives removed.
+   * @param h The different bit positions.
+   * @return int The position that minimizes the number of false negatives
+   * generated while maximizing the number of false positives removed.
+ */
+ private int ratioRemove(int[] h){
+ computeRatio();
+ int minIndex = Integer.MAX_VALUE;
+ double minValue = Double.MAX_VALUE;
+
+ for(int i = 0; i < nbHash; i++) {
+ if(ratio[h[i]] < minValue) {
+ minValue = ratio[h[i]];
+ minIndex = h[i];
+ }
+ }//end for - i
+
+ return minIndex;
+ }//end ratioRemove()
+
+ /**
+   * Clears a specified bit in the bit vector and keeps the KeyList vectors up to date.
+ * @param index The position of the bit to clear.
+ */
+ private void clearBit(int index){
+ if(index < 0 || index >= vectorSize) {
+ throw new ArrayIndexOutOfBoundsException(index);
+ }
+
+ List<Key> kl = keyVector[index];
+ List<Key> fpl = fpVector[index];
+
+ // update key list
+ int listSize = kl.size();
+ for(int i = 0; i < listSize && !kl.isEmpty(); i++) {
+ removeKey(kl.get(0), keyVector);
+ }
+
+ kl.clear();
+ keyVector[index].clear();
+
+ //update false positive list
+ listSize = fpl.size();
+ for(int i = 0; i < listSize && !fpl.isEmpty(); i++) {
+ removeKey(fpl.get(0), fpVector);
+ }
+
+ fpl.clear(); // fpl aliases fpVector[index]; one clear empties both
+
+ //update ratio
+ ratio[index] = 0.0;
+
+ //update bit vector
+ bits.clear(index);
+ }//end clearBit()
+
+ /**
+ * Removes a given key from <i>this</i> filter.
+ * @param k The key to remove.
+ * @param vector The counting vector associated to the key.
+ */
+ private void removeKey(Key k, List<Key>[] vector) {
+ if(k == null) {
+ throw new NullPointerException("Key can not be null");
+ }
+ if(vector == null) {
+ throw new NullPointerException("ArrayList<Key>[] can not be null");
+ }
+
+ int[] h = hash.hash(k);
+ hash.clear();
+
+ for(int i = 0; i < nbHash; i++) {
+ vector[h[i]].remove(k);
+ }
+ }//end removeKey()
+
+ /**
+ * Computes, for each bit position, the ratio A/FP of key weight to false
+ * positive weight; ratioRemove() clears the position with the lowest
+ * ratio, removing the most false positive weight at the least false
+ * negative cost.
+ */
+ private void computeRatio() {
+ for(int i = 0; i < vectorSize; i++) {
+ double keyWeight = getWeight(keyVector[i]);
+ double fpWeight = getWeight(fpVector[i]);
+
+ if(keyWeight > 0 && fpWeight > 0) {
+ ratio[i] = keyWeight/fpWeight;
+ }
+ }//end for - i
+ }//end computeRatio()
+
+ private double getWeight(List<Key> keyList) {
+ double weight = 0.0;
+ for(Key k: keyList) {
+ weight += k.getWeight();
+ }
+ return weight;
+ }
+
+ /**
+ * Creates and initialises the various vectors.
+ */
+ @SuppressWarnings("unchecked")
+ private void createVector() {
+ fpVector = new List[vectorSize];
+ keyVector = new List[vectorSize];
+ ratio = new double[vectorSize];
+
+ for(int i = 0; i < vectorSize; i++) {
+ fpVector[i] = Collections.synchronizedList(new ArrayList<Key>());
+ keyVector[i] = Collections.synchronizedList(new ArrayList<Key>());
+ ratio[i] = 0.0;
+ }//end for -i
+ }//end createVector()
+
+ // Writable
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+ super.write(out);
+ for(int i = 0; i < fpVector.length; i++) {
+ List<Key> list = fpVector[i];
+ out.writeInt(list.size());
+ for(Key k: list) {
+ k.write(out);
+ }
+ }
+ for(int i = 0; i < keyVector.length; i++) {
+ List<Key> list = keyVector[i];
+ out.writeInt(list.size());
+ for(Key k: list) {
+ k.write(out);
+ }
+ }
+ for(int i = 0; i < ratio.length; i++) {
+ out.writeDouble(ratio[i]);
+ }
+ }
+
+ @Override
+ public void readFields(DataInput in) throws IOException {
+ super.readFields(in);
+ createVector();
+ for(int i = 0; i < fpVector.length; i++) {
+ List<Key> list = fpVector[i];
+ int size = in.readInt();
+ for(int j = 0; j < size; j++) {
+ Key k = new Key();
+ k.readFields(in);
+ list.add(k);
+ }
+ }
+ for(int i = 0; i < keyVector.length; i++) {
+ List<Key> list = keyVector[i];
+ int size = in.readInt();
+ for(int j = 0; j < size; j++) {
+ Key k = new Key();
+ k.readFields(in);
+ list.add(k);
+ }
+ }
+ for(int i = 0; i < ratio.length; i++) {
+ ratio[i] = in.readDouble();
+ }
+ }
+}//end class
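
A round-trip sketch for the Writable pair above, using in-memory java.io streams (illustrative; it assumes the no-argument constructor this lineage provides for deserialization):

    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    RetouchedBloomFilter original = new RetouchedBloomFilter(1000, 4);
    original.add(new Key("row-1".getBytes()));
    original.write(new DataOutputStream(bytes));

    RetouchedBloomFilter copy = new RetouchedBloomFilter();
    copy.readFields(new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray())));
    // copy now answers membershipTest() exactly as original does, and its
    // key, false positive and ratio vectors match entry for entry.
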
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/package-info.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/package-info.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/package-info.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/package-info.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,24 @@
+/*
+ * Copyright 2009 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/**
+Provides classes from old hbase versions that are used when migrating data.
+The nineteen package holds classes from hbase 0.19.
+*/
+package org.apache.hadoop.hbase.migration.nineteen;
Added: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java?rev=795701&view=auto
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java (added)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/migration/nineteen/regionserver/HStoreFile.java Mon Jul 20 06:22:20 2009
@@ -0,0 +1,558 @@
+/**
+ * Copyright 2007 The Apache Software Foundation
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hbase.migration.nineteen.regionserver;
+
+import java.io.DataInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hbase.HBaseConfiguration;
+import org.apache.hadoop.hbase.HConstants;
+import org.apache.hadoop.hbase.HRegionInfo;
+import org.apache.hadoop.hbase.migration.nineteen.io.BloomFilterMapFile;
+import org.apache.hadoop.hbase.migration.nineteen.io.HalfMapFileReader;
+import org.apache.hadoop.hbase.migration.nineteen.io.Reference;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.MapFile;
+import org.apache.hadoop.io.SequenceFile;
+
+/**
+ * An HStore data file. HStores usually have one or more of these files. They
+ * are produced by flushing the memcache to disk.
+ *
+ * <p>Each HStore maintains a bunch of different data files. The filename is a
+ * mix of the parent dir, the region name, the column name, and a file
+ * identifier. The name may also be a reference to a store file located
+ * elsewhere. This class handles all that path-building stuff for you.
+ *
+ * <p>An HStoreFile usually tracks 4 things: its parent dir, the region
+ * identifier, the column family, and the file identifier. If you know those
+ * four things, you know how to obtain the right HStoreFile. HStoreFiles may
+ * also reference store files in another region serving either from
+ * the top-half of the remote file or from the bottom-half. Such references
+ * are made fast splitting regions.
+ *
+ * <p>Plain HStoreFiles are named for a randomly generated id as in:
+ * <code>1278437856009925445</code> A file by this name is made in both the
+ * <code>mapfiles</code> and <code>info</code> subdirectories of an
+ * HStore column family directory. E.g. if the column family is 'anchor:', then
+ * under the region directory there is a subdirectory named 'anchor' within
+ * which is a 'mapfiles' and 'info' subdirectory. In each will be found a
+ * file named something like <code>1278437856009925445</code>, one to hold the
+ * data in 'mapfiles' and one under 'info' that holds the sequence id for this
+ * store file.
+ *
+ * <p>References to store files located over in some other region look like
+ * this:
+ * <code>1278437856009925445.hbaserepository,qAReLZD-OyQORZWq_vqR1k==,959247014679548184</code>:
+ * i.e. an id followed by the name of the referenced region. The data
+ * ('mapfiles') of HStoreFile references are empty. The accompanying
+ * <code>info</code> file contains the
+ * midkey, the id of the remote store we're referencing and whether we're
+ * to serve the top or bottom region of the remote store file. Note, a region
+ * is not splittable if it has instances of store file references (references
+ * are cleaned up by compactions).
+ *
+ * <p>When merging or splitting HRegions, we might want to modify one of the
+ * params for an HStoreFile (effectively moving it elsewhere).
+ */
+public class HStoreFile implements HConstants {
+ static final Log LOG = LogFactory.getLog(HStoreFile.class.getName());
+ static final byte INFO_SEQ_NUM = 0;
+ static final byte MAJOR_COMPACTION = INFO_SEQ_NUM + 1;
+ static final String HSTORE_DATFILE_DIR = "mapfiles";
+ static final String HSTORE_INFO_DIR = "info";
+ static final String HSTORE_FILTER_DIR = "filter";
+
+ private final static Random rand = new Random();
+
+ private final Path basedir;
+ private final int encodedRegionName;
+ private final byte [] colFamily;
+ private final long fileId;
+ private final HBaseConfiguration conf;
+ private final FileSystem fs;
+ private final Reference reference;
+ private final HRegionInfo hri;
+ /* If true, this file was the product of a major compaction. */
+ private boolean majorCompaction = false;
+ private long indexLength;
+
+ /**
+ * Constructor that fully initializes the object
+ * @param conf Configuration object
+ * @param fs file system
+ * @param basedir qualified path that is parent of region directory
+ * @param colFamily name of the column family
+ * @param fileId file identifier
+ * @param ref Reference to another HStoreFile.
+ * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
+ * @throws IOException
+ */
+ HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
+ final HRegionInfo hri, byte [] colFamily, long fileId,
+ final Reference ref)
+ throws IOException {
+ this(conf, fs, basedir, hri, colFamily, fileId, ref, false);
+ }
+
+ /**
+ * Constructor that fully initializes the object
+ * @param conf Configuration object
+ * @param fs file system
+ * @param basedir qualified path that is parent of region directory
+ * @param colFamily name of the column family
+ * @param fileId file identifier
+ * @param ref Reference to another HStoreFile.
+ * @param hri The region info for this file (HACK HBASE-868). TODO: Fix.
+ * @param mc True if this file is the result of a major compaction.
+ * @throws IOException
+ */
+ HStoreFile(HBaseConfiguration conf, FileSystem fs, Path basedir,
+ final HRegionInfo hri, byte [] colFamily, long fileId,
+ final Reference ref, final boolean mc)
+ throws IOException {
+ this.conf = conf;
+ this.fs = fs;
+ this.basedir = basedir;
+ this.encodedRegionName = hri.getEncodedName();
+ this.colFamily = colFamily;
+ this.hri = hri;
+
+ long id = fileId;
+ if (id == -1) {
+ Path mapdir = HStoreFile.getMapDir(basedir, encodedRegionName, colFamily);
+ Path testpath = null;
+ do {
+ id = Math.abs(rand.nextLong());
+ testpath = new Path(mapdir, createHStoreFilename(id, -1));
+ } while(fs.exists(testpath));
+ }
+ this.fileId = id;
+
+ // If a reference, construction does not write the pointer files. That's
+ // done by invocations of writeReferenceFiles(hsf, fs). Happens at split.
+ this.reference = ref;
+ this.majorCompaction = mc;
+ }
+
+ /** @return true if this is a reference to another store file */
+ boolean isReference() {
+ return reference != null;
+ }
+
+ Reference getReference() {
+ return reference;
+ }
+
+ int getEncodedRegionName() {
+ return this.encodedRegionName;
+ }
+
+ /** @return the column family */
+ byte [] getColFamily() {
+ return colFamily;
+ }
+
+ /** @return the file identifier */
+ long getFileId() {
+ return fileId;
+ }
+
+ // Build full filenames from those components
+
+ /** @return path for MapFile */
+ Path getMapFilePath() {
+ if (isReference()) {
+ return getMapFilePath(encodedRegionName, fileId,
+ reference.getEncodedRegionName());
+ }
+ return getMapFilePath(this.encodedRegionName, fileId);
+ }
+
+ private Path getMapFilePath(final Reference r) {
+ if (r == null) {
+ return getMapFilePath();
+ }
+ return getMapFilePath(r.getEncodedRegionName(), r.getFileId());
+ }
+
+ private Path getMapFilePath(final int encodedName, final long fid) {
+ return getMapFilePath(encodedName, fid, HRegionInfo.NO_HASH);
+ }
+
+ private Path getMapFilePath(final int encodedName, final long fid,
+ final int ern) {
+ return new Path(HStoreFile.getMapDir(basedir, encodedName, colFamily),
+ createHStoreFilename(fid, ern));
+ }
+
+ /** @return path for info file */
+ Path getInfoFilePath() {
+ if (isReference()) {
+ return getInfoFilePath(encodedRegionName, fileId,
+ reference.getEncodedRegionName());
+
+ }
+ return getInfoFilePath(encodedRegionName, fileId);
+ }
+
+ private Path getInfoFilePath(final int encodedName, final long fid) {
+ return getInfoFilePath(encodedName, fid, HRegionInfo.NO_HASH);
+ }
+
+ private Path getInfoFilePath(final int encodedName, final long fid,
+ final int ern) {
+ return new Path(HStoreFile.getInfoDir(basedir, encodedName, colFamily),
+ createHStoreFilename(fid, ern));
+ }
+
+ // File handling
+
+ /*
+ * Split by making two new store files that reference the top and bottom
+ * regions of the original store file.
+ * @param dstA the file that will reference keys from the start of the source
+ * @param dstB the file that will reference keys from the midkey to the end
+ * @param fs file system
+ * @throws IOException
+ */
+ void splitStoreFile(final HStoreFile dstA, final HStoreFile dstB,
+ final FileSystem fs)
+ throws IOException {
+ dstA.writeReferenceFiles(fs);
+ dstB.writeReferenceFiles(fs);
+ }
+
+ void writeReferenceFiles(final FileSystem fs)
+ throws IOException {
+ createOrFail(fs, getMapFilePath());
+ writeSplitInfo(fs);
+ }
+
+ /*
+ * If reference, create and write the remote store file id, the midkey and
+ * whether we're going against the top file region of the referent out to
+ * the info file.
+ * @param fs file system
+ * @throws IOException
+ */
+ private void writeSplitInfo(final FileSystem fs) throws IOException {
+ Path p = getInfoFilePath();
+ if (fs.exists(p)) {
+ throw new IOException("File already exists " + p.toString());
+ }
+ FSDataOutputStream out = fs.create(p);
+ try {
+ reference.write(out);
+ } finally {
+ out.close();
+ }
+ }
+
+ /**
+ * @see #writeSplitInfo(FileSystem)
+ */
+ static Reference readSplitInfo(final Path p, final FileSystem fs)
+ throws IOException {
+ FSDataInputStream in = fs.open(p);
+ try {
+ Reference r = new Reference();
+ r.readFields(in);
+ return r;
+ } finally {
+ in.close();
+ }
+ }
+
+ private void createOrFail(final FileSystem fs, final Path p)
+ throws IOException {
+ if (fs.exists(p)) {
+ throw new IOException("File already exists " + p.toString());
+ }
+ if (!fs.createNewFile(p)) {
+ throw new IOException("Failed create of " + p);
+ }
+ }
+
+ /**
+ * Reads in an info file
+ *
+ * @param filesystem file system
+ * @return The sequence id contained in the info file
+ * @throws IOException
+ */
+ long loadInfo(final FileSystem filesystem) throws IOException {
+ Path p = null;
+ if (isReference()) {
+ p = getInfoFilePath(reference.getEncodedRegionName(),
+ this.reference.getFileId());
+ } else {
+ p = getInfoFilePath();
+ }
+ long length = filesystem.getFileStatus(p).getLen();
+ // A bare info file is one flag byte plus a long sequence id. Note that
+ // Byte.SIZE counts bits, not bytes, so size the minimum record with
+ // Bytes.SIZEOF_BYTE instead.
+ boolean hasMoreThanSeqNum = length > (Bytes.SIZEOF_BYTE + Bytes.SIZEOF_LONG);
+ DataInputStream in = new DataInputStream(filesystem.open(p));
+ try {
+ byte flag = in.readByte();
+ if (flag == INFO_SEQ_NUM) {
+ // Read fields in the order writeInfo() wrote them: the sequence id
+ // first, then the optional major compaction suffix.
+ long sequenceId = in.readLong();
+ if (hasMoreThanSeqNum) {
+ flag = in.readByte();
+ if (flag == MAJOR_COMPACTION) {
+ this.majorCompaction = in.readBoolean();
+ }
+ }
+ return sequenceId;
+ }
+ throw new IOException("Cannot process info file: " + p);
+ } finally {
+ in.close();
+ }
+ }
+
+ /**
+ * Writes the file-identifier to disk
+ *
+ * @param filesystem file system
+ * @param infonum file id
+ * @throws IOException
+ */
+ void writeInfo(final FileSystem filesystem, final long infonum)
+ throws IOException {
+ writeInfo(filesystem, infonum, false);
+ }
+
+ /**
+ * Writes the file-identifier to disk
+ *
+ * @param filesystem file system
+ * @param infonum file id
+ * @param mc True if this file is product of a major compaction
+ * @throws IOException
+ */
+ void writeInfo(final FileSystem filesystem, final long infonum,
+ final boolean mc)
+ throws IOException {
+ Path p = getInfoFilePath();
+ FSDataOutputStream out = filesystem.create(p);
+ try {
+ out.writeByte(INFO_SEQ_NUM);
+ out.writeLong(infonum);
+ if (mc) {
+ // Record that this file is the product of a major compaction.
+ this.majorCompaction = mc;
+ out.writeByte(MAJOR_COMPACTION);
+ out.writeBoolean(mc);
+ }
+ } finally {
+ out.close();
+ }
+ }
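
For reference, the info file layout that writeInfo() produces and loadInfo() consumes, sketched with in-memory streams (values are illustrative):

    // flag byte INFO_SEQ_NUM plus a long sequence id: 9 bytes, always;
    // flag byte MAJOR_COMPACTION plus a boolean: 2 more bytes, only if mc.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeByte(0);        // INFO_SEQ_NUM
    out.writeLong(42L);      // sequence id
    out.writeByte(1);        // MAJOR_COMPACTION
    out.writeBoolean(true);  // product of a major compaction

    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()));
    in.readByte();                    // INFO_SEQ_NUM flag
    long sequenceId = in.readLong();  // read before the optional suffix
    if (in.available() > 0 && in.readByte() == 1) {
      boolean majorCompaction = in.readBoolean();
    }
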
+
+ /**
+ * Delete store map files.
+ * @throws IOException
+ */
+ public void delete() throws IOException {
+ fs.delete(getMapFilePath(), true);
+ fs.delete(getInfoFilePath(), true);
+ }
+
+ /**
+ * Renames this store file's map file and info file to the paths of the
+ * passed <code>hsf</code>.
+ * @param fs
+ * @param hsf
+ * @return True if succeeded.
+ * @throws IOException
+ */
+ public boolean rename(final FileSystem fs, final HStoreFile hsf)
+ throws IOException {
+ Path src = getMapFilePath();
+ if (!fs.exists(src)) {
+ throw new FileNotFoundException(src.toString());
+ }
+ boolean success = fs.rename(src, hsf.getMapFilePath());
+ if (!success) {
+ LOG.warn("Failed rename of " + src + " to " + hsf.getMapFilePath());
+ } else {
+ src = getInfoFilePath();
+ if (!fs.exists(src)) {
+ throw new FileNotFoundException(src.toString());
+ }
+ success = fs.rename(src, hsf.getInfoFilePath());
+ if (!success) {
+ LOG.warn("Failed rename of " + src + " to " + hsf.getInfoFilePath());
+ }
+ }
+ return success;
+ }
+
+ /**
+ * Get reader for the store file map file.
+ * Client is responsible for closing file when done.
+ * @param fs
+ * @param bloomFilter If true, a bloom filter exists
+ * @param blockCacheEnabled If true, MapFile blocks should be cached.
+ * @return BloomFilterMapFile.Reader
+ * @throws IOException
+ */
+ public synchronized BloomFilterMapFile.Reader getReader(final FileSystem fs,
+ final boolean bloomFilter, final boolean blockCacheEnabled)
+ throws IOException {
+ if (isReference()) {
+ return new HalfMapFileReader(fs,
+ getMapFilePath(reference).toString(), conf,
+ reference.getFileRegion(), reference.getMidkey(), bloomFilter,
+ blockCacheEnabled, this.hri);
+ }
+ return new BloomFilterMapFile.Reader(fs, getMapFilePath().toString(),
+ conf, bloomFilter, blockCacheEnabled, this.hri);
+ }
+
+ /**
+ * Get a store file writer.
+ * Client is responsible for closing file when done.
+ * @param fs
+ * @param compression Pass <code>SequenceFile.CompressionType.NONE</code>
+ * for none.
+ * @param bloomFilter If true, create a bloom filter
+ * @param nrows number of rows expected. Required if bloomFilter is true.
+ * @return MapFile.Writer
+ * @throws IOException
+ */
+ public MapFile.Writer getWriter(final FileSystem fs,
+ final SequenceFile.CompressionType compression,
+ final boolean bloomFilter, int nrows)
+ throws IOException {
+ if (isReference()) {
+ throw new IOException("Illegal Access: Cannot get a writer on a" +
+ "HStoreFile reference");
+ }
+ return new BloomFilterMapFile.Writer(conf, fs,
+ getMapFilePath().toString(), compression, bloomFilter, nrows, this.hri);
+ }
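
A usage sketch pairing the accessors above (illustrative; hsf, fs and the surrounding configuration are assumed to already exist):

    // The caller owns both close() calls.
    MapFile.Writer writer = hsf.getWriter(fs,
        SequenceFile.CompressionType.NONE, false /* no bloom filter */, 0);
    try {
      // ... append key/values ...
    } finally {
      writer.close();
    }

    BloomFilterMapFile.Reader reader =
        hsf.getReader(fs, false /* no bloom filter */, true /* cache blocks */);
    try {
      // ... next()/get() against the store file ...
    } finally {
      reader.close();
    }
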
+
+ /**
+ * @return Length of the store map file. If a reference, the size is an
+ * approximation (half the length of the referenced file).
+ * @throws IOException
+ */
+ public long length() throws IOException {
+ Path p = new Path(getMapFilePath(reference), MapFile.DATA_FILE_NAME);
+ long l = p.getFileSystem(conf).getFileStatus(p).getLen();
+ return (isReference())? l / 2: l;
+ }
+
+ /**
+ * @return Length of the store map file index.
+ * @throws IOException
+ */
+ public synchronized long indexLength() throws IOException {
+ if (indexLength == 0) {
+ Path p = new Path(getMapFilePath(reference), MapFile.INDEX_FILE_NAME);
+ indexLength = p.getFileSystem(conf).getFileStatus(p).getLen();
+ }
+ return indexLength;
+ }
+
+ @Override
+ public String toString() {
+ return encodedRegionName + "/" + Bytes.toString(colFamily) + "/" + fileId +
+ (isReference()? "-" + reference.toString(): "");
+ }
+
+ /**
+ * @return True if this file was made by a major compaction.
+ */
+ public boolean isMajorCompaction() {
+ return this.majorCompaction;
+ }
+
+ private static String createHStoreFilename(final long fid,
+ final int encodedRegionName) {
+ return Long.toString(fid) +
+ ((encodedRegionName != HRegionInfo.NO_HASH)?
+ "." + encodedRegionName : "");
+ }
+
+ /**
+ * @param dir Base directory
+ * @param encodedRegionName Encoding of region name.
+ * @param f Column family.
+ * @return path for map file directory
+ */
+ public static Path getMapDir(Path dir, int encodedRegionName,
+ final byte [] f) {
+ return getFamilySubDir(dir, encodedRegionName, f, HSTORE_DATFILE_DIR);
+ }
+
+ /**
+ * @param dir Base directory
+ * @param encodedRegionName Encoding of region name.
+ * @param f Column family.
+ * @return the info directory path
+ */
+ public static Path getInfoDir(Path dir, int encodedRegionName, byte [] f) {
+ return getFamilySubDir(dir, encodedRegionName, f, HSTORE_INFO_DIR);
+ }
+
+ /**
+ * @param dir Base directory
+ * @param encodedRegionName Encoding of region name.
+ * @param f Column family.
+ * @return the bloom filter directory path
+ */
+ @Deprecated
+ public static Path getFilterDir(Path dir, int encodedRegionName,
+ final byte [] f) {
+ return getFamilySubDir(dir, encodedRegionName, f, HSTORE_FILTER_DIR);
+ }
+
+ /*
+ * @param base Base directory
+ * @param encodedRegionName Encoding of region name.
+ * @param f Column family.
+ * @param subdir Subdirectory to create under column family/store directory.
+ * @return Path of the subdirectory under the column family/store directory.
+ */
+ private static Path getFamilySubDir(final Path base,
+ final int encodedRegionName, final byte [] f, final String subdir) {
+ return new Path(base, new Path(Integer.toString(encodedRegionName),
+ new Path(Bytes.toString(f), subdir)));
+ }
+}
\ No newline at end of file
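
A sketch of the directory layout the static path helpers above compose (paths and the encoded name are illustrative):

    Path basedir = new Path("/hbase/tablename");
    int encodedRegionName = 1028785192;       // made-up encoded region name
    byte [] family = Bytes.toBytes("anchor");

    Path mapDir = HStoreFile.getMapDir(basedir, encodedRegionName, family);
    // -> /hbase/tablename/1028785192/anchor/mapfiles
    Path infoDir = HStoreFile.getInfoDir(basedir, encodedRegionName, family);
    // -> /hbase/tablename/1028785192/anchor/info
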
Modified: hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=795701&r1=795700&r2=795701&view=diff
==============================================================================
--- hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hadoop/hbase/trunk_on_hadoop-0.18.3/src/java/org/apache/hadoop/hbase/regionserver/HRegion.java Mon Jul 20 06:22:20 2009
@@ -30,7 +30,6 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantReadWriteLock;
@@ -198,7 +197,6 @@
new ReentrantReadWriteLock();
private final Object splitLock = new Object();
private long minSequenceId;
- final AtomicInteger activeScannerCount = new AtomicInteger(0);
/**
* Name of the region info file that resides just under the region directory.
@@ -466,19 +464,6 @@
}
newScannerLock.writeLock().lock();
try {
- // Wait for active scanners to finish. The write lock we hold will
- // prevent new scanners from being created.
- synchronized (activeScannerCount) {
- while (activeScannerCount.get() != 0) {
- LOG.debug("waiting for " + activeScannerCount.get() +
- " scanners to finish");
- try {
- activeScannerCount.wait();
- } catch (InterruptedException e) {
- // continue
- }
- }
- }
splitsAndClosesLock.writeLock().lock();
LOG.debug("Updates disabled for region, no outstanding scanners on " +
this);
@@ -1256,12 +1241,12 @@
Integer lid = getLock(lockid, row);
byte [] now = Bytes.toBytes(System.currentTimeMillis());
try {
- for(Map.Entry<byte[], List<KeyValue>> entry :
- put.getFamilyMap().entrySet()) {
+ for (Map.Entry<byte[], List<KeyValue>> entry :
+ put.getFamilyMap().entrySet()) {
byte [] family = entry.getKey();
checkFamily(family);
List<KeyValue> puts = entry.getValue();
- if(updateKeys(puts, now)) {
+ if (updateKeys(puts, now)) {
put(family, puts, writeToWAL);
}
}
@@ -1690,8 +1675,8 @@
* It is used to combine scanners from multiple Stores (aka column families).
*/
class RegionScanner implements InternalScanner {
- private KeyValueHeap storeHeap;
- private byte [] stopRow;
+ private final KeyValueHeap storeHeap;
+ private final byte [] stopRow;
RegionScanner(Scan scan) {
if (Bytes.equals(scan.getStopRow(), HConstants.EMPTY_END_ROW)) {
@@ -1708,10 +1693,6 @@
}
this.storeHeap =
new KeyValueHeap(scanners.toArray(new KeyValueScanner[0]), comparator);
-
- // As we have now successfully completed initialization, increment the
- // activeScanner count.
- activeScannerCount.incrementAndGet();
}
/**
@@ -1763,23 +1744,9 @@
}
public void close() {
- try {
- storeHeap.close();
- } finally {
- synchronized (activeScannerCount) {
- int count = activeScannerCount.decrementAndGet();
- if (count < 0) {
- LOG.error("active scanner count less than zero: " + count +
- " resetting to zero");
- activeScannerCount.set(0);
- count = 0;
- }
- if (count == 0) {
- activeScannerCount.notifyAll();
- }
- }
- }
+ storeHeap.close();
}
+
/**
*
* @param scanner to be closed