Posted to commits@hive.apache.org by zs...@apache.org on 2010/01/21 11:38:15 UTC

svn commit: r901644 [32/37] - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ ql/src/java/org/apache/hadoop/hive/ql/history/ ql/src/jav...

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashBucket.java Thu Jan 21 10:37:58 2010
@@ -68,265 +68,233 @@
 import java.io.IOException;
 import java.io.ObjectInput;
 import java.io.ObjectOutput;
-
 import java.util.ArrayList;
 
 /**
- * A bucket is a placeholder for multiple (key, value) pairs.  Buckets
- * are used to store collisions (same hash value) at all levels of an
- * H*tree.
- *
+ * A bucket is a placeholder for multiple (key, value) pairs. Buckets are used
+ * to store collisions (same hash value) at all levels of an H*tree.
+ * 
  * There are two types of buckets: leaf and non-leaf.
- *
- * Non-leaf buckets are buckets which hold collisions which happen
- * when the H*tree is not fully expanded.   Keys in a non-leaf buckets
- * can have different hash codes.  Non-leaf buckets are limited to an
- * arbitrary size.  When this limit is reached, the H*tree should create
- * a new Directory page and distribute keys of the non-leaf buckets into
- * the newly created Directory.
- *
- * A leaf bucket is a bucket which contains keys which all have
- * the same <code>hashCode()</code>.  Leaf buckets stand at the
- * bottom of an H*tree because the hashing algorithm cannot further
- * discriminate between different keys based on their hash code.
- *
- *  @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
- *  @version $Id: HashBucket.java,v 1.2 2005/06/25 23:12:32 doomdark Exp $
+ * 
+ * Non-leaf buckets are buckets which hold collisions which happen when the
+ * H*tree is not fully expanded. Keys in a non-leaf bucket can have different
+ * hash codes. Non-leaf buckets are limited to an arbitrary size. When this
+ * limit is reached, the H*tree should create a new Directory page and
+ * distribute keys of the non-leaf buckets into the newly created Directory.
+ * 
+ * A leaf bucket is a bucket which contains keys which all have the same
+ * <code>hashCode()</code>. Leaf buckets stand at the bottom of an H*tree
+ * because the hashing algorithm cannot further discriminate between different
+ * keys based on their hash code.
+ * 
+ * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
+ * @version $Id: HashBucket.java,v 1.2 2005/06/25 23:12:32 doomdark Exp $
  */
-final class HashBucket
-    extends HashNode
-    implements Externalizable
-{
-
-    final static long serialVersionUID = 1L;
-
-    /**
-     * The maximum number of elements (key, value) a non-leaf bucket
-     * can contain.
-     */
-    public static final int OVERFLOW_SIZE = 8;
-
-
-    /**
-     * Depth of this bucket.
-     */
-    private int _depth;
-
-
-    /**
-     * Keys in this bucket.  Keys are ordered to match their respective
-     * value in <code>_values</code>.
-     */
-    private ArrayList _keys;
-
-
-    /**
-     * Values in this bucket.  Values are ordered to match their respective
-     * key in <code>_keys</code>.
-     */
-    private ArrayList _values;
-
-
-    /**
-     * Public constructor for serialization.
-     */
-    public HashBucket() {
-        // empty
-    }
+final class HashBucket extends HashNode implements Externalizable {
 
+  final static long serialVersionUID = 1L;
 
-    /**
-     * Construct a bucket with a given depth level.  Depth level is the
-     * number of <code>HashDirectory</code> above this bucket.
-     */
-    public HashBucket( int level )
-    {
-        if ( level > HashDirectory.MAX_DEPTH+1 ) {
-            throw new IllegalArgumentException(
-                            "Cannot create bucket with depth > MAX_DEPTH+1. "
-                            + "Depth=" + level );
-        }
-        _depth = level;
-        _keys = new ArrayList( OVERFLOW_SIZE );
-        _values = new ArrayList( OVERFLOW_SIZE );
-    }
-
-
-    /**
-     * Returns the number of elements contained in this bucket.
-     */
-    public int getElementCount()
-    {
-        return _keys.size();
-    }
-
-
-    /**
-     * Returns whether or not this bucket is a "leaf bucket".
-     */
-    public boolean isLeaf()
-    {
-        return ( _depth > HashDirectory.MAX_DEPTH );
-    }
-
-
-    /**
-     * Returns true if bucket can accept at least one more element.
-     */
-    public boolean hasRoom()
-    {
-        if ( isLeaf() ) {
-            return true;  // leaf buckets are never full
-        } else {
-            // non-leaf bucket
-            return ( _keys.size() < OVERFLOW_SIZE );
-        }
-    }
-
-
-    /**
-     * Add an element (key, value) to this bucket.  If an existing element
-     * has the same key, it is replaced silently.
-     *
-     * @return Object which was previously associated with the given key
-     *          or <code>null</code> if no association existed.
-     */
-    public Object addElement( Object key, Object value )
-    {
-        int existing = _keys.indexOf(key);
-        if ( existing != -1 ) {
-            // replace existing element
-            Object before = _values.get( existing );
-            _values.set( existing, value );
-            return before;
-        } else {
-            // add new (key, value) pair
-            _keys.add( key );
-            _values.add( value );
-            return null;
-        }
-    }
-
-
-    /**
-     * Remove an element, given a specific key.
-     *
-     * @param key Key of the element to remove
-     *
-     * @return Removed element value, or <code>null</code> if not found
-     */
-    public Object removeElement( Object key )
-    {
-        int existing = _keys.indexOf(key);
-        if ( existing != -1 ) {
-            Object obj = _values.get( existing );
-            _keys.remove( existing );
-            _values.remove( existing );
-            return obj;
-        } else {
-            // not found
-            return null;
-        }
-    }
-
-
-    /**
-     * Returns the value associated with a given key.  If the given key
-     * is not found in this bucket, returns <code>null</code>.
-     */
-    public Object getValue( Object key )
-    {
-        int existing = _keys.indexOf(key);
-        if ( existing != -1 ) {
-            return _values.get( existing );
-        } else {
-            // key not found
-            return null;
-        }
-    }
-
-
-    /**
-     * Obtain keys contained in this buckets.  Keys are ordered to match
-     * their values, which be be obtained by calling <code>getValues()</code>.
-     *
-     * As an optimization, the Vector returned is the instance member
-     * of this class.  Please don't modify outside the scope of this class.
-     */
-    ArrayList getKeys()
-    {
-        return this._keys;
-    }
-
-
-    /**
-     * Obtain values contained in this buckets.  Values are ordered to match
-     * their keys, which be be obtained by calling <code>getKeys()</code>.
-     *
-     * As an optimization, the Vector returned is the instance member
-     * of this class.  Please don't modify outside the scope of this class.
-     */
-    ArrayList getValues()
-    {
-        return this._values;
-    }
-
-
-    /**
-     * Implement Externalizable interface.
-     */
-    public void writeExternal( ObjectOutput out )
-        throws IOException
-    {
-        out.writeInt( _depth );
-
-        int entries = _keys.size();
-        out.writeInt( entries );
-
-        // write keys
-        for (int i=0; i<entries; i++) {
-            out.writeObject( _keys.get( i ) );
-        }
-        // write values
-        for (int i=0; i<entries; i++) {
-            out.writeObject( _values.get( i ) );
-        }
-    }
-
-
-    /**
-     * Implement Externalizable interface.
-     */
-    public void readExternal(ObjectInput in)
-    throws IOException, ClassNotFoundException {
-        _depth = in.readInt();
-
-        int entries = in.readInt();
-
-        // prepare array lists
-        int size = Math.max( entries, OVERFLOW_SIZE );
-        _keys = new ArrayList( size );
-        _values = new ArrayList( size );
-
-        // read keys
-        for ( int i=0; i<entries; i++ ) {
-            _keys.add( in.readObject() );
-        }
-        // read values
-        for ( int i=0; i<entries; i++ ) {
-            _values.add( in.readObject() );
-        }
-    }
-
-    public String toString() {
-        StringBuffer buf = new StringBuffer();
-        buf.append("HashBucket {depth=");
-        buf.append(_depth);
-        buf.append(", keys=");
-        buf.append(_keys);
-        buf.append(", values=");
-        buf.append(_values);
-        buf.append("}");
-        return buf.toString();
-    }
+  /**
+   * The maximum number of elements (key, value) a non-leaf bucket can contain.
+   */
+  public static final int OVERFLOW_SIZE = 8;
+
+  /**
+   * Depth of this bucket.
+   */
+  private int _depth;
+
+  /**
+   * Keys in this bucket. Keys are ordered to match their respective value in
+   * <code>_values</code>.
+   */
+  private ArrayList _keys;
+
+  /**
+   * Values in this bucket. Values are ordered to match their respective key in
+   * <code>_keys</code>.
+   */
+  private ArrayList _values;
+
+  /**
+   * Public constructor for serialization.
+   */
+  public HashBucket() {
+    // empty
+  }
+
+  /**
+   * Construct a bucket with a given depth level. Depth level is the number of
+   * <code>HashDirectory</code> above this bucket.
+   */
+  public HashBucket(int level) {
+    if (level > HashDirectory.MAX_DEPTH + 1) {
+      throw new IllegalArgumentException(
+          "Cannot create bucket with depth > MAX_DEPTH+1. " + "Depth=" + level);
+    }
+    _depth = level;
+    _keys = new ArrayList(OVERFLOW_SIZE);
+    _values = new ArrayList(OVERFLOW_SIZE);
+  }
+
+  /**
+   * Returns the number of elements contained in this bucket.
+   */
+  public int getElementCount() {
+    return _keys.size();
+  }
+
+  /**
+   * Returns whether or not this bucket is a "leaf bucket".
+   */
+  public boolean isLeaf() {
+    return (_depth > HashDirectory.MAX_DEPTH);
+  }
+
+  /**
+   * Returns true if bucket can accept at least one more element.
+   */
+  public boolean hasRoom() {
+    if (isLeaf()) {
+      return true; // leaf buckets are never full
+    } else {
+      // non-leaf bucket
+      return (_keys.size() < OVERFLOW_SIZE);
+    }
+  }
+
+  /**
+   * Add an element (key, value) to this bucket. If an existing element has the
+   * same key, it is replaced silently.
+   * 
+   * @return Object which was previously associated with the given key or
+   *         <code>null</code> if no association existed.
+   */
+  public Object addElement(Object key, Object value) {
+    int existing = _keys.indexOf(key);
+    if (existing != -1) {
+      // replace existing element
+      Object before = _values.get(existing);
+      _values.set(existing, value);
+      return before;
+    } else {
+      // add new (key, value) pair
+      _keys.add(key);
+      _values.add(value);
+      return null;
+    }
+  }
+
+  /**
+   * Remove an element, given a specific key.
+   * 
+   * @param key
+   *          Key of the element to remove
+   * 
+   * @return Removed element value, or <code>null</code> if not found
+   */
+  public Object removeElement(Object key) {
+    int existing = _keys.indexOf(key);
+    if (existing != -1) {
+      Object obj = _values.get(existing);
+      _keys.remove(existing);
+      _values.remove(existing);
+      return obj;
+    } else {
+      // not found
+      return null;
+    }
+  }
+
+  /**
+   * Returns the value associated with a given key. If the given key is not
+   * found in this bucket, returns <code>null</code>.
+   */
+  public Object getValue(Object key) {
+    int existing = _keys.indexOf(key);
+    if (existing != -1) {
+      return _values.get(existing);
+    } else {
+      // key not found
+      return null;
+    }
+  }
+
+  /**
+   * Obtain keys contained in this bucket. Keys are ordered to match their
+   * values, which may be obtained by calling <code>getValues()</code>.
+   * 
+   * As an optimization, the ArrayList returned is the instance member of this
+   * class. Please don't modify it outside the scope of this class.
+   */
+  ArrayList getKeys() {
+    return _keys;
+  }
+
+  /**
+   * Obtain values contained in this bucket. Values are ordered to match their
+   * keys, which may be obtained by calling <code>getKeys()</code>.
+   * 
+   * As an optimization, the ArrayList returned is the instance member of this
+   * class. Please don't modify it outside the scope of this class.
+   */
+  ArrayList getValues() {
+    return _values;
+  }
+
+  /**
+   * Implement Externalizable interface.
+   */
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeInt(_depth);
+
+    int entries = _keys.size();
+    out.writeInt(entries);
+
+    // write keys
+    for (int i = 0; i < entries; i++) {
+      out.writeObject(_keys.get(i));
+    }
+    // write values
+    for (int i = 0; i < entries; i++) {
+      out.writeObject(_values.get(i));
+    }
+  }
+
+  /**
+   * Implement Externalizable interface.
+   */
+  public void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    _depth = in.readInt();
+
+    int entries = in.readInt();
+
+    // prepare array lists
+    int size = Math.max(entries, OVERFLOW_SIZE);
+    _keys = new ArrayList(size);
+    _values = new ArrayList(size);
+
+    // read keys
+    for (int i = 0; i < entries; i++) {
+      _keys.add(in.readObject());
+    }
+    // read values
+    for (int i = 0; i < entries; i++) {
+      _values.add(in.readObject());
+    }
+  }
+
+  @Override
+  public String toString() {
+    StringBuffer buf = new StringBuffer();
+    buf.append("HashBucket {depth=");
+    buf.append(_depth);
+    buf.append(", keys=");
+    buf.append(_keys);
+    buf.append(", values=");
+    buf.append(_values);
+    buf.append("}");
+    return buf.toString();
+  }
 }
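
For reference, the invariants documented in the Javadoc above (OVERFLOW_SIZE = 8
for non-leaf buckets, leaf buckets at depth MAX_DEPTH + 1) play out as follows.
This is a minimal sketch, not part of the diff itself, and assumes package-local
access to the classes in this commit:

    // A non-leaf bucket (depth <= MAX_DEPTH) overflows after OVERFLOW_SIZE
    // elements; HashDirectory.put() then replaces it with a new directory page.
    HashBucket nonLeaf = new HashBucket(0);
    for (int i = 0; i < HashBucket.OVERFLOW_SIZE; i++) {
      nonLeaf.addElement("key" + i, "value" + i);
    }
    assert !nonLeaf.hasRoom();   // full: forces directory expansion on the next put

    // A leaf bucket (depth == MAX_DEPTH + 1) holds keys whose hashcodes are
    // identical, so it can never be split and never reports itself full.
    HashBucket leaf = new HashBucket(HashDirectory.MAX_DEPTH + 1);
    assert leaf.isLeaf();
    assert leaf.hasRoom();       // leaf buckets are never full

HashDirectory.put() below relies on exactly this distinction: a full non-leaf
bucket is discarded and its elements redistributed into a new directory page,
while a leaf bucket simply keeps growing.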

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java Thu Jan 21 10:37:58 2010
@@ -64,503 +64,472 @@
 
 package org.apache.hadoop.hive.ql.util.jdbm.htree;
 
-import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
-
-import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator;
-import org.apache.hadoop.hive.ql.util.jdbm.helper.IterationException;
-
 import java.io.Externalizable;
 import java.io.IOException;
 import java.io.ObjectInput;
 import java.io.ObjectOutput;
-
 import java.util.ArrayList;
 import java.util.Iterator;
 
+import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
+import org.apache.hadoop.hive.ql.util.jdbm.helper.FastIterator;
+import org.apache.hadoop.hive.ql.util.jdbm.helper.IterationException;
+
 /**
- *  Hashtable directory page.
- *
- *  @author <a href="mailto:boisvert@exoffice.com">Alex Boisvert</a>
- *  @version $Id: HashDirectory.java,v 1.5 2005/06/25 23:12:32 doomdark Exp $
+ * Hashtable directory page.
+ * 
+ * @author <a href="mailto:boisvert@exoffice.com">Alex Boisvert</a>
+ * @version $Id: HashDirectory.java,v 1.5 2005/06/25 23:12:32 doomdark Exp $
  */
-final class HashDirectory
-    extends HashNode
-    implements Externalizable
-{
+final class HashDirectory extends HashNode implements Externalizable {
 
-    static final long serialVersionUID = 1L;
-
-
-    /**
-     * Maximum number of children in a directory.
-     *
-     * (Must be a power of 2 -- if you update this value, you must also
-     *  update BIT_SIZE and MAX_DEPTH.)
-     */
-    static final int MAX_CHILDREN = 256;
-
-
-    /**
-     * Number of significant bits per directory level.
-     */
-    static final int BIT_SIZE = 8; // log2(256) = 8
-
-
-    /**
-     * Maximum number of levels (zero-based)
-     *
-     * (4 * 8 bits = 32 bits, which is the size of an "int", and as
-     *  you know, hashcodes in Java are "ints")
-     */
-    static final int MAX_DEPTH = 3; // 4 levels
-
-
-    /**
-     * Record ids of children pages.
-     */
-    private long[] _children;
-
-
-    /**
-     * Depth of this directory page, zero-based
-     */
-    private byte _depth;
-
-
-    /**
-     * PageManager used to persist changes in directory and buckets
-     */
-    private transient RecordManager _recman;
-
-
-    /**
-     * This directory's record ID in the PageManager.  (transient)
-     */
-    private transient long _recid;
-
-
-    /**
-     * Public constructor used by serialization
-     */
-    public HashDirectory() {
-        // empty
-    }
-
-    /**
-     * Construct a HashDirectory
-     *
-     * @param depth Depth of this directory page.
-     */
-    HashDirectory(byte depth) {
-        _depth = depth;
-        _children = new long[MAX_CHILDREN];
-    }
-
-
-    /**
-     * Sets persistence context.  This method must be called before any
-     * persistence-related operation.
-     *
-     * @param recman RecordManager which stores this directory
-     * @param recid Record id of this directory.
-     */
-    void setPersistenceContext( RecordManager recman, long recid )
-    {
-        this._recman = recman;
-        this._recid = recid;
-    }
-
-
-    /**
-     * Get the record identifier used to load this hashtable.
-     */
-    long getRecid() {
-        return _recid;
-    }
+  static final long serialVersionUID = 1L;
 
+  /**
+   * Maximum number of children in a directory.
+   * 
+   * (Must be a power of 2 -- if you update this value, you must also update
+   * BIT_SIZE and MAX_DEPTH.)
+   */
+  static final int MAX_CHILDREN = 256;
+
+  /**
+   * Number of significant bits per directory level.
+   */
+  static final int BIT_SIZE = 8; // log2(256) = 8
+
+  /**
+   * Maximum number of levels (zero-based)
+   * 
+   * (4 * 8 bits = 32 bits, which is the size of an "int", and as you know,
+   * hashcodes in Java are "ints")
+   */
+  static final int MAX_DEPTH = 3; // 4 levels
+
+  /**
+   * Record ids of children pages.
+   */
+  private long[] _children;
+
+  /**
+   * Depth of this directory page, zero-based
+   */
+  private byte _depth;
+
+  /**
+   * PageManager used to persist changes in directory and buckets
+   */
+  private transient RecordManager _recman;
+
+  /**
+   * This directory's record ID in the PageManager. (transient)
+   */
+  private transient long _recid;
+
+  /**
+   * Public constructor used by serialization
+   */
+  public HashDirectory() {
+    // empty
+  }
+
+  /**
+   * Construct a HashDirectory
+   * 
+   * @param depth
+   *          Depth of this directory page.
+   */
+  HashDirectory(byte depth) {
+    _depth = depth;
+    _children = new long[MAX_CHILDREN];
+  }
+
+  /**
+   * Sets persistence context. This method must be called before any
+   * persistence-related operation.
+   * 
+   * @param recman
+   *          RecordManager which stores this directory
+   * @param recid
+   *          Record id of this directory.
+   */
+  void setPersistenceContext(RecordManager recman, long recid) {
+    _recman = recman;
+    _recid = recid;
+  }
+
+  /**
+   * Get the record identifier used to load this hashtable.
+   */
+  long getRecid() {
+    return _recid;
+  }
+
+  /**
+   * Returns whether or not this directory is empty. A directory is empty when
+   * it no longer contains buckets or sub-directories.
+   */
+  boolean isEmpty() {
+    for (long element : _children) {
+      if (element != 0) {
+        return false;
+      }
+    }
+    return true;
+  }
+
+  /**
+   * Returns the value which is associated with the given key. Returns
+   * <code>null</code> if there is no association for this key.
+   * 
+   * @param key
+   *          key whose associated value is to be returned
+   */
+  Object get(Object key) throws IOException {
+    int hash = hashCode(key);
+    long child_recid = _children[hash];
+    if (child_recid == 0) {
+      // not bucket/page --> not found
+      return null;
+    } else {
+      HashNode node = (HashNode) _recman.fetch(child_recid);
+      // System.out.println("HashDirectory.get() child is : "+node);
+
+      if (node instanceof HashDirectory) {
+        // recurse into next directory level
+        HashDirectory dir = (HashDirectory) node;
+        dir.setPersistenceContext(_recman, child_recid);
+        return dir.get(key);
+      } else {
+        // node is a bucket
+        HashBucket bucket = (HashBucket) node;
+        return bucket.getValue(key);
+      }
+    }
+  }
+
+  /**
+   * Associates the specified value with the specified key.
+   * 
+   * @param key
+   *          key with which the specified value is to be associated.
+   * @param value
+   *          value to be associated with the specified key.
+   * @return object which was previously associated with the given key, or
+   *         <code>null</code> if no association existed.
+   */
+  Object put(Object key, Object value) throws IOException {
+    if (value == null) {
+      return remove(key);
+    }
+    int hash = hashCode(key);
+    long child_recid = _children[hash];
+    if (child_recid == 0) {
+      // no bucket/page here yet, let's create a bucket
+      HashBucket bucket = new HashBucket(_depth + 1);
+
+      // insert (key,value) pair in bucket
+      Object existing = bucket.addElement(key, value);
+
+      long b_recid = _recman.insert(bucket);
+      _children[hash] = b_recid;
+
+      _recman.update(_recid, this);
+
+      // System.out.println("Added: "+bucket);
+      return existing;
+    } else {
+      HashNode node = (HashNode) _recman.fetch(child_recid);
+
+      if (node instanceof HashDirectory) {
+        // recursive insert in next directory level
+        HashDirectory dir = (HashDirectory) node;
+        dir.setPersistenceContext(_recman, child_recid);
+        return dir.put(key, value);
+      } else {
+        // node is a bucket
+        HashBucket bucket = (HashBucket) node;
+        if (bucket.hasRoom()) {
+          Object existing = bucket.addElement(key, value);
+          _recman.update(child_recid, bucket);
+          // System.out.println("Added: "+bucket);
+          return existing;
+        } else {
+          // overflow, so create a new directory
+          if (_depth == MAX_DEPTH) {
+            throw new RuntimeException("Cannot create deeper directory. "
+                + "Depth=" + _depth);
+          }
+          HashDirectory dir = new HashDirectory((byte) (_depth + 1));
+          long dir_recid = _recman.insert(dir);
+          dir.setPersistenceContext(_recman, dir_recid);
+
+          _children[hash] = dir_recid;
+          _recman.update(_recid, this);
+
+          // discard overflown bucket
+          _recman.delete(child_recid);
+
+          // migrate existing bucket elements
+          ArrayList keys = bucket.getKeys();
+          ArrayList values = bucket.getValues();
+          int entries = keys.size();
+          for (int i = 0; i < entries; i++) {
+            dir.put(keys.get(i), values.get(i));
+          }
 
-    /**
-     * Returns whether or not this directory is empty.  A directory
-     * is empty when it no longer contains buckets or sub-directories.
-     */
-    boolean isEmpty() {
-        for (int i=0; i<_children.length; i++) {
-            if (_children[i] != 0) {
-                return false;
-            }
+          // (finally!) insert new element
+          return dir.put(key, value);
         }
-        return true;
+      }
     }
+  }
 
-    /**
-     * Returns the value which is associated with the given key. Returns
-     * <code>null</code> if there is not association for this key.
-     *
-     * @param key key whose associated value is to be returned
-     */
-    Object get(Object key)
-        throws IOException
-    {
-        int hash = hashCode( key );
-        long child_recid = _children[ hash ];
-        if ( child_recid == 0 ) {
-            // not bucket/page --> not found
-            return null;
-        } else {
-            HashNode node = (HashNode) _recman.fetch( child_recid );
-            // System.out.println("HashDirectory.get() child is : "+node);
-
-            if ( node instanceof HashDirectory ) {
-                // recurse into next directory level
-                HashDirectory dir = (HashDirectory) node;
-                dir.setPersistenceContext( _recman, child_recid );
-                return dir.get( key );
-            } else {
-                // node is a bucket
-                HashBucket bucket = (HashBucket) node;
-                return bucket.getValue( key );
-            }
+  /**
+   * Remove the value which is associated with the given key. If the key does
+   * not exist, this method simply ignores the operation.
+   * 
+   * @param key
+   *          key whose associated value is to be removed
+   * @return object which was associated with the given key, or
+   *         <code>null</code> if no association existed with given key.
+   */
+  Object remove(Object key) throws IOException {
+    int hash = hashCode(key);
+    long child_recid = _children[hash];
+    if (child_recid == 0) {
+      // not bucket/page --> not found
+      return null;
+    } else {
+      HashNode node = (HashNode) _recman.fetch(child_recid);
+      // System.out.println("HashDirectory.remove() child is : "+node);
+
+      if (node instanceof HashDirectory) {
+        // recurse into next directory level
+        HashDirectory dir = (HashDirectory) node;
+        dir.setPersistenceContext(_recman, child_recid);
+        Object existing = dir.remove(key);
+        if (existing != null) {
+          if (dir.isEmpty()) {
+            // delete empty directory
+            _recman.delete(child_recid);
+            _children[hash] = 0;
+            _recman.update(_recid, this);
+          }
+        }
+        return existing;
+      } else {
+        // node is a bucket
+        HashBucket bucket = (HashBucket) node;
+        Object existing = bucket.removeElement(key);
+        if (existing != null) {
+          if (bucket.getElementCount() >= 1) {
+            _recman.update(child_recid, bucket);
+          } else {
+            // delete bucket, it's empty
+            _recman.delete(child_recid);
+            _children[hash] = 0;
+            _recman.update(_recid, this);
+          }
         }
+        return existing;
+      }
     }
+  }
 
+  /**
+   * Calculates the hashcode of a key, based on the current directory depth.
+   */
+  private int hashCode(Object key) {
+    int hashMask = hashMask();
+    int hash = key.hashCode();
+    hash = hash & hashMask;
+    hash = hash >>> ((MAX_DEPTH - _depth) * BIT_SIZE);
+    hash = hash % MAX_CHILDREN;
+    /*
+     * System.out.println("HashDirectory.hashCode() is: 0x"
+     * +Integer.toHexString(hash) +" for object hashCode() 0x"
+     * +Integer.toHexString(key.hashCode()));
+     */
+    return hash;
+  }
 
-    /**
-     * Associates the specified value with the specified key.
-     *
-     * @param key key with which the specified value is to be assocated.
-     * @param value value to be associated with the specified key.
-     * @return object which was previously associated with the given key,
-     *          or <code>null</code> if no association existed.
+  /**
+   * Calculates the hashmask of this directory. The hashmask is the bit mask
+   * applied to a hashcode to retain only bits that are relevant to this
+   * directory level.
+   */
+  int hashMask() {
+    int bits = MAX_CHILDREN - 1;
+    int hashMask = bits << ((MAX_DEPTH - _depth) * BIT_SIZE);
+    /*
+     * System.out.println("HashDirectory.hashMask() is: 0x"
+     * +Integer.toHexString(hashMask));
      */
-    Object put(Object key, Object value)
-    throws IOException {
-        if (value == null) {
-            return remove(key);
-        }
-        int hash = hashCode(key);
-        long child_recid = _children[hash];
-        if (child_recid == 0) {
-            // no bucket/page here yet, let's create a bucket
-            HashBucket bucket = new HashBucket(_depth+1);
+    return hashMask;
+  }
 
-            // insert (key,value) pair in bucket
-            Object existing = bucket.addElement(key, value);
+  /**
+   * Returns an enumeration of the keys contained in this directory.
+   */
+  FastIterator keys() throws IOException {
+    return new HDIterator(true);
+  }
 
-            long b_recid = _recman.insert(bucket);
-            _children[hash] = b_recid;
+  /**
+   * Returns an enumeration of the values contained in this directory.
+   */
+  FastIterator values() throws IOException {
+    return new HDIterator(false);
+  }
 
-            _recman.update(_recid, this);
+  /**
+   * Implement Externalizable interface
+   */
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeByte(_depth);
+    out.writeObject(_children);
+  }
 
-            // System.out.println("Added: "+bucket);
-            return existing;
-        } else {
-            HashNode node = (HashNode) _recman.fetch( child_recid );
+  /**
+   * Implement Externalizable interface
+   */
+  public synchronized void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    _depth = in.readByte();
+    _children = (long[]) in.readObject();
+  }
 
-            if ( node instanceof HashDirectory ) {
-                // recursive insert in next directory level
-                HashDirectory dir = (HashDirectory) node;
-                dir.setPersistenceContext( _recman, child_recid );
-                return dir.put( key, value );
-            } else {
-                // node is a bucket
-                HashBucket bucket = (HashBucket)node;
-                if (bucket.hasRoom()) {
-                    Object existing = bucket.addElement(key, value);
-                    _recman.update(child_recid, bucket);
-                    // System.out.println("Added: "+bucket);
-                    return existing;
-                } else {
-                    // overflow, so create a new directory
-                    if (_depth == MAX_DEPTH) {
-                        throw new RuntimeException( "Cannot create deeper directory. "
-                                                    + "Depth=" + _depth );
-                    }
-                    HashDirectory dir = new HashDirectory( (byte) (_depth+1) );
-                    long dir_recid = _recman.insert( dir );
-                    dir.setPersistenceContext( _recman, dir_recid );
-
-                    _children[hash] = dir_recid;
-                    _recman.update( _recid, this );
-
-                    // discard overflown bucket
-                    _recman.delete( child_recid );
-
-                    // migrate existing bucket elements
-                    ArrayList keys = bucket.getKeys();
-                    ArrayList values = bucket.getValues();
-                    int entries = keys.size();
-                    for ( int i=0; i<entries; i++ ) {
-                        dir.put( keys.get( i ), values.get( i ) );
-                    }
-
-                    // (finally!) insert new element
-                    return dir.put( key, value );
-                }
-            }
-        }
-    }
+  // //////////////////////////////////////////////////////////////////////
+  // INNER CLASS
+  // //////////////////////////////////////////////////////////////////////
 
+  /**
+   * Utility class to enumerate keys/values in a HTree
+   */
+  public class HDIterator extends FastIterator {
 
     /**
-     * Remove the value which is associated with the given key.  If the
-     * key does not exist, this method simply ignores the operation.
-     *
-     * @param key key whose associated value is to be removed
-     * @return object which was associated with the given key, or
-     *          <code>null</code> if no association existed with given key.
+     * True if we're iterating on keys, False if enumerating on values.
      */
-    Object remove(Object key) throws IOException {
-        int hash = hashCode(key);
-        long child_recid = _children[hash];
-        if (child_recid == 0) {
-            // not bucket/page --> not found
-            return null;
-        } else {
-            HashNode node = (HashNode) _recman.fetch( child_recid );
-            // System.out.println("HashDirectory.remove() child is : "+node);
-
-            if (node instanceof HashDirectory) {
-                // recurse into next directory level
-                HashDirectory dir = (HashDirectory)node;
-                dir.setPersistenceContext( _recman, child_recid );
-                Object existing = dir.remove(key);
-                if (existing != null) {
-                    if (dir.isEmpty()) {
-                        // delete empty directory
-                        _recman.delete(child_recid);
-                        _children[hash] = 0;
-                        _recman.update(_recid, this);
-                    }
-                }
-                return existing;
-            } else {
-                // node is a bucket
-                HashBucket bucket = (HashBucket)node;
-                Object existing = bucket.removeElement(key);
-                if (existing != null) {
-                    if (bucket.getElementCount() >= 1) {
-                        _recman.update(child_recid, bucket);
-                    } else {
-                        // delete bucket, it's empty
-                        _recman.delete(child_recid);
-                        _children[hash] = 0;
-                        _recman.update(_recid, this);
-                    }
-                }
-                return existing;
-            }
-        }
-    }
+    private final boolean _iterateKeys;
 
     /**
-     * Calculates the hashcode of a key, based on the current directory
-     * depth.
+     * Stacks of directories & last enumerated child position
      */
-    private int hashCode(Object key) {
-        int hashMask = hashMask();
-        int hash = key.hashCode();
-        hash = hash & hashMask;
-        hash = hash >>> ((MAX_DEPTH - _depth) * BIT_SIZE);
-        hash = hash % MAX_CHILDREN;
-        /*
-        System.out.println("HashDirectory.hashCode() is: 0x"
-                           +Integer.toHexString(hash)
-                           +" for object hashCode() 0x"
-                           +Integer.toHexString(key.hashCode()));
-        */
-        return hash;
-    }
+    private final ArrayList _dirStack;
+    private final ArrayList _childStack;
 
     /**
-     * Calculates the hashmask of this directory.  The hashmask is the
-     * bit mask applied to a hashcode to retain only bits that are
-     * relevant to this directory level.
+     * Current HashDirectory in the hierarchy
      */
-    int hashMask() {
-        int bits = MAX_CHILDREN-1;
-        int hashMask = bits << ((MAX_DEPTH - _depth) * BIT_SIZE);
-        /*
-        System.out.println("HashDirectory.hashMask() is: 0x"
-                           +Integer.toHexString(hashMask));
-        */
-        return hashMask;
-    }
+    private HashDirectory _dir;
 
     /**
-     * Returns an enumeration of the keys contained in this
+     * Current child position
      */
-    FastIterator keys()
-        throws IOException
-    {
-        return new HDIterator( true );
-    }
+    private int _child;
 
     /**
-     * Returns an enumeration of the values contained in this
+     * Current bucket iterator
      */
-    FastIterator values()
-        throws IOException
-    {
-        return new HDIterator( false );
-    }
-
+    private Iterator _iter;
 
     /**
-     * Implement Externalizable interface
+     * Construct an iterator on this directory.
+     * 
+     * @param iterateKeys
+     *          True if iteration supplies keys, False if it supplies
+     *          values.
      */
-    public void writeExternal(ObjectOutput out)
-    throws IOException {
-        out.writeByte(_depth);
-        out.writeObject(_children);
-    }
-
+    HDIterator(boolean iterateKeys) throws IOException {
+      _dirStack = new ArrayList();
+      _childStack = new ArrayList();
+      _dir = HashDirectory.this;
+      _child = -1;
+      _iterateKeys = iterateKeys;
 
-    /**
-     * Implement Externalizable interface
-     */
-    public synchronized void readExternal(ObjectInput in)
-    throws IOException, ClassNotFoundException {
-        _depth = in.readByte();
-        _children = (long[])in.readObject();
+      prepareNext();
     }
 
-
-    ////////////////////////////////////////////////////////////////////////
-    // INNER CLASS
-    ////////////////////////////////////////////////////////////////////////
-
     /**
-     * Utility class to enumerate keys/values in a HTree
+     * Returns the next object.
      */
-    public class HDIterator
-        extends FastIterator
-    {
-
-        /**
-         * True if we're iterating on keys, False if enumerating on values.
-         */
-        private boolean _iterateKeys;
-
-        /**
-         * Stacks of directories & last enumerated child position
-         */
-        private ArrayList _dirStack;
-        private ArrayList _childStack;
-
-        /**
-         * Current HashDirectory in the hierarchy
-         */
-        private HashDirectory _dir;
-
-        /**
-         * Current child position
-         */
-        private int _child;
-
-        /**
-         * Current bucket iterator
-         */
-        private Iterator _iter;
-
-
-        /**
-         * Construct an iterator on this directory.
-         *
-         * @param iterateKeys True if iteration supplies keys, False
-         *                  if iterateKeys supplies values.
-         */
-        HDIterator( boolean iterateKeys )
-            throws IOException
-        {
-            _dirStack = new ArrayList();
-            _childStack = new ArrayList();
-            _dir = HashDirectory.this;
-            _child = -1;
-            _iterateKeys = iterateKeys;
-
-            prepareNext();
+    @Override
+    public Object next() {
+      Object next = null;
+      if (_iter != null && _iter.hasNext()) {
+        next = _iter.next();
+      } else {
+        try {
+          prepareNext();
+        } catch (IOException except) {
+          throw new IterationException(except);
         }
-
-
-        /**
-         * Returns the next object.
-         */
-        public Object next()
-        {   
-            Object next = null;      
-            if( _iter != null && _iter.hasNext() ) {
-              next = _iter.next();
-            } else {
-              try {
-                prepareNext();
-              } catch ( IOException except ) {
-                throw new IterationException( except );
-              }
-              if ( _iter != null && _iter.hasNext() ) {
-                return next();
-              }
-            }
-            return next;         
+        if (_iter != null && _iter.hasNext()) {
+          return next();
         }
+      }
+      return next;
+    }
 
+    /**
+     * Prepare internal state so we can answer <code>hasMoreElements</code>
+     * 
+     * Actually, this code prepares an Enumeration on the next Bucket to
+     * enumerate. If no following bucket is found, the next Enumeration is set
+     * to <code>null</code>.
+     */
+    private void prepareNext() throws IOException {
+      long child_recid = 0;
+
+      // find next bucket/directory to enumerate
+      do {
+        _child++;
+        if (_child >= MAX_CHILDREN) {
+
+          if (_dirStack.isEmpty()) {
+            // no more directory in the stack, we're finished
+            return;
+          }
+
+          // try next page
+          _dir = (HashDirectory) _dirStack.remove(_dirStack.size() - 1);
+          _child = ((Integer) _childStack.remove(_childStack.size() - 1))
+              .intValue();
+          continue;
+        }
+        child_recid = _dir._children[_child];
+      } while (child_recid == 0);
 
-        /**
-         * Prepare internal state so we can answer <code>hasMoreElements</code>
-         *
-         * Actually, this code prepares an Enumeration on the next
-         * Bucket to enumerate.   If no following bucket is found,
-         * the next Enumeration is set to <code>null</code>.
-         */
-        private void prepareNext() throws IOException {
-            long child_recid = 0;
-
-            // find next bucket/directory to enumerate
-            do {
-                _child++;
-                if (_child >= MAX_CHILDREN) {
-
-                    if (_dirStack.isEmpty()) {
-                        // no more directory in the stack, we're finished
-                        return;
-                    }
-
-                    // try next page
-                    _dir = (HashDirectory) _dirStack.remove( _dirStack.size()-1 );
-                    _child = ( (Integer) _childStack.remove( _childStack.size()-1 ) ).intValue();
-                    continue;
-                }
-                child_recid = _dir._children[_child];
-            } while (child_recid == 0);
-
-            if (child_recid == 0) {
-                throw new Error("child_recid cannot be 0");
-            }
-
-            HashNode node = (HashNode) _recman.fetch( child_recid );
-            // System.out.println("HDEnumeration.get() child is : "+node);
- 
-            if ( node instanceof HashDirectory ) {
-                // save current position
-                _dirStack.add( _dir );
-                _childStack.add( new Integer( _child ) );
-
-                _dir = (HashDirectory)node;
-                _child = -1;
-
-                // recurse into
-                _dir.setPersistenceContext( _recman, child_recid );
-                prepareNext();
-            } else {
-                // node is a bucket
-                HashBucket bucket = (HashBucket)node;
-                if ( _iterateKeys ) {
-                    _iter = bucket.getKeys().iterator();
-                } else {
-                    _iter = bucket.getValues().iterator();
-                }
-            }
+      if (child_recid == 0) {
+        throw new Error("child_recid cannot be 0");
+      }
+
+      HashNode node = (HashNode) _recman.fetch(child_recid);
+      // System.out.println("HDEnumeration.get() child is : "+node);
+
+      if (node instanceof HashDirectory) {
+        // save current position
+        _dirStack.add(_dir);
+        _childStack.add(new Integer(_child));
+
+        _dir = (HashDirectory) node;
+        _child = -1;
+
+        // recurse into
+        _dir.setPersistenceContext(_recman, child_recid);
+        prepareNext();
+      } else {
+        // node is a bucket
+        HashBucket bucket = (HashBucket) node;
+        if (_iterateKeys) {
+          _iter = bucket.getKeys().iterator();
+        } else {
+          _iter = bucket.getValues().iterator();
         }
+      }
     }
+  }
 
 }
-
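
The hashCode()/hashMask() pair above consumes one byte of a key's 32-bit
hashcode per directory level, most significant byte first (BIT_SIZE = 8,
MAX_DEPTH = 3, MAX_CHILDREN = 256). A standalone illustration; childSlot is a
hypothetical helper, not part of this commit:

    // Which of the 256 child slots a key falls into at a given depth.
    static int childSlot(Object key, int depth) {
      int shift = (3 - depth) * 8;   // (MAX_DEPTH - _depth) * BIT_SIZE
      int mask = 255 << shift;       // hashMask(): (MAX_CHILDREN - 1) << shift
      // The % 256 is already a no-op after masking and shifting; it is kept
      // here only to mirror hashCode() in the code above.
      return ((key.hashCode() & mask) >>> shift) % 256;
    }

    // For a key whose hashCode() is 0xA1B2C3D4:
    //   depth 0 -> slot 0xA1, depth 1 -> 0xB2, depth 2 -> 0xC3, depth 3 -> 0xD4

After MAX_DEPTH levels the hashcode is exhausted, which is why keys that still
collide at that point end up together in a leaf bucket.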

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashNode.java Thu Jan 21 10:37:58 2010
@@ -67,14 +67,14 @@
 import java.io.Serializable;
 
 /**
- *  Abstract class for Hashtable directory nodes
- *
- *  @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
- *  @version $Id: HashNode.java,v 1.2 2003/03/21 02:54:58 boisvert Exp $
+ * Abstract class for Hashtable directory nodes
+ * 
+ * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
+ * @version $Id: HashNode.java,v 1.2 2003/03/21 02:54:58 boisvert Exp $
  */
 class HashNode implements Serializable {
 
-    // Empty, there's no common functionality.  We use this abstract
-    // class for typing only.
+  // Empty, there's no common functionality. We use this abstract
+  // class for typing only.
 
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BaseRecordManager.java Thu Jan 21 10:37:58 2010
@@ -66,456 +66,411 @@
 
 package org.apache.hadoop.hive.ql.util.jdbm.recman;
 
-import java.io.IOException;
 import java.io.File;
-
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.hadoop.hive.ql.util.jdbm.RecordManager;
-import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer;
 import org.apache.hadoop.hive.ql.util.jdbm.helper.DefaultSerializer;
+import org.apache.hadoop.hive.ql.util.jdbm.helper.Serializer;
 
 /**
- *  This class manages records, which are uninterpreted blobs of data. The
- *  set of operations is simple and straightforward: you communicate with
- *  the class using long "rowids" and byte[] data blocks. Rowids are returned
- *  on inserts and you can stash them away someplace safe to be able to get
- *  back to them. Data blocks can be as long as you wish, and may have
- *  lengths different from the original when updating.
- *  <p>
- *  Operations are synchronized, so that only one of them will happen
- *  concurrently even if you hammer away from multiple threads. Operations
- *  are made atomic by keeping a transaction log which is recovered after
- *  a crash, so the operations specified by this interface all have ACID
- *  properties.
- *  <p>
- *  You identify a file by just the name. The package attaches <tt>.db</tt>
- *  for the database file, and <tt>.lg</tt> for the transaction log. The
- *  transaction log is synchronized regularly and then restarted, so don't
- *  worry if you see the size going up and down.
- *
+ * This class manages records, which are uninterpreted blobs of data. The set of
+ * operations is simple and straightforward: you communicate with the class
+ * using long "rowids" and byte[] data blocks. Rowids are returned on inserts
+ * and you can stash them away someplace safe to be able to get back to them.
+ * Data blocks can be as long as you wish, and may have lengths different from
+ * the original when updating.
+ * <p>
+ * Operations are synchronized, so that only one of them will happen
+ * concurrently even if you hammer away from multiple threads. Operations are
+ * made atomic by keeping a transaction log which is recovered after a crash, so
+ * the operations specified by this interface all have ACID properties.
+ * <p>
+ * You identify a file by just the name. The package attaches <tt>.db</tt> for
+ * the database file, and <tt>.lg</tt> for the transaction log. The transaction
+ * log is synchronized regularly and then restarted, so don't worry if you see
+ * the size going up and down.
+ * 
  * @author <a href="mailto:boisvert@intalio.com">Alex Boisvert</a>
  * @author <a href="cg@cdegroot.com">Cees de Groot</a>
  * @version $Id: BaseRecordManager.java,v 1.8 2005/06/25 23:12:32 doomdark Exp $
  */
-public final class BaseRecordManager
-    implements RecordManager
-{
-
-    /**
-     * Underlying record file.
-     */
-    private RecordFile _file;
-
-
-    /**
-     * Physical row identifier manager.
-     */
-    private PhysicalRowIdManager _physMgr;
-
-
-    /**
-     * Logigal to Physical row identifier manager.
-     */
-    private LogicalRowIdManager _logMgr;
-
-
-    /**
-     * Page manager.
-     */
-    private PageManager _pageman;
-
-
-    /**
-     * Reserved slot for name directory.
-     */
-    public static final int NAME_DIRECTORY_ROOT = 0;
-
-
-    /**
-     * Static debugging flag
-     */
-    public static final boolean DEBUG = false;
-
-    
-    /**
-     * Directory of named JDBMHashtables.  This directory is a persistent
-     * directory, stored as a Hashtable.  It can be retrived by using
-     * the NAME_DIRECTORY_ROOT.
-     */
-    private Map _nameDirectory;
-
-
-    /**
-     *  Creates a record manager for the indicated file
-     *
-     *  @throws IOException when the file cannot be opened or is not
-     *          a valid file content-wise.
-     */
-    public BaseRecordManager( String filename )
-        throws IOException
-    {
-        _file = new RecordFile( filename );
-        _pageman = new PageManager( _file );
-        _physMgr = new PhysicalRowIdManager( _file, _pageman );
-        _logMgr = new LogicalRowIdManager( _file, _pageman );
-    }
-    
-     /**
-     *  Creates a record manager for the indicated file
-     *
-     *  @throws IOException when the file cannot be opened or is not
-     *          a valid file content-wise.
-     */
-    public BaseRecordManager( File file )
-        throws IOException
-    {
-        _file = new RecordFile( file );
-        _pageman = new PageManager( _file );
-        _physMgr = new PhysicalRowIdManager( _file, _pageman );
-        _logMgr = new LogicalRowIdManager( _file, _pageman );
-    }
-    
-    
-
-
-    /**
-     *  Get the underlying Transaction Manager
-     */
-    public synchronized TransactionManager getTransactionManager()
-    {
-        checkIfClosed();
-
-        return _file.txnMgr;
-    }
-
-
-    /**
-     *  Switches off transactioning for the record manager. This means
-     *  that a) a transaction log is not kept, and b) writes aren't
-     *  synch'ed after every update. This is useful when batch inserting
-     *  into a new database.
-     *  <p>
-     *  Only call this method directly after opening the file, otherwise
-     *  the results will be undefined.
-     */
-    public synchronized void disableTransactions()
-    {
-        checkIfClosed();
-
-        _file.disableTransactions();
-    }
-
-    
-    /**
-     *  Closes the record manager.
-     *
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public synchronized void close()
-        throws IOException
-    {
-        checkIfClosed();
-
-        _pageman.close();
-        _pageman = null;
-
-        _file.close();
-        _file = null;
-    }
-
-
-    /**
-     *  Inserts a new record using standard java object serialization.
-     *
-     *  @param obj the object for the new record.
-     *  @return the rowid for the new record.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public long insert( Object obj )
-        throws IOException
-    {
-        return insert( obj, DefaultSerializer.INSTANCE );
-    }
-
-    
-    /**
-     *  Inserts a new record using a custom serializer.
-     *
-     *  @param obj the object for the new record.
-     *  @param serializer a custom serializer
-     *  @return the rowid for the new record.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public synchronized long insert( Object obj, Serializer serializer )
-        throws IOException
-    {
-        byte[]    data;
-        long      recid;
-        Location  physRowId;
-        
-        checkIfClosed();
-
-        data = serializer.serialize( obj );
-        physRowId = _physMgr.insert( data, 0, data.length );
-        recid = _logMgr.insert( physRowId ).toLong();
-        if ( DEBUG ) {
-            System.out.println( "BaseRecordManager.insert() recid " + recid + " length " + data.length ) ;
-        }
-        return recid;
-    }
-
-    /**
-     *  Deletes a record.
-     *
-     *  @param recid the rowid for the record that should be deleted.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public synchronized void delete( long recid )
-        throws IOException
-    {
-        checkIfClosed();
-        if ( recid <= 0 ) {
-            throw new IllegalArgumentException( "Argument 'recid' is invalid: "
-                                                + recid );
-        }
-
-        if ( DEBUG ) {
-            System.out.println( "BaseRecordManager.delete() recid " + recid ) ;
-        }
-
-        Location logRowId = new Location( recid );
-        Location physRowId = _logMgr.fetch( logRowId );
-        _physMgr.delete( physRowId );
-        _logMgr.delete( logRowId );
-    }
-
-
-    /**
-     *  Updates a record using standard java object serialization.
-     *
-     *  @param recid the recid for the record that is to be updated.
-     *  @param obj the new object for the record.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public void update( long recid, Object obj )
-        throws IOException
-    {
-        update( recid, obj, DefaultSerializer.INSTANCE );
-    }
-
-    
-    /**
-     *  Updates a record using a custom serializer.
-     *
-     *  @param recid the recid for the record that is to be updated.
-     *  @param obj the new object for the record.
-     *  @param serializer a custom serializer
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public synchronized void update( long recid, Object obj, Serializer serializer )
-        throws IOException
-    {
-        checkIfClosed();
-        if ( recid <= 0 ) {
-            throw new IllegalArgumentException( "Argument 'recid' is invalid: "
-                                                + recid );
-        }
-
-        Location logRecid = new Location( recid );
-        Location physRecid = _logMgr.fetch( logRecid );
-        
-        byte[] data = serializer.serialize( obj );
-        if ( DEBUG ) {
-            System.out.println( "BaseRecordManager.update() recid " + recid + " length " + data.length ) ;
-        }
-        
-        Location newRecid = _physMgr.update( physRecid, data, 0, data.length );
-        if ( ! newRecid.equals( physRecid ) ) {
-            _logMgr.update( logRecid, newRecid );
-        }
-    }
-
-
-    /**
-     *  Fetches a record using standard java object serialization.
-     *
-     *  @param recid the recid for the record that must be fetched.
-     *  @return the object contained in the record.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public Object fetch( long recid )
-        throws IOException
-    {
-        return fetch( recid, DefaultSerializer.INSTANCE );
-    }
-
-
-    /**
-     *  Fetches a record using a custom serializer.
-     *
-     *  @param recid the recid for the record that must be fetched.
-     *  @param serializer a custom serializer
-     *  @return the object contained in the record.
-     *  @throws IOException when one of the underlying I/O operations fails.
-     */
-    public synchronized Object fetch( long recid, Serializer serializer )
-        throws IOException
-    {
-        byte[] data;
-
-        checkIfClosed();
-        if ( recid <= 0 ) {
-            throw new IllegalArgumentException( "Argument 'recid' is invalid: "
-                                                + recid );
-        }
-        data = _physMgr.fetch( _logMgr.fetch( new Location( recid ) ) );
-        if ( DEBUG ) {
-            System.out.println( "BaseRecordManager.fetch() recid " + recid + " length " + data.length ) ;
-        }
-        return serializer.deserialize( data );
-    }
-
-
-    /**
-     *  Returns the number of slots available for "root" rowids. These slots
-     *  can be used to store special rowids, like rowids that point to
-     *  other rowids. Root rowids are useful for bootstrapping access to
-     *  a set of data.
-     */
-    public int getRootCount()
-    {
-        return FileHeader.NROOTS;
-    }
-
-    /**
-     *  Returns the indicated root rowid.
-     *
-     *  @see #getRootCount
-     */
-    public synchronized long getRoot( int id )
-        throws IOException
-    {
-        checkIfClosed();
-
-        return _pageman.getFileHeader().getRoot( id );
-    }
-
-
-    /**
-     *  Sets the indicated root rowid.
-     *
-     *  @see #getRootCount
-     */
-    public synchronized void setRoot( int id, long rowid )
-        throws IOException
-    {
-        checkIfClosed();
-
-        _pageman.getFileHeader().setRoot( id, rowid );
-    }
-
-
-    /**
-     * Obtain the record id of a named object. Returns 0 if named object
-     * doesn't exist.
-     */
-    public long getNamedObject( String name )
-        throws IOException
-    {
-        checkIfClosed();
-
-        Map nameDirectory = getNameDirectory();
-        Long recid = (Long) nameDirectory.get( name );
-        if ( recid == null ) {
-            return 0;
-        }
-        return recid.longValue();
-    }
-
-    /**
-     * Set the record id of a named object.
-     */
-    public void setNamedObject( String name, long recid )
-        throws IOException
-    {
-        checkIfClosed();
-
-        Map nameDirectory = getNameDirectory();
-        if ( recid == 0 ) {
-            // remove from hashtable
-            nameDirectory.remove( name );
-        } else {
-            nameDirectory.put( name, new Long( recid ) );
-        }
-        saveNameDirectory( nameDirectory );
-    }
-
-
-    /**
-     * Commit (make persistent) all changes since beginning of transaction.
-     */
-    public synchronized void commit()
-        throws IOException
-    {
-        checkIfClosed();
-
-        _pageman.commit();
-    }
-
-
-    /**
-     * Rollback (cancel) all changes since beginning of transaction.
-     */
-    public synchronized void rollback()
-        throws IOException
-    {
-        checkIfClosed();
-
-        _pageman.rollback();
-    }
-
-
-    /**
-     * Load name directory
-     */
-    private Map getNameDirectory()
-        throws IOException
-    {
-        // retrieve directory of named hashtable
-        long nameDirectory_recid = getRoot( NAME_DIRECTORY_ROOT );
-        if ( nameDirectory_recid == 0 ) {
-            _nameDirectory = new HashMap();
-            nameDirectory_recid = insert( _nameDirectory );
-            setRoot( NAME_DIRECTORY_ROOT, nameDirectory_recid );
-        } else {
-            _nameDirectory = (Map) fetch( nameDirectory_recid );
-        }
-        return _nameDirectory;
-    }
-
-
-    private void saveNameDirectory( Map directory )
-        throws IOException
-    {
-        long recid = getRoot( NAME_DIRECTORY_ROOT );
-        if ( recid == 0 ) {
-            throw new IOException( "Name directory must exist" );
-        }
-        update( recid, _nameDirectory );
-    }
-
-
-    /**
-     * Check if RecordManager has been closed.  If so, throw an
-     * IllegalStateException.
-     */
-    private void checkIfClosed()
-        throws IllegalStateException
-    {
-        if ( _file == null ) {
-            throw new IllegalStateException( "RecordManager has been closed" );
-        }
+public final class BaseRecordManager implements RecordManager {
+
+  /**
+   * Underlying record file.
+   */
+  private RecordFile _file;
+
+  /**
+   * Physical row identifier manager.
+   */
+  private final PhysicalRowIdManager _physMgr;
+
+  /**
+   * Logical to physical row identifier manager.
+   */
+  private final LogicalRowIdManager _logMgr;
+
+  /**
+   * Page manager.
+   */
+  private PageManager _pageman;
+
+  /**
+   * Reserved slot for name directory.
+   */
+  public static final int NAME_DIRECTORY_ROOT = 0;
+
+  /**
+   * Static debugging flag
+   */
+  public static final boolean DEBUG = false;
+
+  /**
+   * Directory of named JDBMHashtables. This directory is a persistent
+   * directory, stored as a Hashtable. It can be retrieved through the
+   * NAME_DIRECTORY_ROOT root slot.
+   */
+  private Map _nameDirectory;
+
+  /**
+   * Creates a record manager for the indicated file
+   * 
+   * @throws IOException
+   *           when the file cannot be opened or is not a valid file
+   *           content-wise.
+   */
+  public BaseRecordManager(String filename) throws IOException {
+    _file = new RecordFile(filename);
+    _pageman = new PageManager(_file);
+    _physMgr = new PhysicalRowIdManager(_file, _pageman);
+    _logMgr = new LogicalRowIdManager(_file, _pageman);
+  }
+
+  /**
+   * Creates a record manager for the indicated file
+   * 
+   * @throws IOException
+   *           when the file cannot be opened or is not a valid file
+   *           content-wise.
+   */
+  public BaseRecordManager(File file) throws IOException {
+    _file = new RecordFile(file);
+    _pageman = new PageManager(_file);
+    _physMgr = new PhysicalRowIdManager(_file, _pageman);
+    _logMgr = new LogicalRowIdManager(_file, _pageman);
+  }
+
+  /**
+   * Gets the underlying TransactionManager.
+   */
+  public synchronized TransactionManager getTransactionManager() {
+    checkIfClosed();
+
+    return _file.txnMgr;
+  }
+
+  /**
+   * Switches off transactions for the record manager. This means that (a) a
+   * transaction log is not kept, and (b) writes aren't synced after every
+   * update. This is useful when batch-inserting into a new database.
+   * <p>
+   * Only call this method directly after opening the file; otherwise the
+   * results are undefined.
+   */
+  public synchronized void disableTransactions() {
+    checkIfClosed();
+
+    _file.disableTransactions();
+  }
+
+  /**
+   * Closes the record manager.
+   * 
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public synchronized void close() throws IOException {
+    checkIfClosed();
+
+    _pageman.close();
+    _pageman = null;
+
+    _file.close();
+    _file = null;
+  }
+
+  /**
+   * Inserts a new record using standard Java object serialization.
+   * 
+   * @param obj
+   *          the object for the new record.
+   * @return the rowid for the new record.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public long insert(Object obj) throws IOException {
+    return insert(obj, DefaultSerializer.INSTANCE);
+  }
+
+  /**
+   * Inserts a new record using a custom serializer.
+   * 
+   * @param obj
+   *          the object for the new record.
+   * @param serializer
+   *          a custom serializer
+   * @return the rowid for the new record.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public synchronized long insert(Object obj, Serializer serializer)
+      throws IOException {
+    byte[] data;
+    long recid;
+    Location physRowId;
+
+    checkIfClosed();
+
+    data = serializer.serialize(obj);
+    physRowId = _physMgr.insert(data, 0, data.length);
+    recid = _logMgr.insert(physRowId).toLong();
+    if (DEBUG) {
+      System.out.println("BaseRecordManager.insert() recid " + recid
+          + " length " + data.length);
+    }
+    return recid;
+  }
+
+  /**
+   * Deletes a record.
+   * 
+   * @param recid
+   *          the rowid for the record that should be deleted.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public synchronized void delete(long recid) throws IOException {
+    checkIfClosed();
+    if (recid <= 0) {
+      throw new IllegalArgumentException("Argument 'recid' is invalid: "
+          + recid);
+    }
+
+    if (DEBUG) {
+      System.out.println("BaseRecordManager.delete() recid " + recid);
+    }
+
+    Location logRowId = new Location(recid);
+    Location physRowId = _logMgr.fetch(logRowId);
+    _physMgr.delete(physRowId);
+    _logMgr.delete(logRowId);
+  }
+
+  /**
+   * Updates a record using standard Java object serialization.
+   * 
+   * @param recid
+   *          the recid for the record that is to be updated.
+   * @param obj
+   *          the new object for the record.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public void update(long recid, Object obj) throws IOException {
+    update(recid, obj, DefaultSerializer.INSTANCE);
+  }
+
+  /**
+   * Updates a record using a custom serializer.
+   * 
+   * @param recid
+   *          the recid for the record that is to be updated.
+   * @param obj
+   *          the new object for the record.
+   * @param serializer
+   *          a custom serializer
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public synchronized void update(long recid, Object obj, Serializer serializer)
+      throws IOException {
+    checkIfClosed();
+    if (recid <= 0) {
+      throw new IllegalArgumentException("Argument 'recid' is invalid: "
+          + recid);
+    }
+
+    Location logRecid = new Location(recid);
+    Location physRecid = _logMgr.fetch(logRecid);
+
+    byte[] data = serializer.serialize(obj);
+    if (DEBUG) {
+      System.out.println("BaseRecordManager.update() recid " + recid
+          + " length " + data.length);
+    }
+
+    Location newRecid = _physMgr.update(physRecid, data, 0, data.length);
+    if (!newRecid.equals(physRecid)) {
+      _logMgr.update(logRecid, newRecid);
+    }
+  }
+
+  /**
+   * Fetches a record using standard Java object serialization.
+   * 
+   * @param recid
+   *          the recid for the record that must be fetched.
+   * @return the object contained in the record.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public Object fetch(long recid) throws IOException {
+    return fetch(recid, DefaultSerializer.INSTANCE);
+  }
+
+  /**
+   * Fetches a record using a custom serializer.
+   * 
+   * @param recid
+   *          the recid for the record that must be fetched.
+   * @param serializer
+   *          a custom serializer
+   * @return the object contained in the record.
+   * @throws IOException
+   *           when one of the underlying I/O operations fails.
+   */
+  public synchronized Object fetch(long recid, Serializer serializer)
+      throws IOException {
+    byte[] data;
+
+    checkIfClosed();
+    if (recid <= 0) {
+      throw new IllegalArgumentException("Argument 'recid' is invalid: "
+          + recid);
+    }
+    data = _physMgr.fetch(_logMgr.fetch(new Location(recid)));
+    if (DEBUG) {
+      System.out.println("BaseRecordManager.fetch() recid " + recid
+          + " length " + data.length);
+    }
+    return serializer.deserialize(data);
+  }
+
+  /**
+   * Returns the number of slots available for "root" rowids. These slots can be
+   * used to store special rowids, like rowids that point to other rowids. Root
+   * rowids are useful for bootstrapping access to a set of data.
+   */
+  public int getRootCount() {
+    return FileHeader.NROOTS;
+  }
+
+  /**
+   * Returns the indicated root rowid.
+   * 
+   * @see #getRootCount
+   */
+  public synchronized long getRoot(int id) throws IOException {
+    checkIfClosed();
+
+    return _pageman.getFileHeader().getRoot(id);
+  }
+
+  /**
+   * Sets the indicated root rowid.
+   * 
+   * @see #getRootCount
+   */
+  public synchronized void setRoot(int id, long rowid) throws IOException {
+    checkIfClosed();
+
+    _pageman.getFileHeader().setRoot(id, rowid);
+  }
+
+  /**
+   * Obtains the record id of a named object. Returns 0 if the named object
+   * doesn't exist.
+   */
+  public long getNamedObject(String name) throws IOException {
+    checkIfClosed();
+
+    Map nameDirectory = getNameDirectory();
+    Long recid = (Long) nameDirectory.get(name);
+    if (recid == null) {
+      return 0;
+    }
+    return recid.longValue();
+  }
+
+  /**
+   * Sets the record id of a named object.
+   */
+  public void setNamedObject(String name, long recid) throws IOException {
+    checkIfClosed();
+
+    Map nameDirectory = getNameDirectory();
+    if (recid == 0) {
+      // remove from hashtable
+      nameDirectory.remove(name);
+    } else {
+      nameDirectory.put(name, new Long(recid));
+    }
+    saveNameDirectory(nameDirectory);
+  }
+
+  /**
+   * Commits (makes persistent) all changes since the transaction began.
+   */
+  public synchronized void commit() throws IOException {
+    checkIfClosed();
+
+    _pageman.commit();
+  }
+
+  /**
+   * Rolls back (cancels) all changes since the transaction began.
+   */
+  public synchronized void rollback() throws IOException {
+    checkIfClosed();
+
+    _pageman.rollback();
+  }
+
+  /**
+   * Loads the name directory, creating it on first use.
+   */
+  private Map getNameDirectory() throws IOException {
+    // retrieve directory of named hashtable
+    long nameDirectory_recid = getRoot(NAME_DIRECTORY_ROOT);
+    if (nameDirectory_recid == 0) {
+      _nameDirectory = new HashMap();
+      nameDirectory_recid = insert(_nameDirectory);
+      setRoot(NAME_DIRECTORY_ROOT, nameDirectory_recid);
+    } else {
+      _nameDirectory = (Map) fetch(nameDirectory_recid);
+    }
+    return _nameDirectory;
+  }
+
+  private void saveNameDirectory(Map directory) throws IOException {
+    long recid = getRoot(NAME_DIRECTORY_ROOT);
+    if (recid == 0) {
+      throw new IOException("Name directory must exist");
+    }
+    update(recid, directory);
+  }
+
+  /**
+   * Checks whether the RecordManager has been closed. If so, throws an
+   * IllegalStateException.
+   */
+  private void checkIfClosed() throws IllegalStateException {
+    if (_file == null) {
+      throw new IllegalStateException("RecordManager has been closed");
     }
+  }
 }
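
For reference while reviewing, here is a minimal usage sketch of the
BaseRecordManager API above (insert, named objects, fetch, commit, close).
It is a sketch only: "demo-db" is a hypothetical base filename, and the
class is assumed to live in the same jdbm.recman package as the other
files in this commit.

    import org.apache.hadoop.hive.ql.util.jdbm.recman.BaseRecordManager;

    public class RecordManagerSketch {
      public static void main(String[] args) throws Exception {
        // Opens (or creates) the backing record file.
        BaseRecordManager recman = new BaseRecordManager("demo-db");
        try {
          // insert() serializes with DefaultSerializer and returns a logical
          // rowid that stays stable even if the record later moves physically.
          long recid = recman.insert("hello, jdbm");
          // Publish the recid under a name so it can be found after reopening.
          recman.setNamedObject("greeting", recid);
          recman.commit();

          // Resolve the name back to a recid and fetch the record.
          long found = recman.getNamedObject("greeting");
          System.out.println((String) recman.fetch(found));
        } finally {
          recman.close();
        }
      }
    }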

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockIo.java Thu Jan 21 10:37:58 2010
@@ -65,261 +65,257 @@
 
 package org.apache.hadoop.hive.ql.util.jdbm.recman;
 
-import java.io.*;
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
 
 /**
- *  This class wraps a page-sized byte array and provides methods
- *  to read and write data to and from it. The readers and writers
- *  are just the ones that the rest of the toolkit needs, nothing else.
- *  Values written are compatible with java.io routines.
- *
- *  @see java.io.DataInput
- *  @see java.io.DataOutput
+ * This class wraps a page-sized byte array and provides methods to read and
+ * write data to and from it. The readers and writers are just the ones that the
+ * rest of the toolkit needs, nothing else. Values written are compatible with
+ * java.io routines.
+ * 
+ * @see java.io.DataInput
+ * @see java.io.DataOutput
  */
 public final class BlockIo implements java.io.Externalizable {
 
-    public final static long serialVersionUID = 2L;
+  public static final long serialVersionUID = 2L;
 
-    private long blockId;
+  private long blockId;
 
-    private transient byte[] data; // work area
-    private transient BlockView view = null;
-    private transient boolean dirty = false;
-    private transient int transactionCount = 0;
-
-    /**
-     * Default constructor for serialization
-     */
-    public BlockIo() {
-        // empty
-    }
-
-    /**
-     *  Constructs a new BlockIo instance working on the indicated
-     *  buffer.
-     */
-    BlockIo(long blockId, byte[] data) {
-        // removeme for production version
-        if (blockId > 10000000000L)
-            throw new Error("bogus block id " + blockId);
-        this.blockId = blockId;
-        this.data = data;
-    }
-
-    /**
-     *  Returns the underlying array
-     */
-    byte[] getData() {
-        return data;
-    }
-
-    /**
-     *  Sets the block number. Should only be called by RecordFile.
-     */
-    void setBlockId(long id) {
-        if (isInTransaction())
-            throw new Error("BlockId assigned for transaction block");
-        // removeme for production version
-        if (id > 10000000000L)
-            throw new Error("bogus block id " + id);
-        blockId = id;
-    }
-
-    /**
-     *  Returns the block number.
-     */
-    long getBlockId() {
-        return blockId;
-    }
-
-    /**
-     *  Returns the current view of the block.
-     */
-    public BlockView getView() {
-        return view;
-    }
-
-    /**
-     *  Sets the current view of the block.
-     */
-    public void setView(BlockView view) {
-        this.view = view;
-    }
-
-    /**
-     *  Sets the dirty flag
-     */
-    void setDirty() {
-        dirty = true;
-    }
-
-    /**
-     *  Clears the dirty flag
-     */
-    void setClean() {
-        dirty = false;
-    }
-
-    /**
-     *  Returns true if the dirty flag is set.
-     */
-    boolean isDirty() {
-        return dirty;
-    }
-
-    /**
-     *  Returns true if the block is still dirty with respect to the
-     *  transaction log.
-     */
-    boolean isInTransaction() {
-        return transactionCount != 0;
-    }
-
-    /**
-     *  Increments transaction count for this block, to signal that this
-     *  block is in the log but not yet in the data file. The method also
-     *  takes a snapshot so that the data may be modified in new transactions.
-     */
-    synchronized void incrementTransactionCount() {
-        transactionCount++;
-        // @fixme(alex)
-        setClean();
-    }
-
-    /**
-     *  Decrements transaction count for this block, to signal that this
-     *  block has been written from the log to the data file.
-     */
-    synchronized void decrementTransactionCount() {
-        transactionCount--;
-        if (transactionCount < 0)
-            throw new Error("transaction count on block "
-                            + getBlockId() + " below zero!");
-
-    }
-
-    /**
-     *  Reads a byte from the indicated position
-     */
-    public byte readByte(int pos) {
-        return data[pos];
-    }
-
-    /**
-     *  Writes a byte to the indicated position
-     */
-    public void writeByte(int pos, byte value) {
-        data[pos] = value;
-        setDirty();
-    }
-
-    /**
-     *  Reads a short from the indicated position
-     */
-    public short readShort(int pos) {
-        return (short)
-            (((short) (data[pos+0] & 0xff) << 8) |
-             ((short) (data[pos+1] & 0xff) << 0));
-    }
-
-    /**
-     *  Writes a short to the indicated position
-     */
-    public void writeShort(int pos, short value) {
-        data[pos+0] = (byte)(0xff & (value >> 8));
-        data[pos+1] = (byte)(0xff & (value >> 0));
-        setDirty();
-    }
-
-    /**
-     *  Reads an int from the indicated position
-     */
-    public int readInt(int pos) {
-        return
-            (((int)(data[pos+0] & 0xff) << 24) |
-             ((int)(data[pos+1] & 0xff) << 16) |
-             ((int)(data[pos+2] & 0xff) <<  8) |
-             ((int)(data[pos+3] & 0xff) <<  0));
-    }
-
-    /**
-     *  Writes an int to the indicated position
-     */
-    public void writeInt(int pos, int value) {
-        data[pos+0] = (byte)(0xff & (value >> 24));
-        data[pos+1] = (byte)(0xff & (value >> 16));
-        data[pos+2] = (byte)(0xff & (value >>  8));
-        data[pos+3] = (byte)(0xff & (value >>  0));
-        setDirty();
-    }
-
-    /**
-     *  Reads a long from the indicated position
-     */
-    public long readLong( int pos )
-    {
-        // Contributed by Erwin Bolwidt <ej...@klomp.org>
-        // Gives about 15% performance improvement
-        return
-            ( (long)( ((data[pos+0] & 0xff) << 24) |
-                      ((data[pos+1] & 0xff) << 16) |
-                      ((data[pos+2] & 0xff) <<  8) |
-                      ((data[pos+3] & 0xff)      ) ) << 32 ) |
-            ( (long)( ((data[pos+4] & 0xff) << 24) |
-                      ((data[pos+5] & 0xff) << 16) |
-                      ((data[pos+6] & 0xff) <<  8) |
-                      ((data[pos+7] & 0xff)      ) ) & 0xffffffff );
-        /* Original version by Alex Boisvert.  Might be faster on 64-bit JVMs.
-        return
-            (((long)(data[pos+0] & 0xff) << 56) |
-             ((long)(data[pos+1] & 0xff) << 48) |
-             ((long)(data[pos+2] & 0xff) << 40) |
-             ((long)(data[pos+3] & 0xff) << 32) |
-             ((long)(data[pos+4] & 0xff) << 24) |
-             ((long)(data[pos+5] & 0xff) << 16) |
-             ((long)(data[pos+6] & 0xff) <<  8) |
-             ((long)(data[pos+7] & 0xff) <<  0));
-        */
-    }
-
-    /**
-     *  Writes a long to the indicated position
-     */
-    public void writeLong(int pos, long value) {
-        data[pos+0] = (byte)(0xff & (value >> 56));
-        data[pos+1] = (byte)(0xff & (value >> 48));
-        data[pos+2] = (byte)(0xff & (value >> 40));
-        data[pos+3] = (byte)(0xff & (value >> 32));
-        data[pos+4] = (byte)(0xff & (value >> 24));
-        data[pos+5] = (byte)(0xff & (value >> 16));
-        data[pos+6] = (byte)(0xff & (value >>  8));
-        data[pos+7] = (byte)(0xff & (value >>  0));
-        setDirty();
-    }
-
-    // overrides java.lang.Object
-
-    public String toString() {
-        return "BlockIO("
-            + blockId + ","
-            + dirty + ","
-            + view + ")";
-    }
-
-    // implement externalizable interface
-    public void readExternal(ObjectInput in)
-    throws IOException, ClassNotFoundException {
-        blockId = in.readLong();
-        int length = in.readInt();
-        data = new byte[length];
-        in.readFully(data);
-    }
-
-    // implement externalizable interface
-    public void writeExternal(ObjectOutput out) throws IOException {
-        out.writeLong(blockId);
-        out.writeInt(data.length);
-        out.write(data);
-    }
+  private transient byte[] data; // work area
+  private transient BlockView view = null;
+  private transient boolean dirty = false;
+  private transient int transactionCount = 0;
+
+  /**
+   * Default constructor for serialization
+   */
+  public BlockIo() {
+    // empty
+  }
+
+  /**
+   * Constructs a new BlockIo instance working on the indicated buffer.
+   */
+  BlockIo(long blockId, byte[] data) {
+    // removeme for production version
+    if (blockId > 10000000000L) {
+      throw new Error("bogus block id " + blockId);
+    }
+    this.blockId = blockId;
+    this.data = data;
+  }
+
+  /**
+   * Returns the underlying array
+   */
+  byte[] getData() {
+    return data;
+  }
+
+  /**
+   * Sets the block number. Should only be called by RecordFile.
+   */
+  void setBlockId(long id) {
+    if (isInTransaction()) {
+      throw new Error("BlockId assigned for transaction block");
+    }
+    // removeme for production version
+    if (id > 10000000000L) {
+      throw new Error("bogus block id " + id);
+    }
+    blockId = id;
+  }
+
+  /**
+   * Returns the block number.
+   */
+  long getBlockId() {
+    return blockId;
+  }
+
+  /**
+   * Returns the current view of the block.
+   */
+  public BlockView getView() {
+    return view;
+  }
+
+  /**
+   * Sets the current view of the block.
+   */
+  public void setView(BlockView view) {
+    this.view = view;
+  }
+
+  /**
+   * Sets the dirty flag
+   */
+  void setDirty() {
+    dirty = true;
+  }
+
+  /**
+   * Clears the dirty flag
+   */
+  void setClean() {
+    dirty = false;
+  }
+
+  /**
+   * Returns true if the dirty flag is set.
+   */
+  boolean isDirty() {
+    return dirty;
+  }
+
+  /**
+   * Returns true if the block is still dirty with respect to the transaction
+   * log.
+   */
+  boolean isInTransaction() {
+    return transactionCount != 0;
+  }
+
+  /**
+   * Increments transaction count for this block, to signal that this block is
+   * in the log but not yet in the data file. The method also takes a snapshot
+   * so that the data may be modified in new transactions.
+   */
+  synchronized void incrementTransactionCount() {
+    transactionCount++;
+    // @fixme(alex)
+    setClean();
+  }
+
+  /**
+   * Decrements transaction count for this block, to signal that this block has
+   * been written from the log to the data file.
+   */
+  synchronized void decrementTransactionCount() {
+    transactionCount--;
+    if (transactionCount < 0) {
+      throw new Error("transaction count on block " + getBlockId()
+          + " below zero!");
+    }
+
+  }
+
+  /**
+   * Reads a byte from the indicated position
+   */
+  public byte readByte(int pos) {
+    return data[pos];
+  }
+
+  /**
+   * Writes a byte to the indicated position
+   */
+  public void writeByte(int pos, byte value) {
+    data[pos] = value;
+    setDirty();
+  }
+
+  /**
+   * Reads a short from the indicated position
+   */
+  public short readShort(int pos) {
+    return (short) (((short) (data[pos + 0] & 0xff) << 8)
+        | ((short) (data[pos + 1] & 0xff) << 0));
+  }
+
+  /**
+   * Writes a short to the indicated position
+   */
+  public void writeShort(int pos, short value) {
+    data[pos + 0] = (byte) (0xff & (value >> 8));
+    data[pos + 1] = (byte) (0xff & (value >> 0));
+    setDirty();
+  }
+
+  /**
+   * Reads an int from the indicated position
+   */
+  public int readInt(int pos) {
+    return (((data[pos + 0] & 0xff) << 24) | ((data[pos + 1] & 0xff) << 16)
+        | ((data[pos + 2] & 0xff) << 8) | ((data[pos + 3] & 0xff) << 0));
+  }
+
+  /**
+   * Writes an int to the indicated position
+   */
+  public void writeInt(int pos, int value) {
+    data[pos + 0] = (byte) (0xff & (value >> 24));
+    data[pos + 1] = (byte) (0xff & (value >> 16));
+    data[pos + 2] = (byte) (0xff & (value >> 8));
+    data[pos + 3] = (byte) (0xff & (value >> 0));
+    setDirty();
+  }
+
+  /**
+   * Reads a long from the indicated position
+   */
+  public long readLong(int pos) {
+    // Contributed by Erwin Bolwidt <ej...@klomp.org>
+    // Gives about 15% performance improvement
+    // The low word is computed as an int; masking with a long literal
+    // (0xffffffffL) keeps its sign bit from extending into the high 32 bits.
+    return ((long) (((data[pos + 0] & 0xff) << 24) | ((data[pos + 1] & 0xff) << 16)
+        | ((data[pos + 2] & 0xff) << 8) | (data[pos + 3] & 0xff)) << 32)
+        | ((long) (((data[pos + 4] & 0xff) << 24) | ((data[pos + 5] & 0xff) << 16)
+            | ((data[pos + 6] & 0xff) << 8) | (data[pos + 7] & 0xff)) & 0xffffffffL);
+    /*
+     * Original version by Alex Boisvert. Might be faster on 64-bit JVMs:
+     *
+     * return (((long) (data[pos + 0] & 0xff) << 56)
+     *     | ((long) (data[pos + 1] & 0xff) << 48)
+     *     | ((long) (data[pos + 2] & 0xff) << 40)
+     *     | ((long) (data[pos + 3] & 0xff) << 32)
+     *     | ((long) (data[pos + 4] & 0xff) << 24)
+     *     | ((long) (data[pos + 5] & 0xff) << 16)
+     *     | ((long) (data[pos + 6] & 0xff) << 8)
+     *     | ((long) (data[pos + 7] & 0xff) << 0));
+     */
+  }
+
+  /**
+   * Writes a long to the indicated position
+   */
+  public void writeLong(int pos, long value) {
+    data[pos + 0] = (byte) (0xff & (value >> 56));
+    data[pos + 1] = (byte) (0xff & (value >> 48));
+    data[pos + 2] = (byte) (0xff & (value >> 40));
+    data[pos + 3] = (byte) (0xff & (value >> 32));
+    data[pos + 4] = (byte) (0xff & (value >> 24));
+    data[pos + 5] = (byte) (0xff & (value >> 16));
+    data[pos + 6] = (byte) (0xff & (value >> 8));
+    data[pos + 7] = (byte) (0xff & (value >> 0));
+    setDirty();
+  }
+
+  // overrides java.lang.Object
+
+  @Override
+  public String toString() {
+    return "BlockIO(" + blockId + "," + dirty + "," + view + ")";
+  }
+
+  // implement externalizable interface
+  public void readExternal(ObjectInput in) throws IOException,
+      ClassNotFoundException {
+    blockId = in.readLong();
+    int length = in.readInt();
+    data = new byte[length];
+    in.readFully(data);
+  }
+
+  // implement externalizable interface
+  public void writeExternal(ObjectOutput out) throws IOException {
+    out.writeLong(blockId);
+    out.writeInt(data.length);
+    out.write(data);
+  }
 
 }
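
As a quick sanity check on the big-endian byte layout that the readers and
writers above implement, here is a round-trip sketch. It assumes it runs
from the same jdbm.recman package (the BlockIo(long, byte[]) constructor is
package-private), and the 8192-byte page size is an arbitrary choice for
the example.

    package org.apache.hadoop.hive.ql.util.jdbm.recman;

    public class BlockIoRoundTrip {
      public static void main(String[] args) {
        // Page size is arbitrary here; jdbm normally hands out real pages
        // through RecordFile.
        BlockIo block = new BlockIo(0L, new byte[8192]);

        block.writeLong(0, 0x0123456789ABCDEFL); // most significant byte first
        block.writeInt(8, 0xCAFEBABE);
        block.writeShort(12, (short) 0x1234);

        // Each read decodes the same bytes; the writes set the dirty flag.
        System.out.println(block.readLong(0) == 0x0123456789ABCDEFL); // true
        System.out.println(block.readInt(8) == 0xCAFEBABE);           // true
        System.out.println(block.readShort(12) == (short) 0x1234);    // true
        System.out.println(block); // BlockIO(0,true,null)
      }
    }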

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java?rev=901644&r1=901643&r2=901644&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/recman/BlockView.java Thu Jan 21 10:37:58 2010
@@ -66,10 +66,10 @@
 package org.apache.hadoop.hive.ql.util.jdbm.recman;
 
 /**
- *  This is a marker interface that is implemented by classes that
- *  interpret blocks of data by pretending to be an overlay.
- *
- *  @see BlockIo#setView
+ * This is a marker interface that is implemented by classes that interpret
+ * blocks of data by pretending to be an overlay.
+ * 
+ * @see BlockIo#setView
  */
 public interface BlockView {
 }
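
Since BlockView carries no methods, a view is simply a class that wraps a
BlockIo, registers itself with setView, and interprets part of the page
through the typed readers and writers. A hypothetical sketch follows; the
CounterView name, MAGIC value, and field offsets are all invented for
illustration.

    package org.apache.hadoop.hive.ql.util.jdbm.recman;

    // Hypothetical overlay: treats the start of a page as a short magic
    // marker followed by a long counter.
    final class CounterView implements BlockView {
      private static final short MAGIC = 0x4356; // invented marker value
      private static final int O_MAGIC = 0;      // offset of the marker
      private static final int O_COUNT = 2;      // offset of the counter

      private final BlockIo block;

      CounterView(BlockIo block) {
        this.block = block;
        block.setView(this); // register as the block's current view
        if (block.readShort(O_MAGIC) != MAGIC) {
          block.writeShort(O_MAGIC, MAGIC); // initialize a fresh page
          block.writeLong(O_COUNT, 0L);
        }
      }

      long getCount() {
        return block.readLong(O_COUNT);
      }

      void increment() {
        // writeLong marks the block dirty so it will be flushed.
        block.writeLong(O_COUNT, getCount() + 1);
      }
    }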