You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/08/06 04:19:02 UTC

svn commit: r801497 [1/2] - in /hadoop/hive/trunk: ./ serde/src/java/org/apache/hadoop/hive/serde2/lazy/ serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/ serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/ serde/src/java/org/apac...

Author: zshao
Date: Thu Aug  6 02:19:01 2009
New Revision: 801497

URL: http://svn.apache.org/viewvc?rev=801497&view=rev
Log:
HIVE-640. Add LazyBinarySerDe to Hive. (Yuntao Jia via zshao)

Added:
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java
    hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java
    hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Aug  6 02:19:01 2009
@@ -116,6 +116,8 @@
     HIVE-487. Make hive compatibale with hadoop 20
     (Todd Lipcon via namit)
 
+    HIVE-640. Add LazyBinarySerDe to Hive. (Yuntao Jia via zshao)
+
   IMPROVEMENTS
     HIVE-389. Option to build without ivy (jssarma)
 

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java Thu Aug  6 02:19:01 2009
@@ -228,7 +228,6 @@
   /**
    * Get the value object with the index without checking parsed.
    * @param index  The index into the array starting from 0
-   * @param nullSequence  The byte sequence representing the NULL value
    */
   private LazyObject uncheckedGetValue(int index) {
     Text nullSequence = oi.getNullSequence();
@@ -254,7 +253,6 @@
   /**
    * Get the key object with the index without checking parsed.
    * @param index  The index into the array starting from 0
-   * @param nullSequence  The byte sequence representing the NULL value
    */
   private LazyPrimitive<?,?> uncheckedGetKey(int index) {
     Text nullSequence = oi.getNullSequence(); 

Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java Thu Aug  6 02:19:01 2009
@@ -39,8 +39,8 @@
  * SerDe classes should call the static functions in this library to create an ObjectInspector
  * to return to the caller of SerDe2.getObjectInspector().
  * 
- * The reason of having caches here is that ObjectInspector is because ObjectInspectors do
- * not have an internal state - so ObjectInspectors with the same construction parameters should
+ * The reason of having caches here is that ObjectInspectors do not have an internal 
+ * state - so ObjectInspectors with the same construction parameters should
  * result in exactly the same ObjectInspector.
  */
 public class LazyObjectInspectorFactory {

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryArray is serialized as follows:
+ *             start  A     b   b   b   b         b   b end
+ * bytes[] ->    |--------|---|---|---|---| ... |---|---|
+ * 
+ * Section A starts with a VInt holding the number of elements N,
+ * followed by the null-bytes: (N+7)/8 bytes in which each bit
+ * corresponds to an element and indicates whether that element
+ * is null (0) or not null (1).
+ * 
+ * After A, all b(s) represent the elements of the list. 
+ * Each of them is again a LazyBinaryObject.
+ *
+ */
+
+public class LazyBinaryArray extends LazyBinaryNonPrimitive<LazyBinaryListObjectInspector> {
+ 
+  /**
+   * Whether the data is already parsed or not.
+   */
+  boolean parsed = false;
+  /**
+   * The length of the array.
+   * Only valid when the data is parsed.
+   */
+  int arraySize = 0;
+  
+  /**
+   * The start positions and lengths of array elements.
+   * Only valid when the data is parsed.
+   */
+  int[] elementStart;
+  int[] elementLength;
+  
+  /**
+   * Whether an element is initialized or not
+   */
+  boolean[] elementInited;
+
+  /**
+   * Whether an element is null or not.
+   * A length of 0 does not mean that the element is null.
+   * In particular, a 0-length string is not null.
+   */
+  boolean[] elementIsNull;
+
+  /**
+   * The elements of the array. Note that we call 
+   * arrayElements[i].init(bytes, begin, length) 
+   * only when that element is accessed.
+   */
+  LazyBinaryObject[] arrayElements;
+
+  /**
+   * Construct a LazyBinaryArray object with the ObjectInspector.
+   * @param oi  the oi representing the type of this LazyBinaryArray
+   */
+  protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
+    super(oi);
+  }
+  
+  /**
+   * Set the row data for this LazyBinaryArray.
+   * @see LazyObject#init(ByteArrayRef, int, int)
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    super.init(bytes, start, length);
+    parsed = false;
+  }
+ 
+  /**
+   * Enlarge the size of arrays storing information for the elements inside 
+   * the array.
+   */
+  private void adjustArraySize(int newSize) {
+    if (elementStart == null || elementStart.length < newSize) {      
+      elementStart = new int[newSize];
+      elementLength = new int[newSize];
+      elementInited = new boolean[newSize];
+      elementIsNull = new boolean[newSize];
+      arrayElements = new LazyBinaryObject[newSize];
+    }
+  }
+  
+  VInt vInt = new LazyBinaryUtils.VInt();
+  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();  
+  /**
+   * Parse the bytes and fill elementStart, elementLength,
+   * elementInited and elementIsNull.
+   */
+  private void parse() {
+    
+    byte[] bytes = this.bytes.getData();
+    
+    // get the VInt that represents the array size
+    LazyBinaryUtils.readVInt(bytes, start, vInt);    
+    arraySize = vInt.value;
+    if(0 == arraySize) {
+      parsed = true;
+      return;
+    }
+    
+    // adjust arrays
+    adjustArraySize(arraySize);
+    // find out the null-bytes
+    int arryByteStart = start + vInt.length; 
+    int nullByteCur = arryByteStart;    
+    int nullByteEnd = arryByteStart + (arraySize+7) / 8;    
+    // the beginning of the real elements
+    int lastElementByteEnd = nullByteEnd;
+    // the list element object inspector
+    ObjectInspector listEleObjectInspector = 
+      ((ListObjectInspector)oi).getListElementObjectInspector(); 
+    // parsing elements one by one
+    for (int i=0; i<arraySize; i++) {
+      elementIsNull[i] = true;
+      if ((bytes[nullByteCur] & (1 << (i%8))) !=0) {
+        elementIsNull[i] = false;
+        LazyBinaryUtils.checkObjectByteInfo(listEleObjectInspector, 
+            bytes, lastElementByteEnd, recordInfo);
+        elementStart [i] = lastElementByteEnd + recordInfo.elementOffset;
+        elementLength[i] = recordInfo.elementSize;      
+        lastElementByteEnd = elementStart [i] + elementLength[i];
+      }
+      // move onto the next null byte
+      if(7 == (i%8)) {
+        nullByteCur ++;
+      } 
+    }
+        
+    Arrays.fill(elementInited, 0, arraySize, false);
+    parsed = true;
+  }
+  
+ /**
+  * Returns the element object at the index position inside this array,
+  * or null if the index is out of range or the element itself is null.
+  */
+ public Object getListElementObject(int index) {
+   if (!parsed) {
+     parse();
+   }
+   if (index < 0 || index >= arraySize) {
+     return null;
+   }
+   return uncheckedGetElement(index);
+ }
+ 
+ /**
+  * Get the element without checking out-of-bound index.
+  * @param index  index to the array element
+  */
+ private Object uncheckedGetElement(int index) {
+   
+   if (elementIsNull[index]) {
+     return null;
+   } else {
+     if (!elementInited[index]) {
+       elementInited[index] = true;
+       if (arrayElements[index] == null) {
+         arrayElements[index] = LazyBinaryFactory.createLazyBinaryObject(
+           ((LazyBinaryListObjectInspector)oi).getListElementObjectInspector());
+       }
+       arrayElements[index].init(bytes, elementStart[index], elementLength[index]);
+     }
+   }
+   return arrayElements[index].getObject();
+ }
+ 
+ /** 
+  * Returns the array size.
+  */
+ public int getListLength() {
+   if (!parsed) {
+     parse();
+   }
+   return arraySize;
+ }
+ 
+ /** 
+  * cachedList is reused every time getList is called.
+  * Different LazyBinaryArray instances cannot share
+  * the same cachedList. 
+  */
+ ArrayList<Object> cachedList;
+ 
+ /** Returns the List of element objects; null elements appear as null entries.
+  *  The same cached list is cleared and refilled on every call.
+  */
+ public List<Object> getList() {
+   if (!parsed) {
+     parse();
+   }
+   if (cachedList == null) {
+     cachedList = new ArrayList<Object>(arraySize);
+   } else {
+     cachedList.clear();
+   }
+   for (int index=0; index<arraySize; index++) {
+     cachedList.add(uncheckedGetElement(index));
+   }
+   return cachedList;
+ }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
+
+/**
+ * LazyBinaryObject for boolean which takes one byte
+ * 
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ * 
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ * 
+ */
+public class LazyBinaryBoolean extends LazyBinaryPrimitive<WritableBooleanObjectInspector, BooleanWritable> {
+
+  public LazyBinaryBoolean(WritableBooleanObjectInspector oi) {
+    super(oi);
+    data = new BooleanWritable();
+  }
+
+  public LazyBinaryBoolean(LazyBinaryBoolean copy) {
+    super(copy);
+    data = new BooleanWritable(copy.data.get());
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert(1 == length);
+    byte val = bytes.getData()[start];
+    if (val == 0) {
+      data.set(false);        
+    } else if (val == 1) {
+      data.set(true);   
+    }
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+
+/**
+ * LazyBinaryObject for byte which takes one byte
+ */
+public class LazyBinaryByte extends LazyBinaryPrimitive<WritableByteObjectInspector, ByteWritable> {
+
+  LazyBinaryByte(WritableByteObjectInspector oi) {
+    super(oi);
+    data = new ByteWritable();
+  }
+
+  LazyBinaryByte(LazyBinaryByte copy) {
+    super(copy);
+    data = new ByteWritable(copy.data.get());
+  }
+  
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert(1 == length);
+    data.set(bytes.getData()[start]);
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+
+/**
+ * LazyBinaryObject for double which takes eight bytes
+ */
+public class LazyBinaryDouble extends LazyBinaryPrimitive<WritableDoubleObjectInspector, DoubleWritable> {
+
+  LazyBinaryDouble(WritableDoubleObjectInspector oi) {
+    super(oi);
+    data = new DoubleWritable();
+  }
+  
+  LazyBinaryDouble(LazyBinaryDouble copy) {
+    super(copy);
+    data = new DoubleWritable(copy.data.get());
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert(8 == length);
+    data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes.getData(), start)));
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+public class LazyBinaryFactory {
+
+  /**
+   * Create a lazy binary primitive object for the given PrimitiveObjectInspector.
+   */
+  public static LazyBinaryPrimitive<?,?> createLazyBinaryPrimitiveClass(PrimitiveObjectInspector oi) {
+    PrimitiveCategory p = oi.getPrimitiveCategory();
+    switch(p) {
+      case BOOLEAN: {
+        return new LazyBinaryBoolean((WritableBooleanObjectInspector)oi);
+      }
+      case BYTE: {
+        return new LazyBinaryByte((WritableByteObjectInspector)oi);
+      }
+      case SHORT: {
+        return new LazyBinaryShort((WritableShortObjectInspector)oi);
+      }
+      case INT: {
+        return new LazyBinaryInteger((WritableIntObjectInspector)oi);
+      }
+      case LONG: {
+        return new LazyBinaryLong((WritableLongObjectInspector)oi);
+      }
+      case FLOAT: {
+        return new LazyBinaryFloat((WritableFloatObjectInspector)oi);
+      }
+      case DOUBLE: {
+        return new LazyBinaryDouble((WritableDoubleObjectInspector)oi);
+      }
+      case STRING: {
+        return new LazyBinaryString((WritableStringObjectInspector)oi);
+      }
+      default: {
+        throw new RuntimeException("Internal error: no LazyBinaryObject for " + p);        
+      }
+    }
+  }
+
+  /**
+   * Create a hierarchical LazyBinaryObject based on the given ObjectInspector.
+   */
+  public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
+    ObjectInspector.Category c = oi.getCategory();
+    switch(c) {
+    case PRIMITIVE:
+      return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector)oi);
+    case MAP:
+      return new LazyBinaryMap((LazyBinaryMapObjectInspector)oi);      
+    case LIST: 
+      return new LazyBinaryArray((LazyBinaryListObjectInspector)oi);      
+    case STRUCT:
+      return new LazyBinaryStruct((LazyBinaryStructObjectInspector)oi);      
+    }
+
+    throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
+  }  
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.io.FloatWritable;
+
+/**
+ * LazyBinaryObject for float which takes four bytes.
+ */
+public class LazyBinaryFloat extends LazyBinaryPrimitive<WritableFloatObjectInspector, FloatWritable> {
+
+  LazyBinaryFloat(WritableFloatObjectInspector oi) {
+    super(oi);
+    data = new FloatWritable();
+  }
+  
+  LazyBinaryFloat(LazyBinaryFloat copy) {
+    super(copy);
+    data = new FloatWritable(copy.data.get());
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert (4 == length);
+    data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes.getData(), start)));
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * LazyBinaryObject for integer which is serialized as VInt
+ * @see LazyBinaryUtils#readVInt(byte[], int, VInt) 
+ */
+public class LazyBinaryInteger extends LazyBinaryPrimitive<WritableIntObjectInspector, IntWritable> {
+
+  LazyBinaryInteger(WritableIntObjectInspector oi) {
+    super(oi);
+    data = new IntWritable();
+  }
+
+  LazyBinaryInteger(LazyBinaryInteger copy) {
+    super(copy);
+    data = new IntWritable(copy.data.get());
+  }
+
+  /**
+   *  The reusable vInt for decoding the integer
+   */
+  VInt vInt = new LazyBinaryUtils.VInt();
+  
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    LazyBinaryUtils.readVInt(bytes.getData(), start, vInt);
+    assert(length == vInt.length);
+    data.set(vInt.value);
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * LazyBinaryObject for long which stores as VLong.
+ * @see LazyBinaryUtils#readVLong(byte[], int, VLong)
+ */
+public class LazyBinaryLong extends LazyBinaryPrimitive<WritableLongObjectInspector, LongWritable> {
+
+  LazyBinaryLong(WritableLongObjectInspector oi) {
+    super(oi);
+    data = new LongWritable();
+  }
+  
+  LazyBinaryLong(LazyBinaryLong copy) {
+    super(copy);
+    data = new LongWritable(copy.data.get());
+  }
+
+  /**
+   *  The reusable vLong for decoding the long
+   */
+  VLong vLong = new LazyBinaryUtils.VLong();
+  
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    LazyBinaryUtils.readVLong(bytes.getData(), start, vLong);
+    assert(length == vLong.length);
+    data.set(vLong.value);
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,325 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+/**
+ * LazyBinaryMap is serialized as follows:
+ *             start  S      A     b   c   b   c         b   c end
+ * bytes[] ->    |------|--------|---|---|---|---| ... |---|---|
+ * 
+ * Section S is a VInt holding N, the number of key-value pairs. Section A
+ * is the null-bytes: there are (N*2+7)/8 bytes used as null-bytes.
+ * Each bit corresponds to a key or a value and it indicates whether 
+ * that key or value is null (0) or not null (1).
+ * 
+ * After A, all the bytes are actual serialized data of the map,
+ * which are key-value pairs. b represent the keys and c represent
+ * the values. Each of them is again a LazyBinaryObject.
+ * A null key or a null value occupies no bytes in the data section.
+ */
+
+public class LazyBinaryMap extends LazyBinaryNonPrimitive<LazyBinaryMapObjectInspector> {
+
+  private static Log LOG = LogFactory.getLog(LazyBinaryMap.class.getName());
+
+  /**
+   * Whether the current bytes are already parsed; init() resets it to false.
+   */
+  boolean parsed;
+  
+  /**
+   * The number of key-value pairs, as read by parse() from the leading VInt.
+   * Only valid when the data is parsed.
+   * Counts every serialized pair, including pairs whose key is null.
+   */
+  int mapSize = 0;
+  
+  /**
+   * The beginning position and length of key[i] and value[i].
+   * Only valid when the data is parsed and the key/value is not null.
+   */
+  int[] keyStart;
+  int[] keyLength;
+  int[] valueStart;
+  int[] valueLength;
+  /**
+   * Whether valueObjects[i]/keyObjects[i] is initialized for the current bytes.
+   */
+  boolean[] keyInited;
+  boolean[] valueInited;
+
+  /**
+   * Whether valueObjects[i]/keyObjects[i] is null or not
+   * This could not be inferred from the length of the object.
+   * In particular, a 0-length string is not null.
+   */
+  boolean[] keyIsNull;
+  boolean[] valueIsNull;
+  
+  /**
+   * The keys: LazyBinaryPrimitives created on first access by uncheckedGetKey().
+   */
+  LazyBinaryPrimitive<?,?>[] keyObjects;
+  /**
+   * The values are stored in an array of LazyBinaryObjects.
+   * They are created on first access by uncheckedGetValue() and are
+   * re-initialized, not re-created, while the arrays are not reallocated.
+   */
+  LazyBinaryObject[] valueObjects;
+  
+  protected LazyBinaryMap(LazyBinaryMapObjectInspector oi) {
+    super(oi);
+  }
+  
+  /**
+   * Set the row data for this LazyBinaryMap; parsing is deferred to the accessors.
+   * @see LazyBinaryObject#init(ByteArrayRef, int, int)
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    super.init(bytes, start, length);
+    parsed = false;
+  }
+
+  /**
+   * Make sure the per-entry arrays can hold newSize entries: 
+   * keyStart, keyLength, valueStart, valueLength,
+   * keyInited, keyIsNull, valueInited, valueIsNull,
+   * keyObjects and valueObjects. The arrays only grow; when they are
+   * reallocated, the previously created key and value objects are dropped.
+   */
+  protected void adjustArraySize(int newSize) {
+    if (keyStart == null || keyStart.length < newSize) {
+      keyStart     = new int[newSize];
+      keyLength    = new int[newSize];
+      valueStart   = new int[newSize];
+      valueLength  = new int[newSize];
+      keyInited    = new boolean[newSize];
+      keyIsNull    = new boolean[newSize];
+      valueInited  = new boolean[newSize];
+      valueIsNull  = new boolean[newSize];
+      keyObjects   = new LazyBinaryPrimitive<?,?>[newSize];
+      valueObjects = new LazyBinaryObject[newSize];
+    }
+  }  
+  
+  boolean nullMapKey = false;  // true once the null-key warning has been logged
+  VInt vInt = new LazyBinaryUtils.VInt();  // reusable holder for the decoded map size
+  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();  // reusable element offset/size
+
+  /**
+   * Parse the byte[] and fill mapSize, keyStart, keyLength, keyIsNull, 
+   * valueStart, valueLength and valueIsNull; no key or value is deserialized here.
+   */
+  private void parse() {    
+    
+    byte[] bytes = this.bytes.getData();
+    
+    // get the VInt that represents the map size
+    LazyBinaryUtils.readVInt(bytes, start, vInt);    
+    mapSize = vInt.value;
+    if(0 == mapSize) {  // empty map: nothing follows the size
+      parsed = true;
+      return;
+    }
+  
+    // make sure the per-entry arrays are large enough
+    adjustArraySize(mapSize);
+
+    // find out the null-bytes, which directly follow the map size
+    int mapByteStart = start + vInt.length; 
+    int nullByteCur  = mapByteStart;    
+    int nullByteEnd  = mapByteStart + (mapSize*2+7) / 8;    
+    int lastElementByteEnd = nullByteEnd;
+
+    // parsing the keys and values one by one, starting right after the null-bytes
+    for (int i=0; i<mapSize; i++) {
+      // parse a key: bit (i*2)%8 of the current null byte is set if it is not null
+      keyIsNull[i] = true;
+      if ((bytes[nullByteCur] & (1 << ((i*2)%8))) !=0) {
+        keyIsNull[i] = false;
+        LazyBinaryUtils.checkObjectByteInfo(((MapObjectInspector)oi).getMapKeyObjectInspector(), 
+            bytes, lastElementByteEnd, recordInfo);
+        keyStart[i]  = lastElementByteEnd + recordInfo.elementOffset;
+        keyLength[i] = recordInfo.elementSize;      
+        lastElementByteEnd = keyStart[i] + keyLength[i];
+      } else if (!nullMapKey) {
+        nullMapKey = true;
+        LOG.warn("Null map key encountered! Ignoring similar problems.");
+      }
+
+      // parse a value: bit (i*2+1)%8 of the current null byte is set if it is not null
+      valueIsNull[i] = true;
+      if ((bytes[nullByteCur] & (1 << ((i*2+1)%8))) !=0) {
+        valueIsNull[i] = false;
+        LazyBinaryUtils.checkObjectByteInfo(((MapObjectInspector)oi).getMapValueObjectInspector(), 
+            bytes, lastElementByteEnd, recordInfo);
+        valueStart[i]  = lastElementByteEnd + recordInfo.elementOffset;
+        valueLength[i] = recordInfo.elementSize;
+        lastElementByteEnd = valueStart[i] + valueLength[i];
+      }
+      
+      // a null byte covers 4 key-value pairs, so move on after every 4th pair
+      if (3 == (i%4)) {
+        nullByteCur ++;
+      }
+    }
+        
+    Arrays.fill(keyInited,   0, mapSize, false);
+    Arrays.fill(valueInited, 0, mapSize, false);
+    parsed = true;
+  }
+
+  /**
+   * Get the value object at index (null for a null value) without checking parsed.
+   * @param index  The index into the array starting from 0
+   */
+  private LazyBinaryObject uncheckedGetValue(int index) {
+    if (valueIsNull[index]) {
+      return null; 
+    }
+    if (!valueInited[index]) {
+      valueInited[index] = true;
+      if (valueObjects[index] == null) {
+        valueObjects[index] = LazyBinaryFactory.createLazyBinaryObject(
+            ((MapObjectInspector)oi).getMapValueObjectInspector());
+      }
+      valueObjects[index].init(bytes, valueStart[index], valueLength[index]);
+    }
+    return valueObjects[index];
+  }
+  
+  /**
+   * Get the value in the map for the key.
+   * 
+   * If there are multiple matches (which is possible in the serialized 
+   * format), only the first one is returned.
+   * 
+   * Each key is initialized from its bytes and compared with equals(), so key
+   * is expected to be the matching Writable. We do linear search because in 
+   * most cases, user only wants to get one or two values out of the map, and 
+   * the cost of building up a HashMap is substantially higher.
+   * 
+   * @param key   The key object that we are looking for.
+   * @return The corresponding value object, or NULL if not found or if the value is null
+   */
+  public Object getMapValueElement(Object key) {
+    if (!parsed) {
+      parse();
+    }
+    // search for the key
+    for (int i=0; i<mapSize; i++) {
+      LazyBinaryPrimitive<?,?> lazyKeyI = uncheckedGetKey(i);
+      if (lazyKeyI == null) continue;
+      // getWritableObject() returns the Writable that holds the key value.
+      Object keyI = lazyKeyI.getWritableObject();
+      if (keyI == null) continue;
+      if (keyI.equals(key)) {
+        // Got a match, return the value
+        LazyBinaryObject v = uncheckedGetValue(i);
+        return v == null ? v : v.getObject();
+      }
+    }    
+    return null;
+  }
+
+  
+  /**
+   * Get the key object at index (null for a null key) without checking parsed.
+   * @param index  The index into the array starting from 0
+   */
+  private LazyBinaryPrimitive<?,?> uncheckedGetKey(int index) {
+    if (keyIsNull[index]) {
+      return null;
+    }
+    if (!keyInited[index]) {
+      keyInited[index] = true;
+      if (keyObjects[index] == null) {
+        // Keys are always primitive
+        keyObjects[index] = LazyBinaryFactory.createLazyBinaryPrimitiveClass(
+            (PrimitiveObjectInspector)((MapObjectInspector)oi).getMapKeyObjectInspector());
+      }
+      keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
+    }
+    return keyObjects[index];
+  }
+  
+  /**
+   * cachedMap is reused for different calls to getMap().
+   * But each LazyBinaryMap has a separate cachedMap so we won't overwrite the
+   * data by accident.
+   */
+  LinkedHashMap<Object, Object> cachedMap;
+  
+  /**
+   * Return the map object representing this LazyBinaryMap; null keys are skipped.
+   * Note that the keys will be Writable primitive objects.
+   * @return the reused cachedMap, which the next call to getMap() clears and refills
+   */
+  public Map<Object, Object> getMap() {
+    if (!parsed) {
+      parse();
+    }
+    if (cachedMap == null) {
+      // Use LinkedHashMap to provide deterministic order
+      cachedMap = new LinkedHashMap<Object, Object>();
+    } else {
+      cachedMap.clear();
+    }
+    
+    // go through each element of the map
+    for (int i = 0; i < mapSize; i++) {
+      LazyBinaryPrimitive<?,?> lazyKey = uncheckedGetKey(i);
+      if (lazyKey == null) continue;
+      Object key = lazyKey.getObject();
+      // do not overwrite if there are duplicate keys: the first occurrence wins
+      if (key != null && !cachedMap.containsKey(key)) {
+        LazyBinaryObject lazyValue = uncheckedGetValue(i);
+        Object value = (lazyValue == null ? null : lazyValue.getObject());
+        cachedMap.put(key, value);
+      }
+    }
+    return cachedMap;
+  }
+ 
+  /**
+   * Get the size of the map represented by this LazyBinaryMap.
+   * @return  The number of serialized pairs, including null and duplicate keys
+   */
+  public int getMapSize() {
+    if (!parsed) {
+      parse();
+    }
+    return mapSize;
+  }  
+}
\ No newline at end of file

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/** Base class of the non-primitive lazy binary objects: it only records the byte range that holds the data. */
+public abstract class LazyBinaryNonPrimitive<OI extends ObjectInspector> extends LazyBinaryObject<OI> {
+  protected ByteArrayRef bytes;  // wrapper of the buffer holding the serialized data; null until init()
+  protected int start;           // offset of the first byte of this object inside the buffer
+  protected int length;          // number of bytes that belong to this object
+
+  /** Creates an object without data; bytes/start/length keep their defaults (null, 0, 0) until init(). */
+  protected LazyBinaryNonPrimitive(OI oi) {
+    super(oi);
+  }
+
+  /** Returns this object itself: a non-primitive is its own value. */
+  @Override
+  public Object getObject() {
+    return this;
+  }
+
+  /** Records the byte range; throws IllegalArgumentException if bytes is null or length is not positive. */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    if (null == bytes) {
+      throw new IllegalArgumentException("bytes cannot be null!");
+    }
+    if (length <= 0) {
+      throw new IllegalArgumentException("length should be positive!");
+    }
+    this.bytes = bytes;
+    this.start = start;
+    this.length = length;
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryObject stores an object in a binary format in a byte[].
+ * For example, a double takes eight bytes.
+ * 
+ * A LazyBinaryObject can represent any primitive object or hierarchical object
+ * like string, list, map or struct.
+ */
+public abstract class LazyBinaryObject<OI extends ObjectInspector> {
+ 
+  OI oi;  // the object inspector handed to the constructor
+  
+  /**
+   * Create a LazyBinaryObject.
+   * @param oi  Derived classes can access meta information about this Lazy
+   *            Binary Object (e.g, length, null-bits) from it.
+   */
+  protected LazyBinaryObject(OI oi) {
+    this.oi = oi;
+  }
+  
+  /**
+   * Set the data for this LazyBinaryObject.
+   * We take ByteArrayRef instead of byte[] so that we will be able to drop
+   * the reference to byte[] by a single assignment.
+   * The ByteArrayRef object can be reused across multiple rows.
+   * 
+   * Never call this function if the object represents a null!!!
+   * 
+   * @param bytes  The wrapper of the byte[].
+   * @param start  The start position inside the bytes.
+   * @param length The length of the data in bytes, starting from "start"
+   * @see ByteArrayRef
+   */
+  public abstract void init(ByteArrayRef bytes, int start, int length);
+  
+  /**
+   * If the LazyBinaryObject is a primitive Object, then return the object
+   * that holds the actual primitive value.
+   * Otherwise (string, list, map, struct), return this. 
+   */
+  public abstract Object getObject();
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+/**
+ * Defines a LazyBianryPrimitive. 
+ * 
+ * {@data} will be initialized to corresponding types in 
+ * different LazyBinary primitive classes. For example, {@data} will
+ * be a BooleanWritable in the LazyBinaryBoolean class.
+ * 
+ * There is no null flag any more, 
+ * @see {@link LazyBinaryObject#init(ByteArrayRef, int, int)} 
+ *
+ */
+public abstract class LazyBinaryPrimitive<OI extends ObjectInspector, 
+T extends Writable> extends LazyBinaryObject<OI> {
+
+  LazyBinaryPrimitive(OI oi) {
+    super(oi);
+  }
+  
+  LazyBinaryPrimitive(LazyBinaryPrimitive<OI, T> copy) {
+    super(copy.oi);
+  }
+
+  T data;
+  
+  /**
+   * Returns the primitive object represented by this LazyBinaryObject.
+   * This is useful because it can make sure we have "null" for null objects.
+   */
+  public Object getObject() {
+    return data;
+  }
+
+  public T getWritableObject() {
+    return data;
+  }
+  
+  public String toString() {
+    return data.toString();
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,469 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+/**
+ * The LazyBinarySerDe class combines the lazy property of 
+ * LazySimpleSerDe class and the binary property of BinarySortable
+ * class. Lazy means a field is not deserialized until required. 
+ * Binary means a field is serialized in binary compact format.  
+ */
+public class LazyBinarySerDe implements SerDe {
+  
+  public static final Log LOG = LogFactory.getLog(
+      LazyBinarySerDe.class.getName());
+  
+  public LazyBinarySerDe() throws SerDeException {    // nothing to set up here; initialize() does the work
+  }
+  
+  List<String> columnNames;
+  List<TypeInfo> columnTypes;  
+  
+  TypeInfo rowTypeInfo;  
+  ObjectInspector cachedObjectInspector;
+  
+  // The object for storing row data
+  LazyBinaryStruct cachedLazyBinaryStruct;
+  
+  /**
+   * Initialize the SerDe with configuration and table information.
+   * @param conf  the Hadoop configuration (not read by this method)
+   * @param tbl   table properties; Constants.LIST_COLUMNS and Constants.LIST_COLUMN_TYPES
+   *              are read, and a missing property is treated like an empty one
+   * @see SerDe#initialize(Configuration, Properties)
+   */
+  @Override
+  public void initialize(Configuration conf, Properties tbl) throws SerDeException {
+    // Get column names and types; getProperty() returns null for an absent key
+    String columnNameProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+    String columnTypeProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+    if (columnNameProperty == null || columnNameProperty.length() == 0) {
+      columnNames = new ArrayList<String>();
+    } else {
+      columnNames = Arrays.asList(columnNameProperty.split(","));
+    }
+    if (columnTypeProperty == null || columnTypeProperty.length() == 0) {
+      columnTypes = new ArrayList<TypeInfo>();
+    } else {
+      columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(columnTypeProperty);
+    }
+    assert(columnNames.size() == columnTypes.size());
+    // Build the struct type of a row, then its object inspector and the reusable row object
+    rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+    cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo);
+    cachedLazyBinaryStruct = (LazyBinaryStruct) LazyBinaryFactory
+      .createLazyBinaryObject(cachedObjectInspector);
+    LOG.debug("LazyBinarySerDe initialized with: columnNames="
+      + columnNames + " columnTypes=" + columnTypes);
+  }
+  
+  /**
+   * Returns the ObjectInspector for the row, i.e. the one created by initialize().
+   * @see Deserializer#getObjectInspector()
+   */
+  @Override
+  public ObjectInspector getObjectInspector() throws SerDeException {
+    return cachedObjectInspector;   
+  }
+
+  /**
+   * Returns the Writable Class after serialization: serialize() returns a BytesWritable.
+   * @see Serializer#getSerializedClass()
+   */
+  @Override
+  public Class<? extends Writable> getSerializedClass() { 
+    return BytesWritable.class;
+  }
+
+  // The wrapper for byte array
+  ByteArrayRef byteArrayRef;
+  
+  /**
+   * Deserialize a BytesWritable or Text record into the reusable struct (null if the record is empty).
+   * @see Deserializer#deserialize(Writable)
+   */
+  @Override
+  public Object deserialize(Writable field) throws SerDeException {
+    if (byteArrayRef == null) {
+      byteArrayRef = new ByteArrayRef();
+    }
+    final byte[] rowBytes;
+    final int rowLength;
+    if (field instanceof BytesWritable) {
+      rowBytes = ((BytesWritable) field).get();  // get()/getSize(): backward-compatibility with hadoop 0.17
+      rowLength = ((BytesWritable) field).getSize();
+    } else if (field instanceof Text) {
+      rowBytes = ((Text) field).getBytes();
+      rowLength = ((Text) field).getLength();
+    } else {
+      throw new SerDeException(getClass().toString()
+          + ": expects either BytesWritable or Text object!");
+    }
+    if (rowLength == 0) {
+      return null;
+    }
+    byteArrayRef.setData(rowBytes);
+    cachedLazyBinaryStruct.init(byteArrayRef, 0, rowLength);
+    return cachedLazyBinaryStruct;
+  }
+  
+  /**
+   * The reusable output buffer and serialize byte buffer.
+   */
+  BytesWritable serializeBytesWritable  = new BytesWritable();
+  ByteStream.Output serializeByteStream = new ByteStream.Output();
+
+  /**
+   * Serializes one row into the reusable BytesWritable of this SerDe. The same
+   * object is returned and overwritten by every call.
+   * @see Serializer#serialize(Object, ObjectInspector)
+   */
+  @Override
+  public Writable serialize(Object obj, ObjectInspector objInspector)
+      throws SerDeException {
+    final boolean isStruct = (objInspector.getCategory() == Category.STRUCT);
+    if (!isStruct) {
+      // only rows, i.e. structs, can be serialized by this SerDe
+      String message = getClass().toString() + " can only serialize struct types, but we got: "
+          + objInspector.getTypeName();
+      throw new SerDeException(message);
+    }
+    // start from an empty buffer, then append the fields of the row
+    final Output rowStream = serializeByteStream;
+    rowStream.reset();
+    serializeStruct(rowStream, obj, (StructObjectInspector) objInspector);
+    // hand the bytes written so far over to the reusable Writable
+    serializeBytesWritable.set(rowStream.getData(), 0, rowStream.getCount());
+    return serializeBytesWritable;
+  }
+
+  boolean nullMapKey = false;
+  
+  /**
+   * Serialize a struct object without writing the byte size. 
+   * This function is shared by both row serialization and
+   * struct serialization. All output goes to byteStream.
+   * 
+   * @param byteStream      the byte stream storing the serialization data
+   * @param obj             the struct object to serialize
+   * @param soi             the struct object inspector
+   */
+  private void serializeStruct(Output byteStream, Object obj, StructObjectInspector soi) {
+    // do nothing for null struct
+    if (null == obj) {
+      return;
+    }
+    /* 
+     * Interleave serializing one null byte and 8 struct fields
+     * in each round, in order to support data deserialization
+     * with different table schemas
+     */
+    List<? extends StructField> fields = soi.getAllStructFieldRefs();    
+    int size  = fields.size();
+    int lasti = 0;
+    byte nullByte = 0;
+    for (int i=0; i<size; i++) {
+      // set bit to 1 if a field is not null
+      if (null != soi.getStructFieldData(obj, fields.get(i))) {
+        nullByte |= 1 << (i%8);            
+      }
+      // write the null byte every eight elements or
+      // if this is the last element and serialize the
+      // corresponding 8 struct fields at the same time 
+      if (7 == i%8 || i == size-1) {
+        byteStream.write(nullByte);
+        for (int j=lasti; j<=i; j++) {
+          serialize(byteStream,
+              soi.getStructFieldData(obj, fields.get(j)),
+              fields.get(j).getFieldObjectInspector());
+        }        
+        lasti    = i + 1;
+        nullByte = 0;
+      }
+    }
+  }
+  
+  /**
+   * A recursive function that serializes an object to a byte buffer
+   * based on its object inspector. Nothing is written for a null object.
+   *
+   * Bytes written per type:
+   *   VOID             nothing
+   *   BOOLEAN, BYTE    one byte
+   *   SHORT            two bytes, high byte first
+   *   INT, LONG        written by LazyBinaryUtils.writeVInt / writeVLong
+   *   FLOAT, DOUBLE    the four / eight bytes of Float.floatToIntBits /
+   *                    Double.doubleToLongBits, high byte first
+   *   STRING           the byte length as a VInt, then the bytes of the Text
+   *   LIST             four-byte size, element count as a VInt, null bytes
+   *                    (one bit per element), then the elements
+   *   MAP              four-byte size, entry count as a VInt, null bytes
+   *                    (two bits per entry: key, value), then the
+   *                    key-value pairs
+   *   STRUCT           four-byte size, then the output of serializeStruct
+   * The four-byte size counts the bytes that follow the size itself.
+   *
+   * @param byteStream      the byte stream storing the serialization data
+   * @param obj             the object to serialize
+   * @param objInspector    the object inspector
+   * @throws RuntimeException if the category or primitive category of the
+   *                          object inspector is not handled here
+   * @see LazyBinaryUtils#checkObjectByteInfo(ObjectInspector, byte[], int, LazyBinaryUtils.RecordInfo) for
+   *      how the byte sizes of different object are decoded.  
+   */
+  private void serialize(Output byteStream,
+      Object obj, ObjectInspector objInspector) {
+
+    // do nothing for null object
+    if (null == obj) {
+      return;
+    }
+
+    switch (objInspector.getCategory()) {
+      case PRIMITIVE: {
+        PrimitiveObjectInspector poi = (PrimitiveObjectInspector) objInspector;
+        switch (poi.getPrimitiveCategory()) {
+          case VOID: {
+            return;
+          }
+          case BOOLEAN: {
+            boolean v = ((BooleanObjectInspector) poi).get(obj);
+            byteStream.write((byte) (v ? 1 : 0));
+            return;
+          }
+          case BYTE: {
+            ByteObjectInspector boi = (ByteObjectInspector) poi;
+            byte v = boi.get(obj);
+            byteStream.write(v);
+            return;
+          }
+          case SHORT: {
+            ShortObjectInspector spoi = (ShortObjectInspector) poi;
+            short v = spoi.get(obj);
+            byteStream.write((byte) (v >> 8));
+            byteStream.write((byte) (v));
+            return;
+          }
+          case INT: {
+            IntObjectInspector ioi = (IntObjectInspector) poi;
+            int v = ioi.get(obj);
+            LazyBinaryUtils.writeVInt(byteStream, v);
+            return;
+          }
+          case LONG: {
+            LongObjectInspector loi = (LongObjectInspector) poi;
+            long v = loi.get(obj);
+            LazyBinaryUtils.writeVLong(byteStream, v);
+            return;
+          }
+          case FLOAT: {
+            FloatObjectInspector foi = (FloatObjectInspector) poi;
+            int v = Float.floatToIntBits(foi.get(obj));
+            byteStream.write((byte) (v >> 24));
+            byteStream.write((byte) (v >> 16));
+            byteStream.write((byte) (v >> 8));
+            byteStream.write((byte) (v));
+            return;
+          }
+          case DOUBLE: {
+            DoubleObjectInspector doi = (DoubleObjectInspector) poi;
+            long v = Double.doubleToLongBits(doi.get(obj));
+            byteStream.write((byte) (v >> 56));
+            byteStream.write((byte) (v >> 48));
+            byteStream.write((byte) (v >> 40));
+            byteStream.write((byte) (v >> 32));
+            byteStream.write((byte) (v >> 24));
+            byteStream.write((byte) (v >> 16));
+            byteStream.write((byte) (v >> 8));
+            byteStream.write((byte) (v));
+            return;
+          }
+          case STRING: {
+            StringObjectInspector soi = (StringObjectInspector) poi;
+            Text t = soi.getPrimitiveWritableObject(obj);
+            /* write byte size of the string which is a vint */
+            int length = t.getLength();
+            LazyBinaryUtils.writeVInt(byteStream, length);
+            /* write string itself */
+            byte[] data = t.getBytes();
+            byteStream.write(data, 0, length);
+            return;
+          }
+          default: {
+            throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+          }
+        }
+      }
+      case LIST: {
+        ListObjectInspector loi = (ListObjectInspector) objInspector;
+        ObjectInspector eoi = loi.getListElementObjectInspector();
+
+        // 1/ reserve spaces for the byte size of the list
+        //    which is a integer and takes four bytes
+        int byteSizeStart = reserveSizeSlot(byteStream);
+        int listStart = byteStream.getCount();
+
+        // 2/ write the size of the list as a VInt
+        int size = loi.getListLength(obj);
+        LazyBinaryUtils.writeVInt(byteStream, size);
+
+        // 3/ write the null bytes
+        byte nullByte = 0;
+        for (int eid = 0; eid < size; eid++) {
+          // set the bit to 1 if an element is not null
+          if (null != loi.getListElement(obj, eid)) {
+            nullByte |= 1 << (eid % 8);
+          }
+          // store the byte every eight elements or
+          // if this is the last element
+          if (7 == eid % 8 || eid == size - 1) {
+            byteStream.write(nullByte);
+            nullByte = 0;
+          }
+        }
+
+        // 4/ write element by element from the list
+        for (int eid = 0; eid < size; eid++) {
+          serialize(byteStream, loi.getListElement(obj, eid), eoi);
+        }
+
+        // 5/ update the list byte size
+        patchSize(byteStream, byteSizeStart, byteStream.getCount() - listStart);
+
+        return;
+      }
+      case MAP: {
+        MapObjectInspector moi = (MapObjectInspector) objInspector;
+        ObjectInspector koi = moi.getMapKeyObjectInspector();
+        ObjectInspector voi = moi.getMapValueObjectInspector();
+        Map<?, ?> map = moi.getMap(obj);
+
+        // 1/ reserve spaces for the byte size of the map
+        //    which is a integer and takes four bytes
+        int byteSizeStart = reserveSizeSlot(byteStream);
+        int mapStart = byteStream.getCount();
+
+        // 2/ write the size of the map which is a VInt
+        int size = map.size();
+        LazyBinaryUtils.writeVInt(byteStream, size);
+
+        // 3/ write the null bytes
+        int b = 0;
+        byte nullByte = 0;
+        for (Map.Entry<?, ?> entry : map.entrySet()) {
+          // set the bit to 1 if a key is not null
+          if (null != entry.getKey()) {
+            nullByte |= 1 << (b % 8);
+          } else if (!nullMapKey) {
+            // warn only once per serde instance
+            nullMapKey = true;
+            LOG.warn("Null map key encountered! Ignoring similar problems.");
+          }
+          b++;
+          // set the bit to 1 if a value is not null
+          if (null != entry.getValue()) {
+            nullByte |= 1 << (b % 8);
+          }
+          b++;
+          // write the byte to stream every 4 key-value pairs
+          // or if this is the last key-value pair
+          if (0 == b % 8 || b == size * 2) {
+            byteStream.write(nullByte);
+            nullByte = 0;
+          }
+        }
+
+        // 4/ write key-value pairs one by one
+        for (Map.Entry<?, ?> entry : map.entrySet()) {
+          serialize(byteStream, entry.getKey(), koi);
+          serialize(byteStream, entry.getValue(), voi);
+        }
+
+        // 5/ update the byte size of the map
+        patchSize(byteStream, byteSizeStart, byteStream.getCount() - mapStart);
+
+        return;
+      }
+      case STRUCT: {
+        // 1/ reserve spaces for the byte size of the struct
+        //    which is a integer and takes four bytes
+        int byteSizeStart = reserveSizeSlot(byteStream);
+        int structStart = byteStream.getCount();
+
+        // 2/ serialize the struct
+        serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+        // 3/ update the byte size of the struct
+        patchSize(byteStream, byteSizeStart, byteStream.getCount() - structStart);
+
+        return;
+      }
+      default: {
+        throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+      }
+    }
+  }
+
+  /**
+   * Writes four zero bytes as a placeholder for a size that is not known
+   * yet and returns the stream position of the first placeholder byte.
+   *
+   * @param byteStream      the byte stream storing the serialization data
+   * @return                the position to pass to patchSize later on
+   */
+  private static int reserveSizeSlot(Output byteStream) {
+    int byteSizeStart = byteStream.getCount();
+    for (int i = 0; i < 4; i++) {
+      byteStream.write((byte) 0);
+    }
+    return byteSizeStart;
+  }
+
+  /**
+   * Overwrites the four placeholder bytes starting at byteSizeStart with
+   * the given size, high byte first.
+   *
+   * @param byteStream      the byte stream storing the serialization data
+   * @param byteSizeStart   the position returned by reserveSizeSlot
+   * @param size            the number of bytes written after the placeholder
+   */
+  private static void patchSize(Output byteStream, int byteSizeStart, int size) {
+    // Fetch the backing array only now, after all writes have happened.
+    byte[] bytes = byteStream.getData();
+    bytes[byteSizeStart    ] = (byte) (size >> 24);
+    bytes[byteSizeStart + 1] = (byte) (size >> 16);
+    bytes[byteSizeStart + 2] = (byte) (size >> 8);
+    bytes[byteSizeStart + 3] = (byte) (size);
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+
+/**
+ * LazyBinaryObject for short which takes two bytes.
+ */
+public class LazyBinaryShort extends LazyBinaryPrimitive<WritableShortObjectInspector, ShortWritable> {
+
+  LazyBinaryShort(WritableShortObjectInspector oi) {
+    super(oi);
+    data = new ShortWritable();
+  }
+
+  LazyBinaryShort(LazyBinaryShort copy) {
+    super(copy);
+    data = new ShortWritable(copy.data.get());
+  }
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert(2 == length);
+    data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start));      
+  }  
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+/**
+ * The serialization of LazyBinaryString is very simple:
+ *             start             A                end
+ * bytes[] ->    |---------------------------------|
+ * 
+ * Section A is just an array of bytes which are exactly
+ * the Text contained in this object.
+ * 
+ */
+public class LazyBinaryString extends LazyBinaryPrimitive<WritableStringObjectInspector, Text> {
+
+  LazyBinaryString(WritableStringObjectInspector OI) {
+    super(OI);
+    data = new Text();
+  }
+  
+  public LazyBinaryString(LazyBinaryString copy) {
+    super(copy);
+    data = new Text(copy.data);
+  }   
+
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert(length > -1);
+    data.set(bytes.getData(), start, length);
+  }
+}

Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java Thu Aug  6 02:19:01 2009
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * LazyBinaryStruct is serialized as follows:
+ *            start A       B      A      B     A       B   end
+ * bytes[] ->    |-----|---------|--- ... ---|-----|---------|
+ * 
+ * Section A is one null-byte, corresponding to eight struct fields in Section B. 
+ * Each bit indicates whether the corresponding field is null (0) or not null (1).
+ * Each field is a LazyBinaryObject. 
+ * 
+ * Following B, there is another section A and B. This pattern repeats until
+ * all struct fields are serialized.
+ */
+public class LazyBinaryStruct extends LazyBinaryNonPrimitive<LazyBinaryStructObjectInspector> {
+
+  private static final Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName());
+
+  /**
+   * Whether the data is already parsed or not.
+   */
+  boolean parsed;
+
+  /**
+   * The fields of the struct.
+   */
+  LazyBinaryObject[] fields;
+
+  /**
+   * Whether a field is initialized or not.
+   */
+  boolean[] fieldInited;
+
+  /**
+   * Whether a field is null or not.
+   * A length of 0 does not mean that the field is null.
+   * In particular, a 0-length string is not null.
+   */
+  boolean[] fieldIsNull;
+
+  /**
+   * The start positions and lengths of struct fields.
+   * Only valid when the data is parsed.
+   */
+  int[] fieldStart;
+  int[] fieldLength;
+
+  /**
+   * Construct a LazyBinaryStruct object with an ObjectInspector.
+   */
+  protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
+    super(oi);
+  }
+
+  /**
+   * Points this object at a new byte range. The bytes are not parsed
+   * here; parsing happens on the first field access.
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    super.init(bytes, start, length);
+    parsed = false;
+  }
+
+  /** Scratch object filled by LazyBinaryUtils.checkObjectByteInfo. */
+  RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+  /** Set once the "Missing fields" warning has been logged. */
+  boolean missingFieldWarned = false;
+  /** Set once the "Extra bytes" warning has been logged. */
+  boolean extraFieldWarned = false;
+
+  /**
+   * Parse the byte[] and fill fieldStart, fieldLength, 
+   * fieldInited and fieldIsNull.
+   */
+  private void parse() {
+
+    List<? extends StructField> fieldRefs = ((StructObjectInspector) oi).getAllStructFieldRefs();
+
+    // The field objects and bookkeeping arrays are created on the first
+    // parse only and reused afterwards.
+    if (fields == null) {
+      fields = new LazyBinaryObject[fieldRefs.size()];
+      for (int i = 0; i < fields.length; i++) {
+        fields[i] = LazyBinaryFactory.createLazyBinaryObject(fieldRefs.get(i).getFieldObjectInspector());
+      }
+      fieldInited = new boolean[fields.length];
+      fieldIsNull = new boolean[fields.length];
+      fieldStart  = new int[fields.length];
+      fieldLength = new int[fields.length];
+    }
+
+    /*
+     * Please note that one null byte is followed by eight fields,
+     * then more null byte and fields.
+     */
+
+    int fieldId = 0;
+    int structByteEnd = start + length;
+    byte[] bytes = this.bytes.getData();
+
+    // An empty byte range has no null byte: do not read bytes[start] in
+    // that case (it lies outside the struct) and treat all fields as null.
+    byte nullByte = (length > 0) ? bytes[start] : 0;
+    int lastFieldByteEnd = start + 1;
+    // Go through all fields of the struct
+    for (int i = 0; i < fields.length; i++) {
+      fieldIsNull[i] = true;
+      if ((nullByte & (1 << (i % 8))) != 0) {
+        fieldIsNull[i] = false;
+        LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(),
+            bytes, lastFieldByteEnd, recordInfo);
+        fieldStart[i]  = lastFieldByteEnd + recordInfo.elementOffset;
+        fieldLength[i] = recordInfo.elementSize;
+        lastFieldByteEnd = fieldStart[i] + fieldLength[i];
+      }
+
+      // count how many fields are there
+      if (lastFieldByteEnd <= structByteEnd) {
+        fieldId++;
+      }
+      // next byte is a null byte if there are more bytes to go
+      if (7 == (i % 8)) {
+        if (lastFieldByteEnd < structByteEnd) {
+          nullByte = bytes[lastFieldByteEnd];
+          lastFieldByteEnd++;
+        } else {
+          // otherwise all null afterwards; stepping past the end marks
+          // the remaining fields as not present in the data
+          nullByte = 0;
+          lastFieldByteEnd++;
+        }
+      }
+    }
+
+    // Extra bytes at the end?
+    if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
+      extraFieldWarned = true;
+      LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
+          + "problems.");
+    }
+
+    // Missing fields? The position check alone is not enough: when the
+    // number of fields is a multiple of eight and the data is complete,
+    // the loop above still steps one byte past the end after the last
+    // field. Only warn if fewer fields were found than expected.
+    if (!missingFieldWarned && lastFieldByteEnd > structByteEnd
+        && fieldId < fields.length) {
+      missingFieldWarned = true;
+      LOG.warn("Missing fields! Expected " + fields.length + " fields but "
+          + "only got " + fieldId + "! Ignoring similar problems.");
+    }
+
+    Arrays.fill(fieldInited, false);
+    parsed = true;
+  }
+
+  /**
+   * Get one field out of the struct.
+   * 
+   * If the field is a primitive field, return the actual object.
+   * Otherwise return the LazyObject.  This is because PrimitiveObjectInspector
+   * does not have control over the object used by the user - the user simply
+   * directly use the Object instead of going through 
+   * Object PrimitiveObjectInspector.get(Object).  
+   * 
+   * @param fieldID  The field ID
+   * @return         The field as a LazyObject, or null if the field is null
+   */
+  public Object getField(int fieldID) {
+    if (!parsed) {
+      parse();
+    }
+    return uncheckedGetField(fieldID);
+  }
+
+  /**
+   * Get the field out of the row without checking parsed.
+   * This is called by both getField and getFieldsAsList.
+   * @param fieldID  The id of the field starting from 0.
+   * @return  The value of the field, or null if the field is null
+   */
+  private Object uncheckedGetField(int fieldID) {
+    // Nullness comes from the null byte recorded by parse(), not from the
+    // field length: a 0-length field is not necessarily null.
+    if (fieldIsNull[fieldID]) {
+      return null;
+    }
+    // Initialize the field object lazily, at most once per parse.
+    if (!fieldInited[fieldID]) {
+      fieldInited[fieldID] = true;
+      fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+    }
+    return fields[fieldID].getObject();
+  }
+
+  /** List instance reused by getFieldsAsList across calls. */
+  ArrayList<Object> cachedList;
+
+  /**
+   * Get the values of the fields as an ArrayList.
+   * The same list instance is cleared and refilled on every call, so the
+   * content of a previously returned list changes with the next call.
+   * @return The values of the fields as an ArrayList.
+   */
+  public ArrayList<Object> getFieldsAsList() {
+    if (!parsed) {
+      parse();
+    }
+    if (cachedList == null) {
+      cachedList = new ArrayList<Object>();
+    } else {
+      cachedList.clear();
+    }
+    for (int i = 0; i < fields.length; i++) {
+      cachedList.add(uncheckedGetField(i));
+    }
+    return cachedList;
+  }
+
+  /**
+   * @return this object itself
+   */
+  @Override
+  public Object getObject() {
+    return this;
+  }
+}