You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/08/06 04:19:02 UTC
svn commit: r801497 [1/2] - in /hadoop/hive/trunk: ./
serde/src/java/org/apache/hadoop/hive/serde2/lazy/
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
serde/src/java/org/apac...
Author: zshao
Date: Thu Aug 6 02:19:01 2009
New Revision: 801497
URL: http://svn.apache.org/viewvc?rev=801497&view=rev
Log:
HIVE-640. Add LazyBinarySerDe to Hive. (Yuntao Jia via zshao)
Added:
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
Modified:
hadoop/hive/trunk/CHANGES.txt
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java
hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java
hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java
Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Thu Aug 6 02:19:01 2009
@@ -116,6 +116,8 @@
HIVE-487. Make hive compatibale with hadoop 20
(Todd Lipcon via namit)
+ HIVE-640. Add LazyBinarySerDe to Hive. (Yuntao Jia via zshao)
+
IMPROVEMENTS
HIVE-389. Option to build without ivy (jssarma)
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyMap.java Thu Aug 6 02:19:01 2009
@@ -228,7 +228,6 @@
/**
* Get the value object with the index without checking parsed.
* @param index The index into the array starting from 0
- * @param nullSequence The byte sequence representing the NULL value
*/
private LazyObject uncheckedGetValue(int index) {
Text nullSequence = oi.getNullSequence();
@@ -254,7 +253,6 @@
/**
* Get the key object with the index without checking parsed.
* @param index The index into the array starting from 0
- * @param nullSequence The byte sequence representing the NULL value
*/
private LazyPrimitive<?,?> uncheckedGetKey(int index) {
Text nullSequence = oi.getNullSequence();
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java Thu Aug 6 02:19:01 2009
@@ -39,8 +39,8 @@
* SerDe classes should call the static functions in this library to create an ObjectInspector
* to return to the caller of SerDe2.getObjectInspector().
*
- * The reason of having caches here is that ObjectInspector is because ObjectInspectors do
- * not have an internal state - so ObjectInspectors with the same construction parameters should
+ * The reason for having caches here is that ObjectInspectors do not have an internal
+ * state - so ObjectInspectors with the same construction parameters should
* result in exactly the same ObjectInspector.
*/
public class LazyObjectInspectorFactory {
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryArray.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,236 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryArray is serialized as follows:
+ * start A b b b b b b end
+ * bytes[] -> |--------|---|---|---|---| ... |---|---|
+ *
+ * Section A is the list size N (stored as a VInt) followed by
+ * the null-bytes: there are (N+7)/8 bytes used as null-bytes.
+ * Each bit corresponds to an element and it indicates whether
+ * that element is null (0) or not null (1).
+ *
+ * After A, all b(s) represent the elements of the list.
+ * Each of them is again a LazyBinaryObject.
+ *
+ */
+
+public class LazyBinaryArray extends LazyBinaryNonPrimitive<LazyBinaryListObjectInspector> {
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed = false;
+ /**
+ * The length of the array.
+ * Only valid when the data is parsed.
+ */
+ int arraySize = 0;
+
+ /**
+ * The start positions and lengths of array elements.
+ * Only valid when the data is parsed.
+ */
+ int[] elementStart;
+ int[] elementLength;
+
+ /**
+ * Whether an element is initialized or not
+ */
+ boolean[] elementInited;
+
+ /**
+ * Whether an element is null or not.
+ * A length of 0 does not mean that the element is null.
+ * In particular, a 0-length string is not null.
+ */
+ boolean[] elementIsNull;
+
+ /**
+ * The elements of the array. Note that we call
+ * arrayElements[i].init(bytes, begin, length)
+ * only when that element is accessed.
+ */
+ LazyBinaryObject[] arrayElements;
+
+ /**
+ * Construct a LazyBinaryArray object with the ObjectInspector.
+ * @param oi the oi representing the type of this LazyBinaryArray
+ */
+ protected LazyBinaryArray(LazyBinaryListObjectInspector oi) {
+ super(oi);
+ }
+
+ /**
+ * Set the row data for this LazyBinaryArray.
+ * @see LazyObject#init(ByteArrayRef, int, int)
+ */
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ /**
+ * Enlarge the size of arrays storing information for the elements inside
+ * the array.
+ */
+ private void adjustArraySize(int newSize) {
+ if (elementStart == null || elementStart.length < newSize) {
+ elementStart = new int[newSize];
+ elementLength = new int[newSize];
+ elementInited = new boolean[newSize];
+ elementIsNull = new boolean[newSize];
+ arrayElements = new LazyBinaryObject[newSize];
+ }
+ }
+
+ VInt vInt = new LazyBinaryUtils.VInt();
+ RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+ /**
+ * Parse the bytes and fill elementStart, elementLength,
+ * elementInited and elementIsNull.
+ */
+ private void parse() {
+
+ byte[] bytes = this.bytes.getData();
+
+ // get the VInt that represents the array size
+ LazyBinaryUtils.readVInt(bytes, start, vInt);
+ arraySize = vInt.value;
+ if(0 == arraySize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(arraySize);
+ // find out the null-bytes
+ int arryByteStart = start + vInt.length;
+ int nullByteCur = arryByteStart;
+ int nullByteEnd = arryByteStart + (arraySize+7) / 8;
+ // the beginning of the real elements
+ int lastElementByteEnd = nullByteEnd;
+ // the list element object inspector
+ ObjectInspector listEleObjectInspector =
+ ((ListObjectInspector)oi).getListElementObjectInspector();
+ // parsing elements one by one
+ for (int i=0; i<arraySize; i++) {
+ elementIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << (i%8))) !=0) {
+ elementIsNull[i] = false;
+ LazyBinaryUtils.checkObjectByteInfo(listEleObjectInspector,
+ bytes, lastElementByteEnd, recordInfo);
+ elementStart [i] = lastElementByteEnd + recordInfo.elementOffset;
+ elementLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = elementStart [i] + elementLength[i];
+ }
+ // move onto the next null byte
+ if(7 == (i%8)) {
+ nullByteCur ++;
+ }
+ }
+
+ Arrays.fill(elementInited, 0, arraySize, false);
+ parsed = true;
+ }
+
+ /**
+ * Returns the actual primitive object at the index position
+ * inside the array represented by this LazyBinaryObject.
+ */
+ public Object getListElementObject(int index) {
+ if (!parsed) {
+ parse();
+ }
+ if (index < 0 || index >= arraySize) {
+ return null;
+ }
+ return uncheckedGetElement(index);
+ }
+
+ /**
+ * Get the element without checking out-of-bound index.
+ * @param index index to the array element
+ */
+ private Object uncheckedGetElement(int index) {
+
+ if (elementIsNull[index]) {
+ return null;
+ } else {
+ if (!elementInited[index]) {
+ elementInited[index] = true;
+ if (arrayElements[index] == null) {
+ arrayElements[index] = LazyBinaryFactory.createLazyBinaryObject(
+ ((LazyBinaryListObjectInspector)oi).getListElementObjectInspector());
+ }
+ arrayElements[index].init(bytes, elementStart[index], elementLength[index]);
+ }
+ }
+ return arrayElements[index].getObject();
+ }
+
+ /**
+ * Returns the array size.
+ */
+ public int getListLength() {
+ if (!parsed) {
+ parse();
+ }
+ return arraySize;
+ }
+
+ /**
+ * cachedList is reused every time getList is called.
+ * Different LazyBinaryArray instances cannot share
+ * the same cachedList.
+ */
+ ArrayList<Object> cachedList;
+
+ /** Returns the List of actual primitive objects.
+ * Returns null for null array.
+ */
+ public List<Object> getList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>(arraySize);
+ } else {
+ cachedList.clear();
+ }
+ for (int index=0; index<arraySize; index++) {
+ cachedList.add(uncheckedGetElement(index));
+ }
+ return cachedList;
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryBoolean.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,59 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.io.BooleanWritable;
+
+/**
+ * LazyBinaryObject for boolean which takes one byte
+ *
+ * <p>
+ * Part of the code is adapted from Apache Harmony Project.
+ *
+ * As with the specification, this implementation relied on code laid out in <a
+ * href="http://www.hackersdelight.org/">Henry S. Warren, Jr.'s Hacker's
+ * Delight, (Addison Wesley, 2002)</a> as well as <a
+ * href="http://aggregate.org/MAGIC/">The Aggregate's Magic Algorithms</a>.
+ * </p>
+ *
+ */
+public class LazyBinaryBoolean extends LazyBinaryPrimitive<WritableBooleanObjectInspector, BooleanWritable> {
+
+ public LazyBinaryBoolean(WritableBooleanObjectInspector oi) {
+ super(oi);
+ data = new BooleanWritable();
+ }
+
+ public LazyBinaryBoolean(LazyBinaryBoolean copy) {
+ super(copy);
+ data = new BooleanWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(1 == length);
+ byte val = bytes.getData()[start];
+ if (val == 0) {
+ data.set(false);
+ } else if (val == 1) {
+ data.set(true);
+ }
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryByte.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.io.ByteWritable;
+
+/**
+ * LazyBinaryObject for byte which takes one byte
+ */
+public class LazyBinaryByte extends LazyBinaryPrimitive<WritableByteObjectInspector, ByteWritable> {
+
+ LazyBinaryByte(WritableByteObjectInspector oi) {
+ super(oi);
+ data = new ByteWritable();
+ }
+
+ LazyBinaryByte(LazyBinaryByte copy) {
+ super(copy);
+ data = new ByteWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(1 == length);
+ data.set(bytes.getData()[start]);
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryDouble.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+
+/**
+ * LazyBinaryObject for double which takes eight bytes
+ */
+public class LazyBinaryDouble extends LazyBinaryPrimitive<WritableDoubleObjectInspector, DoubleWritable> {
+
+ LazyBinaryDouble(WritableDoubleObjectInspector oi) {
+ super(oi);
+ data = new DoubleWritable();
+ }
+
+ LazyBinaryDouble(LazyBinaryDouble copy) {
+ super(copy);
+ data = new DoubleWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(8 == length);
+ data.set(Double.longBitsToDouble(LazyBinaryUtils.byteArrayToLong(bytes.getData(), start)));
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,91 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableDoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+public class LazyBinaryFactory {
+
+ /**
+ * Create a lazy binary primitive object for the given PrimitiveObjectInspector.
+ */
+ public static LazyBinaryPrimitive<?,?> createLazyBinaryPrimitiveClass(PrimitiveObjectInspector oi) {
+ PrimitiveCategory p = oi.getPrimitiveCategory();
+ switch(p) {
+ case BOOLEAN: {
+ return new LazyBinaryBoolean((WritableBooleanObjectInspector)oi);
+ }
+ case BYTE: {
+ return new LazyBinaryByte((WritableByteObjectInspector)oi);
+ }
+ case SHORT: {
+ return new LazyBinaryShort((WritableShortObjectInspector)oi);
+ }
+ case INT: {
+ return new LazyBinaryInteger((WritableIntObjectInspector)oi);
+ }
+ case LONG: {
+ return new LazyBinaryLong((WritableLongObjectInspector)oi);
+ }
+ case FLOAT: {
+ return new LazyBinaryFloat((WritableFloatObjectInspector)oi);
+ }
+ case DOUBLE: {
+ return new LazyBinaryDouble((WritableDoubleObjectInspector)oi);
+ }
+ case STRING: {
+ return new LazyBinaryString((WritableStringObjectInspector)oi);
+ }
+ default: {
+ throw new RuntimeException("Internal error: no LazyBinaryObject for " + p);
+ }
+ }
+ }
+
+ /**
+ * Create a hierarchical LazyBinaryObject based on the given ObjectInspector.
+ */
+ public static LazyBinaryObject createLazyBinaryObject(ObjectInspector oi) {
+ ObjectInspector.Category c = oi.getCategory();
+ switch(c) {
+ case PRIMITIVE:
+ return createLazyBinaryPrimitiveClass((PrimitiveObjectInspector)oi);
+ case MAP:
+ return new LazyBinaryMap((LazyBinaryMapObjectInspector)oi);
+ case LIST:
+ return new LazyBinaryArray((LazyBinaryListObjectInspector)oi);
+ case STRUCT:
+ return new LazyBinaryStruct((LazyBinaryStructObjectInspector)oi);
+ }
+
+ throw new RuntimeException("Hive LazyBinarySerDe Internal error.");
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFloat.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableFloatObjectInspector;
+import org.apache.hadoop.io.FloatWritable;
+
+/**
+ * LazyBinaryObject for float which takes four bytes.
+ */
+public class LazyBinaryFloat extends LazyBinaryPrimitive<WritableFloatObjectInspector, FloatWritable> {
+
+ LazyBinaryFloat(WritableFloatObjectInspector oi) {
+ super(oi);
+ data = new FloatWritable();
+ }
+
+ LazyBinaryFloat(LazyBinaryFloat copy) {
+ super(copy);
+ data = new FloatWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert (4 == length);
+ data.set(Float.intBitsToFloat(LazyBinaryUtils.byteArrayToInt(bytes.getData(), start)));
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryInteger.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableIntObjectInspector;
+import org.apache.hadoop.io.IntWritable;
+
+/**
+ * LazyBinaryObject for integer which is serialized as VInt
+ * @see LazyBinaryUtils#readVInt(byte[], int, VInt)
+ */
+public class LazyBinaryInteger extends LazyBinaryPrimitive<WritableIntObjectInspector, IntWritable> {
+
+ LazyBinaryInteger(WritableIntObjectInspector oi) {
+ super(oi);
+ data = new IntWritable();
+ }
+
+ LazyBinaryInteger(LazyBinaryInteger copy) {
+ super(copy);
+ data = new IntWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vInt for decoding the integer
+ */
+ VInt vInt = new LazyBinaryUtils.VInt();
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ LazyBinaryUtils.readVInt(bytes.getData(), start, vInt);
+ assert(length == vInt.length);
+ data.set(vInt.value);
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryLong.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VLong;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * LazyBinaryObject for long which stores as VLong.
+ * @see LazyBinaryUtils#readVLong(byte[], int, VLong)
+ */
+public class LazyBinaryLong extends LazyBinaryPrimitive<WritableLongObjectInspector, LongWritable> {
+
+ LazyBinaryLong(WritableLongObjectInspector oi) {
+ super(oi);
+ data = new LongWritable();
+ }
+
+ LazyBinaryLong(LazyBinaryLong copy) {
+ super(copy);
+ data = new LongWritable(copy.data.get());
+ }
+
+ /**
+ * The reusable vLong for decoding the long
+ */
+ VLong vLong = new LazyBinaryUtils.VLong();
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ LazyBinaryUtils.readVLong(bytes.getData(), start, vLong);
+ assert(length == vLong.length);
+ data.set(vLong.value);
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryMap.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,325 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+/**
+ * LazyBinaryMap is serialized as follows:
+ *       start  N     A      b   c   b   c         b   c end
+ * bytes[] -> |---|--------|---|---|---|---| ... |---|---|
+ *
+ * Section N is the number of key-value pairs, stored as a VInt.
+ *
+ * Section A is the null-bytes. Suppose the map has
+ * N key-value pairs, then there are (N*2+7)/8 bytes used as null-bytes.
+ * Each bit corresponds to a key or a value and it indicates whether
+ * that key or value is null (0) or not null (1).
+ *
+ * After A, all the bytes are the actual serialized data of the map,
+ * which are key-value pairs. b represents a key and c represents
+ * a value; a null key or value occupies no bytes here.
+ * Each of them is again a LazyBinaryObject.
+ *
+ */
+
+public class LazyBinaryMap extends LazyBinaryNonPrimitive<LazyBinaryMapObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyBinaryMap.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
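+ /**
+ * The number of key-value pairs in the map, as read from the leading VInt.
+ * Only valid when the data is parsed.
+ * It is 0 for an empty map. init() must never be called for a NULL map, so no sentinel is used.
+ */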
+ int mapSize = 0;
+
+ /**
+ * The beginning position and length of key[i] and value[i].
+ * Only valid when the data is parsed.
+ */
+ int[] keyStart;
+ int[] keyLength;
+ int[] valueStart;
+ int[] valueLength;
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is initialized or not.
+ */
+ boolean[] keyInited;
+ boolean[] valueInited;
+
+ /**
+ * Whether valueObjects[i]/keyObjects[i] is null or not
+ * This could not be inferred from the length of the object.
+ * In particular, a 0-length string is not null.
+ */
+ boolean[] keyIsNull;
+ boolean[] valueIsNull;
+
+ /**
+ * The keys are stored in an array of LazyPrimitives.
+ */
+ LazyBinaryPrimitive<?,?>[] keyObjects;
+ /**
+ * The values are stored in an array of LazyBinaryObjects.
+ * There are no separators: the record of value[index] begins where the
+ * preceding non-null key or value ends (see valueStart and valueLength).
+ */
+ LazyBinaryObject[] valueObjects;
+
+ protected LazyBinaryMap(LazyBinaryMapObjectInspector oi) {
+ super(oi); // oi later supplies the key and value inspectors used for parsing and object creation
+ }
+
+ /**
+ * Set the row data for this LazyBinaryMap.
+ * Nothing is parsed here; parsing happens when the map is first read.
+ * @see LazyBinaryObject#init(ByteArrayRef, int, int)
+ */
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false; // positions computed for the previous byte range are no longer valid
+ }
+
+ /**
+  * Make sure every per-entry array can hold at least newSize entries.
+  * When the current arrays are missing or too short, all of them
+  * (start positions, lengths, inited flags, null flags and the cached
+  * key and value objects) are replaced by new arrays of newSize elements.
+  * Arrays that are already large enough are left untouched.
+  */
+ protected void adjustArraySize(int newSize) {
+   if (keyStart != null && keyStart.length >= newSize) {
+     return;
+   }
+   keyStart = new int[newSize];
+   keyLength = new int[newSize];
+   valueStart = new int[newSize];
+   valueLength = new int[newSize];
+   keyInited = new boolean[newSize];
+   keyIsNull = new boolean[newSize];
+   valueInited = new boolean[newSize];
+   valueIsNull = new boolean[newSize];
+   keyObjects = new LazyBinaryPrimitive<?,?>[newSize];
+   valueObjects = new LazyBinaryObject[newSize];
+ }
+
+ boolean nullMapKey = false;
+ VInt vInt = new LazyBinaryUtils.VInt();
+ RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+
+ /**
+ * Parse the byte[] and fill keyStart, keyLength, keyIsNull
+ * valueStart, valueLength and valueIsNull
+ */
+ private void parse() {
+
+ byte[] bytes = this.bytes.getData();
+
+ // get the VInt that represents the map size
+ LazyBinaryUtils.readVInt(bytes, start, vInt);
+ mapSize = vInt.value;
+ if(0 == mapSize) {
+ parsed = true;
+ return;
+ }
+
+ // adjust arrays
+ adjustArraySize(mapSize);
+
+ // find out the null-bytes
+ int mapByteStart = start + vInt.length;
+ int nullByteCur = mapByteStart;
+ int nullByteEnd = mapByteStart + (mapSize*2+7) / 8;
+ int lastElementByteEnd = nullByteEnd;
+
+ // parsing the keys and values one by one
+ for (int i=0; i<mapSize; i++) {
+ // parse a key
+ keyIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i*2)%8))) !=0) {
+ keyIsNull[i] = false;
+ LazyBinaryUtils.checkObjectByteInfo(((MapObjectInspector)oi).getMapKeyObjectInspector(),
+ bytes, lastElementByteEnd, recordInfo);
+ keyStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ keyLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = keyStart[i] + keyLength[i];
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+
+ // parse a value
+ valueIsNull[i] = true;
+ if ((bytes[nullByteCur] & (1 << ((i*2+1)%8))) !=0) {
+ valueIsNull[i] = false;
+ LazyBinaryUtils.checkObjectByteInfo(((MapObjectInspector)oi).getMapValueObjectInspector(),
+ bytes, lastElementByteEnd, recordInfo);
+ valueStart[i] = lastElementByteEnd + recordInfo.elementOffset;
+ valueLength[i] = recordInfo.elementSize;
+ lastElementByteEnd = valueStart[i] + valueLength[i];
+ }
+
+ // move onto the next null byte
+ if (3 == (i%4)) {
+ nullByteCur ++;
+ }
+ }
+
+ Arrays.fill(keyInited, 0, mapSize, false);
+ Arrays.fill(valueInited, 0, mapSize, false);
+ parsed = true;
+ }
+
+ /**
+  * Return the value of the entry at index, initializing it on first access.
+  * The parsed flag is not checked, so the caller must have parsed the map already.
+  * @param index The index into the array starting from 0
+  * @return the value object, or null when the serialized value is null
+  */
+ private LazyBinaryObject uncheckedGetValue(int index) {
+   if (!valueIsNull[index] && !valueInited[index]) {
+     valueInited[index] = true;
+     if (valueObjects[index] == null) {
+       // the object created for this slot is kept and only re-initialized later
+       valueObjects[index] = LazyBinaryFactory.createLazyBinaryObject(
+           ((MapObjectInspector) oi).getMapValueObjectInspector());
+     }
+     valueObjects[index].init(bytes, valueStart[index], valueLength[index]);
+   }
+   return valueIsNull[index] ? null : valueObjects[index];
+ }
+
+ /**
+  * Look up the value that belongs to a key.
+  *
+  * The serialized format may contain the same key more than once; in that
+  * case the value of the first matching entry is returned.
+  *
+  * The entries are searched linearly. Users typically read only one or two
+  * values out of a map, and building a HashMap first would cost
+  * substantially more than scanning the entries.
+  *
+  * Keys are compared by calling equals() on the Writable form of each stored
+  * key, so entries with a null key never match.
+  *
+  * @param key The key object that we are looking for.
+  * @return The corresponding value object, or NULL if not found
+  */
+ public Object getMapValueElement(Object key) {
+   if (!parsed) {
+     parse();
+   }
+   for (int pos = 0; pos < mapSize; pos++) {
+     LazyBinaryPrimitive<?,?> lazyKey = uncheckedGetKey(pos);
+     // getWritableObject() will convert LazyPrimitive to actual primitive writable objects.
+     Object candidate = (lazyKey == null) ? null : lazyKey.getWritableObject();
+     if (candidate != null && candidate.equals(key)) {
+       // first match wins; a null value is reported as null
+       LazyBinaryObject lazyValue = uncheckedGetValue(pos);
+       return (lazyValue == null) ? null : lazyValue.getObject();
+     }
+   }
+   return null;
+ }
+
+
+ /**
+  * Return the key of the entry at index, initializing it on first access.
+  * The parsed flag is not checked, so the caller must have parsed the map already.
+  * @param index The index into the array starting from 0
+  * @return the key object, or null when the serialized key is null
+  */
+ private LazyBinaryPrimitive<?,?> uncheckedGetKey(int index) {
+   if (!keyIsNull[index] && !keyInited[index]) {
+     keyInited[index] = true;
+     if (keyObjects[index] == null) {
+       // Keys are always primitive
+       keyObjects[index] = LazyBinaryFactory.createLazyBinaryPrimitiveClass(
+           (PrimitiveObjectInspector) ((MapObjectInspector) oi).getMapKeyObjectInspector());
+     }
+     keyObjects[index].init(bytes, keyStart[index], keyLength[index]);
+   }
+   return keyIsNull[index] ? null : keyObjects[index];
+ }
+
+ /**
+ * cachedMap is reused for different calls to getMap().
+ * But each LazyBinaryMap has a separate cachedMap so we won't overwrite the
+ * data by accident.
+ */
+ LinkedHashMap<Object, Object> cachedMap;
+
+ /**
+  * Return the map object representing this LazyBinaryMap.
+  * Note that the keyObjects will be Writable primitive objects.
+  * Entries with a null key are left out, and when a key occurs more than once
+  * only its first entry is kept. The same map instance is cleared and refilled
+  * on every call.
+  * @return the map object
+  */
+ public Map<Object, Object> getMap() {
+   if (!parsed) {
+     parse();
+   }
+   if (cachedMap != null) {
+     cachedMap.clear();
+   } else {
+     // Use LinkedHashMap to provide deterministic order
+     cachedMap = new LinkedHashMap<Object, Object>();
+   }
+
+   for (int pos = 0; pos < mapSize; pos++) {
+     LazyBinaryPrimitive<?,?> lazyKey = uncheckedGetKey(pos);
+     Object key = (lazyKey == null) ? null : lazyKey.getObject();
+     // skip null keys and do not overwrite if there are duplicate keys
+     if (key == null || cachedMap.containsKey(key)) {
+       continue;
+     }
+     LazyBinaryObject lazyValue = uncheckedGetValue(pos);
+     cachedMap.put(key, (lazyValue == null) ? null : lazyValue.getObject());
+   }
+   return cachedMap;
+ }
+
+ /**
+ * Get the size of the map represented by this LazyBinaryMap.
+ * This is the serialized entry count; entries with a null key and repeated
+ * keys are counted, so it can be larger than the size of getMap().
+ * @return The size of the map
+ */
+ public int getMapSize() {
+ if (!parsed) {
+ parse();
+ }
+ return mapSize;
+ }
+}
\ No newline at end of file
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryNonPrimitive.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,53 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * Base class for lazy binary objects, such as LazyBinaryMap, that keep a
+ * reference to their serialized form: a ByteArrayRef plus the start position
+ * and the length of the data inside it.
+ */
+public abstract class LazyBinaryNonPrimitive<OI extends ObjectInspector> extends LazyBinaryObject<OI> {
+
+  protected ByteArrayRef bytes;
+  protected int start;
+  protected int length;
+
+  /**
+   * Creates an object that has no data yet; init() must be called before use.
+   * @param oi the object inspector describing this object
+   */
+  protected LazyBinaryNonPrimitive(OI oi) {
+    super(oi);
+    bytes = null;
+    start = 0;
+    length = 0;
+  }
+
+  /**
+   * A non-primitive lazy binary object is its own object representation.
+   * @return this
+   */
+  @Override
+  public Object getObject() {
+    return this;
+  }
+
+  /**
+   * Remember the slice of bytes that holds the serialized data of this object.
+   * @param bytes The wrapper of the byte[]; must not be null.
+   * @param start The start position inside the bytes.
+   * @param length The length of the data, starting from "start"; must be positive.
+   * @throws IllegalArgumentException if bytes is null or length is not positive
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    // IllegalArgumentException is a RuntimeException, so existing handlers still apply
+    if (null == bytes) {
+      throw new IllegalArgumentException("bytes cannot be null!");
+    }
+    if (length <= 0) {
+      throw new IllegalArgumentException("length should be positive!");
+    }
+    this.bytes = bytes;
+    this.start = start;
+    this.length = length;
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryObject.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryObject stores an object in a binary format in a byte[].
+ * For example, a double takes eight bytes.
+ *
+ * A LazyBinaryObject can represent any primitive object or hierarchical object
+ * like string, list, map or struct.
+ */
+public abstract class LazyBinaryObject<OI extends ObjectInspector> {
+
+ OI oi;
+
+ /**
+ * Create a LazyBinaryObject.
+ * @param oi Derived classes can access meta information about this Lazy
+ * Binary Object (e.g, length, null-bits) from it.
+ */
+ protected LazyBinaryObject(OI oi) {
+ this.oi = oi;
+ }
+
+ /**
+ * Set the data for this LazyBinaryObject.
+ * We take ByteArrayRef instead of byte[] so that we will be able to drop
+ * the reference to byte[] by a single assignment.
+ * The ByteArrayRef object can be reused across multiple rows.
+ *
+ * Never call this function if the object represents a null!
+ *
+ * @param bytes The wrapper of the byte[].
+ * @param start The start position inside the bytes.
+ * @param length The length of the data, starting from "start"
+ * @see ByteArrayRef
+ */
+ public abstract void init(ByteArrayRef bytes, int start, int length);
+
+ /**
+ * If the LazyBinaryObject is a primitive Object, then deserialize it and return
+ * the actual primitive Object.
+ * Otherwise (string, list, map, struct), return this.
+ */
+ public abstract Object getObject();
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryPrimitive.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.io.Writable;
+/**
+ * Defines a LazyBinaryPrimitive.
+ *
+ * {@code data} will be initialized to corresponding types in
+ * different LazyBinary primitive classes. For example, {@code data} will
+ * be a BooleanWritable in the LazyBinaryBoolean class.
+ *
+ * There is no null flag any more: init must never be called for an
+ * object that represents a null, see {@link LazyBinaryObject#init}.
+ */
+public abstract class LazyBinaryPrimitive<OI extends ObjectInspector,
+    T extends Writable> extends LazyBinaryObject<OI> {
+
+  /**
+   * The Writable that holds the value. It is not assigned in this class;
+   * concrete subclasses such as LazyBinaryInteger assign it in their constructors.
+   */
+  T data;
+
+  /**
+   * Creates a primitive for the given object inspector.
+   */
+  LazyBinaryPrimitive(OI oi) {
+    super(oi);
+  }
+
+  /**
+   * Copies only the object inspector of another primitive; the value itself is not copied here.
+   */
+  LazyBinaryPrimitive(LazyBinaryPrimitive<OI, T> copy) {
+    super(copy.oi);
+  }
+
+  /**
+   * Returns the primitive object represented by this LazyBinaryObject.
+   * @return the Writable stored in data
+   */
+  @Override
+  public Object getObject() {
+    return data;
+  }
+
+  /**
+   * Returns the same object as getObject(), typed as the concrete Writable.
+   * @return the Writable stored in data
+   */
+  public T getWritableObject() {
+    return data;
+  }
+
+  /**
+   * Returns the string form of the stored Writable.
+   */
+  @Override
+  public String toString() {
+    return data.toString();
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,469 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.ByteStream;
+import org.apache.hadoop.hive.serde2.Deserializer;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ByteObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.DoubleObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.FloatObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.IntObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+/**
+ * The LazyBinarySerDe class combines the lazy property of
+ * LazySimpleSerDe class and the binary property of BinarySortable
+ * class. Lazy means a field is not deserialized until required.
+ * Binary means a field is serialized in binary compact format.
+ */
+public class LazyBinarySerDe implements SerDe {
+
+ public static final Log LOG = LogFactory.getLog(
+ LazyBinarySerDe.class.getName());
+
+ public LazyBinarySerDe() throws SerDeException { // nothing is set up here; see initialize()
+ }
+
+ List<String> columnNames;
+ List<TypeInfo> columnTypes;
+
+ TypeInfo rowTypeInfo;
+ ObjectInspector cachedObjectInspector;
+
+ // The object for storing row data
+ LazyBinaryStruct cachedLazyBinaryStruct;
+
+ /**
+  * Initialize the SerDe with configuration and table information.
+  * The column names and the column types are read from the table properties,
+  * and the row type, the row object inspector and the reusable row struct
+  * are built from them. The conf argument is not used.
+  * @see SerDe#initialize(Configuration, Properties)
+  */
+ @Override
+ public void initialize(Configuration conf, Properties tbl)
+     throws SerDeException {
+   // An empty property means that the table has no columns
+   final String namesProperty = tbl.getProperty(Constants.LIST_COLUMNS);
+   final String typesProperty = tbl.getProperty(Constants.LIST_COLUMN_TYPES);
+   columnNames = (namesProperty.length() == 0)
+       ? new ArrayList<String>()
+       : Arrays.asList(namesProperty.split(","));
+   columnTypes = (typesProperty.length() == 0)
+       ? new ArrayList<TypeInfo>()
+       : TypeInfoUtils.getTypeInfosFromTypeString(typesProperty);
+   assert columnNames.size() == columnTypes.size();
+
+   // The row is a struct made of all columns
+   rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNames, columnTypes);
+
+   // The inspector and the lazy binary struct are created once and reused for every row
+   cachedObjectInspector = LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(rowTypeInfo);
+   cachedLazyBinaryStruct = (LazyBinaryStruct) LazyBinaryFactory
+       .createLazyBinaryObject(cachedObjectInspector);
+
+   LOG.debug("LazyBinarySerDe initialized with: columnNames="
+       + columnNames + " columnTypes=" + columnTypes);
+ }
+
+ /**
+ * Returns the ObjectInspector for the row.
+ * It is the inspector that initialize() created from the row type.
+ * @return the cached row ObjectInspector
+ * @see Deserializer#getObjectInspector()
+ */
+ @Override
+ public ObjectInspector getObjectInspector() throws SerDeException {
+ return cachedObjectInspector;
+ }
+
+ /**
+ * Returns the Writable Class after serialization.
+ * @return BytesWritable.class
+ * @see Serializer#getSerializedClass()
+ */
+ @Override
+ public Class<? extends Writable> getSerializedClass() {
+ return BytesWritable.class;
+ }
+
+ // The wrapper for byte array
+ ByteArrayRef byteArrayRef;
+
+ /**
+  * Deserialize a table record to a lazybinary struct.
+  * The same struct instance is re-initialized and returned by every call.
+  * @param field a BytesWritable or a Text that holds one serialized row
+  * @return the cached lazy binary struct, or null when field holds zero bytes
+  * @throws SerDeException if field is neither a BytesWritable nor a Text
+  * @see Deserializer#deserialize(Writable)
+  */
+ @Override
+ public Object deserialize(Writable field) throws SerDeException {
+   if (byteArrayRef == null) {
+     byteArrayRef = new ByteArrayRef();
+   }
+
+   // pick the backing array and the number of valid bytes, depending on the input type
+   final byte[] rowBytes;
+   final int rowLength;
+   if (field instanceof BytesWritable) {
+     BytesWritable b = (BytesWritable) field;
+     rowLength = b.getSize();
+     if (rowLength == 0) {
+       return null;
+     }
+     // For backward-compatibility with hadoop 0.17
+     rowBytes = b.get();
+   } else if (field instanceof Text) {
+     Text t = (Text) field;
+     rowLength = t.getLength();
+     if (rowLength == 0) {
+       return null;
+     }
+     rowBytes = t.getBytes();
+   } else {
+     throw new SerDeException(getClass().toString()
+         + ": expects either BytesWritable or Text object!");
+   }
+
+   byteArrayRef.setData(rowBytes);
+   cachedLazyBinaryStruct.init(byteArrayRef, 0, rowLength);
+   return cachedLazyBinaryStruct;
+ }
+
+ /**
+ * The reusable output buffer and serialize byte buffer.
+ */
+ BytesWritable serializeBytesWritable = new BytesWritable();
+ ByteStream.Output serializeByteStream = new ByteStream.Output();
+
+ /**
+  * Serialize an object to a byte buffer in a binary compact way.
+  * The returned writable is a reusable field of this SerDe, so its content
+  * is replaced by the next call.
+  * @throws SerDeException if objInspector does not describe a struct
+  * @see Serializer#serialize(Object, ObjectInspector)
+  */
+ @Override
+ public Writable serialize(Object obj, ObjectInspector objInspector)
+     throws SerDeException {
+   // a row has to be a struct record
+   final boolean isStruct = (objInspector.getCategory() == Category.STRUCT);
+   if (!isStruct) {
+     throw new SerDeException(getClass().toString()
+         + " can only serialize struct types, but we got: "
+         + objInspector.getTypeName());
+   }
+
+   // start from an empty buffer and write the row as a struct
+   serializeByteStream.reset();
+   serializeStruct(serializeByteStream, obj, (StructObjectInspector) objInspector);
+
+   // hand the written bytes over to the reusable writable
+   final byte[] buffer = serializeByteStream.getData();
+   final int byteCount = serializeByteStream.getCount();
+   serializeBytesWritable.set(buffer, 0, byteCount);
+   return serializeBytesWritable;
+ }
+
+ boolean nullMapKey = false;
+
+ /**
+  * Serialize a struct object without writing the byte size.
+  * This function is shared by both row serialization and
+  * struct serialization.
+  *
+  * The fields are written in groups of up to eight: one null byte, in which
+  * bit k is set when the k-th field of the group is not null, followed by
+  * the serialized fields of that group. Null fields contribute no bytes.
+  *
+  * @param byteStream the byte stream storing the serialization data
+  * @param obj the struct object to serialize
+  * @param soi the struct object inspector
+  */
+ private void serializeStruct(Output byteStream,
+     Object obj, StructObjectInspector soi) {
+   // do nothing for null struct
+   if (null == obj) {
+     return;
+   }
+   /*
+    * Interleave serializing one null byte and 8 struct fields
+    * in each round, in order to support data deserialization
+    * with different table schemas
+    */
+   List<? extends StructField> fields = soi.getAllStructFieldRefs();
+   int size = fields.size();
+   int lasti = 0;
+   byte nullByte = 0;
+   for (int i = 0; i < size; i++) {
+     // set bit to 1 if a field is not null
+     if (null != soi.getStructFieldData(obj, fields.get(i))) {
+       nullByte |= 1 << (i % 8);
+     }
+     // write the null byte every eight elements or
+     // if this is the last element and serialize the
+     // corresponding 8 struct fields at the same time
+     if (7 == i % 8 || i == size - 1) {
+       // write to the stream that was passed in, not to the row-level buffer field
+       byteStream.write(nullByte);
+       for (int j = lasti; j <= i; j++) {
+         StructField field = fields.get(j);
+         serialize(byteStream,
+             soi.getStructFieldData(obj, field),
+             field.getFieldObjectInspector());
+       }
+       lasti = i + 1;
+       nullByte = 0;
+     }
+   }
+ }
+
+ /**
+ * A recursive function that serialize an object to a byte buffer
+ * based on its object inspector.
+ * @param byteStream the byte stream storing the serialization data
+ * @param obj the object to serialize
+ * @param objInspector the object inspector
+ * @see LazyBinaryUtils#checkObjectByteInfo(ObjectInspector, byte[], int, LazyBinaryUtils.RecordInfo) for
+ * how the byte sizes of different object are decoded.
+ */
+ private void serialize(Output byteStream,
+ Object obj, ObjectInspector objInspector) {
+
+ // do nothing for null object
+ if (null == obj)
+ return;
+
+ switch (objInspector.getCategory()) {
+ case PRIMITIVE: {
+ PrimitiveObjectInspector poi = (PrimitiveObjectInspector)objInspector;
+ switch (poi.getPrimitiveCategory()) {
+ case VOID: {
+ return;
+ }
+ case BOOLEAN: {
+ BooleanObjectInspector boi = (BooleanObjectInspector)poi;
+ boolean v = ((BooleanObjectInspector)poi).get(obj);
+ byteStream.write((byte)(v ? 1 : 0));
+ return;
+ }
+ case BYTE: {
+ ByteObjectInspector boi = (ByteObjectInspector)poi;
+ byte v = boi.get(obj);
+ byteStream.write(v);
+ return;
+ }
+ case SHORT: {
+ ShortObjectInspector spoi = (ShortObjectInspector)poi;
+ short v = spoi.get(obj);
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case INT: {
+ IntObjectInspector ioi = (IntObjectInspector)poi;
+ int v = ioi.get(obj);
+ LazyBinaryUtils.writeVInt(byteStream, v);
+ return;
+ }
+ case LONG: {
+ LongObjectInspector loi = (LongObjectInspector)poi;
+ long v = loi.get(obj);
+ LazyBinaryUtils.writeVLong(byteStream, v);
+ return;
+ }
+ case FLOAT: {
+ FloatObjectInspector foi = (FloatObjectInspector)poi;
+ int v = Float.floatToIntBits(foi.get(obj));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case DOUBLE: {
+ DoubleObjectInspector doi = (DoubleObjectInspector)poi;
+ long v = Double.doubleToLongBits(doi.get(obj));
+ byteStream.write((byte) (v >> 56));
+ byteStream.write((byte) (v >> 48));
+ byteStream.write((byte) (v >> 40));
+ byteStream.write((byte) (v >> 32));
+ byteStream.write((byte) (v >> 24));
+ byteStream.write((byte) (v >> 16));
+ byteStream.write((byte) (v >> 8));
+ byteStream.write((byte) (v));
+ return;
+ }
+ case STRING: {
+ StringObjectInspector soi = (StringObjectInspector)poi;
+ Text t = soi.getPrimitiveWritableObject(obj);
+ /* write byte size of the string which is a vint */
+ int length = t.getLength();
+ LazyBinaryUtils.writeVInt(byteStream, length);
+ /* write string itself */
+ byte[] data = t.getBytes();
+ byteStream.write(data, 0, length);
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + poi.getPrimitiveCategory());
+ }
+ }
+ }
+ case LIST: {
+ ListObjectInspector loi = (ListObjectInspector)objInspector;
+ ObjectInspector eoi = loi.getListElementObjectInspector();
+
+ // 1/ reserve spaces for the byte size of the list
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int listStart = byteStream.getCount();
+
+ // 2/ write the size of the list as a VInt
+ int size = loi.getListLength(obj);
+ LazyBinaryUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ byte nullByte = 0;
+ for (int eid = 0; eid < size; eid++) {
+ // set the bit to 1 if an element is not null
+ if (null != loi.getListElement(obj, eid)) {
+ nullByte |= 1 << (eid%8);
+ }
+ // store the byte every eight elements or
+ // if this is the last element
+ if (7 == eid%8 || eid == size-1) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write element by element from the list
+ for (int eid = 0; eid < size; eid++) {
+ serialize(byteStream, loi.getListElement(obj, eid), eoi);
+ }
+
+ // 5/ update the list byte size
+ int listEnd = byteStream.getCount();
+ int listSize = listEnd - listStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (listSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (listSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (listSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (listSize);
+
+ return;
+ }
+ case MAP: {
+ MapObjectInspector moi = (MapObjectInspector)objInspector;
+ ObjectInspector koi = moi.getMapKeyObjectInspector();
+ ObjectInspector voi = moi.getMapValueObjectInspector();
+ Map<?, ?> map = moi.getMap(obj);
+
+ // 1/ reserve spaces for the byte size of the map
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int mapStart = byteStream.getCount();
+
+ // 2/ write the size of the map which is a VInt
+ int size = map.size();
+ LazyBinaryUtils.writeVInt(byteStream, size);
+
+ // 3/ write the null bytes
+ int b = 0;
+ byte nullByte = 0;
+ for (Map.Entry<?, ?> entry: map.entrySet()) {
+ // set the bit to 1 if a key is not null
+ if (null != entry.getKey()) {
+ nullByte |= 1 << (b%8);
+ } else if (!nullMapKey) {
+ nullMapKey = true;
+ LOG.warn("Null map key encountered! Ignoring similar problems.");
+ }
+ b++;
+ // set the bit to 1 if a value is not null
+ if (null != entry.getValue()) {
+ nullByte |= 1 << (b%8);
+ }
+ b++;
+ // write the byte to stream every 4 key-value pairs
+ // or if this is the last key-value pair
+ if (0 == b%8 || b == size*2) {
+ byteStream.write(nullByte);
+ nullByte = 0;
+ }
+ }
+
+ // 4/ write key-value pairs one by one
+ for(Map.Entry<?, ?> entry: map.entrySet()) {
+ serialize(byteStream, entry.getKey(), koi);
+ serialize(byteStream, entry.getValue(), voi);
+ }
+
+ // 5/ update the byte size of the map
+ int mapEnd = byteStream.getCount();
+ int mapSize = mapEnd - mapStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (mapSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (mapSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (mapSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (mapSize);
+
+ return;
+ }
+ case STRUCT: {
+ // 1/ reserve spaces for the byte size of the struct
+ // which is a integer and takes four bytes
+ int byteSizeStart = byteStream.getCount();
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ byteStream.write((byte) 0);
+ int structStart = byteStream.getCount();
+
+ // 2/ serialize the struct
+ serializeStruct(byteStream, obj, (StructObjectInspector) objInspector);
+
+ // 3/ update the byte size of the struct
+ int structEnd = byteStream.getCount();
+ int structSize = structEnd - structStart;
+ byte [] bytes = byteStream.getData();
+ bytes[byteSizeStart ] = (byte) (structSize >> 24);
+ bytes[byteSizeStart + 1] = (byte) (structSize >> 16);
+ bytes[byteSizeStart + 2] = (byte) (structSize >> 8);
+ bytes[byteSizeStart + 3] = (byte) (structSize);
+
+ return;
+ }
+ default: {
+ throw new RuntimeException("Unrecognized type: " + objInspector.getCategory());
+ }
+ }
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryShort.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,44 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.io.ShortWritable;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableShortObjectInspector;
+
+/**
+ * LazyBinaryObject for short which takes two bytes.
+ */
+public class LazyBinaryShort extends LazyBinaryPrimitive<WritableShortObjectInspector, ShortWritable> {
+
+ LazyBinaryShort(WritableShortObjectInspector oi) {
+ super(oi);
+ data = new ShortWritable();
+ }
+
+ LazyBinaryShort(LazyBinaryShort copy) {
+ super(copy);
+ data = new ShortWritable(copy.data.get());
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ assert(2 == length);
+ data.set(LazyBinaryUtils.byteArrayToShort(bytes.getData(), start));
+ }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryString.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,50 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableStringObjectInspector;
+
+/**
+ * The serialization of LazyBinaryString is very simple:
+ *
+ *      start                  A               end
+ * bytes[] -> |---------------------------------|
+ *
+ * Section A is just an array of bytes which are exactly
+ * the Text contained in this object.
+ */
+public class LazyBinaryString extends LazyBinaryPrimitive<WritableStringObjectInspector, Text> {
+
+  /**
+   * Creates an instance backed by a fresh, empty Text.
+   */
+  LazyBinaryString(WritableStringObjectInspector oi) {
+    super(oi);
+    data = new Text();
+  }
+
+  /**
+   * Copy constructor: the new instance owns its own Text initialized
+   * from the Text of the copied object.
+   */
+  public LazyBinaryString(LazyBinaryString copy) {
+    super(copy);
+    data = new Text(copy.data);
+  }
+
+  /**
+   * Sets the backing Text to the length bytes found at offset start
+   * of the referenced byte array. A length of 0 is valid and yields
+   * an empty string.
+   */
+  @Override
+  public void init(ByteArrayRef bytes, int start, int length) {
+    assert (length > -1);
+    data.set(bytes.getData(), start, length);
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryStruct.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,231 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.RecordInfo;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+
+/**
+ * LazyBinaryStruct is serialized as follows:
+ * start A B A B A B end
+ * bytes[] -> |-----|---------|--- ... ---|-----|---------|
+ *
+ * Section A is one null-byte, corresponding to eight struct fields in Section B.
+ * Each bit indicates whether the corresponding field is null (0) or not null (1).
+ * Each field is a LazyBinaryObject.
+ *
+ * Following B, there is another section A and B. This pattern repeats until the
+ * all struct fields are serialized.
+ */
+public class LazyBinaryStruct extends LazyBinaryNonPrimitive<LazyBinaryStructObjectInspector> {
+
+ private static Log LOG = LogFactory.getLog(LazyBinaryStruct.class.getName());
+
+ /**
+ * Whether the data is already parsed or not.
+ */
+ boolean parsed;
+
+ /**
+ * The fields of the struct.
+ */
+ LazyBinaryObject[] fields;
+
+ /**
+ * Whether a field is initialized or not.
+ */
+ boolean[] fieldInited;
+
+ /**
+ * Whether a field is null or not.
+ * Because length is 0 does not means the field is null.
+ * In particular, a 0-length string is not null.
+ */
+ boolean[] fieldIsNull;
+
+ /**
+ * The start positions and lengths of struct fields.
+ * Only valid when the data is parsed.
+ */
+ int[] fieldStart;
+ int[] fieldLength;
+
+ /**
+ * Construct a LazyBinaryStruct object with an ObjectInspector.
+ */
+ protected LazyBinaryStruct(LazyBinaryStructObjectInspector oi) {
+ super(oi);
+ }
+
+ @Override
+ public void init(ByteArrayRef bytes, int start, int length) {
+ super.init(bytes, start, length);
+ parsed = false;
+ }
+
+ RecordInfo recordInfo = new LazyBinaryUtils.RecordInfo();
+ boolean missingFieldWarned = false;
+ boolean extraFieldWarned = false;
+ /**
+ * Parse the byte[] and fill fieldStart, fieldLength,
+ * fieldInited and fieldIsNull.
+ */
+ private void parse() {
+
+ List<? extends StructField> fieldRefs = ((StructObjectInspector)oi).getAllStructFieldRefs();
+
+ if (fields == null) {
+ fields = new LazyBinaryObject[fieldRefs.size()];
+ for (int i = 0 ; i < fields.length; i++) {
+ fields[i] = LazyBinaryFactory.createLazyBinaryObject(fieldRefs.get(i).getFieldObjectInspector());
+ }
+ fieldInited = new boolean[fields.length];
+ fieldIsNull = new boolean[fields.length];
+ fieldStart = new int[fields.length];
+ fieldLength = new int[fields.length];
+ }
+
+ /**
+ * Please note that one null byte is followed by eight fields,
+ * then more null byte and fields.
+ */
+
+ int fieldId = 0;
+ int structByteEnd = start + length;
+ byte[] bytes = this.bytes.getData();
+
+ byte nullByte = bytes[start];
+ int lastFieldByteEnd = start + 1;
+ // Go through all bytes in the byte[]
+ for (int i=0; i<fields.length; i++) {
+ fieldIsNull[i] = true;
+ if ((nullByte & (1 << (i%8))) !=0) {
+ fieldIsNull[i] = false;
+ LazyBinaryUtils.checkObjectByteInfo(fieldRefs.get(i).getFieldObjectInspector(),
+ bytes, lastFieldByteEnd, recordInfo);
+ fieldStart[i] = lastFieldByteEnd + recordInfo.elementOffset;
+ fieldLength[i] = recordInfo.elementSize;
+ lastFieldByteEnd = fieldStart[i] + fieldLength[i];
+ }
+
+ // count how many fields are there
+ if (lastFieldByteEnd <= structByteEnd)
+ fieldId ++;
+ // next byte is a null byte if there are more bytes to go
+ if (7 == (i%8)) {
+ if (lastFieldByteEnd < structByteEnd) {
+ nullByte = bytes[lastFieldByteEnd];
+ lastFieldByteEnd ++;
+ } else {
+ // otherwise all null afterwards
+ nullByte = 0;
+ lastFieldByteEnd ++;
+ }
+ }
+ }
+
+ // Extra bytes at the end?
+ if (!extraFieldWarned && lastFieldByteEnd < structByteEnd) {
+ extraFieldWarned = true;
+ LOG.warn("Extra bytes detected at the end of the row! Ignoring similar "
+ + "problems.");
+ }
+
+ // Missing fields?
+ if (!missingFieldWarned && lastFieldByteEnd > structByteEnd) {
+ missingFieldWarned = true;
+ LOG.warn("Missing fields! Expected " + fields.length + " fields but "
+ + "only got " + fieldId + "! Ignoring similar problems.");
+ }
+
+ Arrays.fill(fieldInited, false);
+ parsed = true;
+ }
+
+ /**
+ * Get one field out of the struct.
+ *
+ * If the field is a primitive field, return the actual object.
+ * Otherwise return the LazyObject. This is because PrimitiveObjectInspector
+ * does not have control over the object used by the user - the user simply
+ * directly use the Object instead of going through
+ * Object PrimitiveObjectInspector.get(Object).
+ *
+ * @param fieldID The field ID
+ * @return The field as a LazyObject
+ */
+ public Object getField(int fieldID) {
+ if (!parsed) {
+ parse();
+ }
+ return uncheckedGetField(fieldID);
+ }
+
+ /**
+ * Get the field out of the row without checking parsed.
+ * This is called by both getField and getFieldsAsList.
+ * @param fieldID The id of the field starting from 0.
+ * @return The value of the field
+ */
+ private Object uncheckedGetField(int fieldID) {
+ // Test the length first so in most cases we avoid doing a byte[]
+ // comparison.
+ if (fieldIsNull[fieldID]) {
+ return null;
+ }
+ if (!fieldInited[fieldID]) {
+ fieldInited[fieldID] = true;
+ fields[fieldID].init(bytes, fieldStart[fieldID], fieldLength[fieldID]);
+ }
+ return fields[fieldID].getObject();
+ }
+
+ ArrayList<Object> cachedList;
+ /**
+ * Get the values of the fields as an ArrayList.
+ * @return The values of the fields as an ArrayList.
+ */
+ public ArrayList<Object> getFieldsAsList() {
+ if (!parsed) {
+ parse();
+ }
+ if (cachedList == null) {
+ cachedList = new ArrayList<Object>();
+ } else {
+ cachedList.clear();
+ }
+ for (int i=0; i<fields.length; i++) {
+ cachedList.add(uncheckedGetField(i));
+ }
+ return cachedList;
+ }
+
+ @Override
+ public Object getObject() {
+ return this;
+ }
+}