You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by zs...@apache.org on 2009/08/06 04:19:02 UTC
svn commit: r801497 [2/2] - in /hadoop/hive/trunk: ./
serde/src/java/org/apache/hadoop/hive/serde2/lazy/
serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/
serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/
serde/src/java/org/apac...
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,344 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.ByteStream.Output;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.WritableUtils;
+
+public class LazyBinaryUtils {
+
+  /**
+   * Decodes four consecutive bytes as a big-endian (most significant byte
+   * first) 32-bit integer.
+   * Adapted from a snippet by aeden on DZone Snippets.
+   *
+   * @param b the byte array to read from
+   * @param offset index in {@code b} of the most significant byte
+   * @return the decoded integer
+   */
+  public static int byteArrayToInt(byte[] b, int offset) {
+    int result = 0;
+    for (int k = 0; k < 4; k++) {
+      // Shift what we have so far up by one byte and append the next byte,
+      // masked so that it is treated as unsigned.
+      result = (result << 8) | (b[offset + k] & 0xFF);
+    }
+    return result;
+  }
+
+  /**
+   * Decodes eight consecutive bytes as a big-endian (most significant byte
+   * first) 64-bit integer.
+   *
+   * @param b the byte array to read from
+   * @param offset index in {@code b} of the most significant byte
+   * @return the decoded long
+   */
+  public static long byteArrayToLong(byte[] b, int offset) {
+    long result = 0L;
+    for (int k = 0; k < 8; k++) {
+      // The 0xFFL mask widens the byte to long without sign extension.
+      result = (result << 8) | (b[offset + k] & 0xFFL);
+    }
+    return result;
+  }
+
+  /**
+   * Decodes two consecutive bytes as a big-endian (most significant byte
+   * first) 16-bit integer.
+   *
+   * @param b the byte array to read from
+   * @param offset index in {@code b} of the most significant byte
+   * @return the decoded short
+   */
+  public static short byteArrayToShort(byte[] b, int offset) {
+    int high = b[offset] & 0xFF;
+    int low = b[offset + 1] & 0xFF;
+    // Narrowing to short keeps the low 16 bits, which yields the signed value.
+    return (short) ((high << 8) | low);
+  }
+
+  /**
+   * Describes the layout of one serialized record.
+   *
+   * A record is the unit in which data is serialized and consists of two
+   * parts: a leading "size" part, followed by the element itself.
+   *
+   *              size           element
+   *   record -> |----|-------------------------|
+   *
+   * A RecordInfo holds two pieces of information about a record: the length
+   * of the "size" part (which is also the offset at which the element
+   * starts) and the length of the element part.
+   */
+  public static class RecordInfo {
+    /** Length in bytes of the "size" part, i.e. the offset of the element. */
+    public byte elementOffset = 0;
+    /** Length in bytes of the element part. */
+    public int elementSize = 0;
+
+    /** Creates a RecordInfo with both fields set to zero. */
+    public RecordInfo() {
+    }
+  }
+
+  /**
+   * Shared scratch VInt. Kept only so that existing references to this field
+   * keep compiling; {@link #checkObjectByteInfo} no longer uses it because a
+   * mutable static shared by all callers is not safe under concurrent use.
+   */
+  static VInt vInt = new LazyBinaryUtils.VInt();
+
+  /**
+   * Inspects the field that starts at {@code offset} and stores its element
+   * offset and element size (both in bytes) into {@code recordInfo}.
+   *
+   * Layout per type, as implemented below:
+   * <ul>
+   * <li>void, boolean, byte: no size prefix, 1 byte;</li>
+   * <li>short: no size prefix, 2 bytes;</li>
+   * <li>float: no size prefix, 4 bytes;</li>
+   * <li>double: no size prefix, 8 bytes;</li>
+   * <li>int, long: no size prefix; stored zero-compressed, so the size is
+   * decoded from the first byte;</li>
+   * <li>string: prefixed by a zero-compressed int holding the element size;
+   * the element offset is the length of that prefix;</li>
+   * <li>list, map, struct: prefixed by a 4-byte big-endian int holding the
+   * element size, so the element offset is 4.</li>
+   * </ul>
+   *
+   * @param objectInspector object inspector of the field
+   * @param bytes byte array storing the serialized row
+   * @param offset offset in {@code bytes} at which this field starts
+   * @param recordInfo output parameter; its elementOffset and elementSize
+   *          are overwritten
+   * @throws RuntimeException if the category or primitive category of the
+   *           field is not one of those listed above
+   */
+  public static void checkObjectByteInfo(ObjectInspector objectInspector, byte[] bytes, int offset, RecordInfo recordInfo) {
+    Category category = objectInspector.getCategory();
+    switch (category) {
+    case PRIMITIVE:
+      PrimitiveCategory primitiveCategory = ((PrimitiveObjectInspector) objectInspector).getPrimitiveCategory();
+      switch (primitiveCategory) {
+      case VOID:
+      case BOOLEAN:
+      case BYTE:
+        recordInfo.elementOffset = 0;
+        recordInfo.elementSize = 1;
+        break;
+      case SHORT:
+        recordInfo.elementOffset = 0;
+        recordInfo.elementSize = 2;
+        break;
+      case FLOAT:
+        recordInfo.elementOffset = 0;
+        recordInfo.elementSize = 4;
+        break;
+      case DOUBLE:
+        recordInfo.elementOffset = 0;
+        recordInfo.elementSize = 8;
+        break;
+      case INT:
+      case LONG:
+        // Zero-compressed encoding: the first byte alone determines how many
+        // bytes the value occupies.
+        recordInfo.elementOffset = 0;
+        recordInfo.elementSize = WritableUtils.decodeVIntSize(bytes[offset]);
+        break;
+      case STRING: {
+        // The size prefix is a vint rather than a fixed 4 bytes. Decode it
+        // into a local object so that concurrent callers cannot overwrite
+        // each other's result.
+        VInt sizePrefix = new VInt();
+        LazyBinaryUtils.readVInt(bytes, offset, sizePrefix);
+        recordInfo.elementOffset = sizePrefix.length;
+        recordInfo.elementSize = sizePrefix.value;
+        break;
+      }
+      default: {
+        throw new RuntimeException("Unrecognized primitive type: " + primitiveCategory);
+      }
+      }
+      break;
+    case LIST:
+    case MAP:
+    case STRUCT:
+      recordInfo.elementOffset = 4;
+      recordInfo.elementSize = LazyBinaryUtils.byteArrayToInt(bytes, offset);
+      break;
+    default: {
+      throw new RuntimeException("Unrecognized non-primitive type: " + category);
+    }
+    }
+  }
+
+  /**
+   * Holder for a decoded zero-compressed long together with the number of
+   * bytes its encoding occupied.
+   *
+   * @see WritableUtils#readVLong(java.io.DataInput)
+   */
+  public static class VLong {
+    /** The decoded value. */
+    public long value = 0;
+    /** Length in bytes of the encoded form. */
+    public byte length = 0;
+
+    /** Creates a VLong with both fields set to zero. */
+    public VLong() {
+    }
+  }
+
+  /**
+   * Decodes a zero-compressed long from a byte array.
+   *
+   * The result is not returned; the decoded value and the number of bytes
+   * consumed are stored into {@code vlong}.
+   *
+   * @param bytes the byte array
+   * @param offset index in {@code bytes} of the first encoded byte
+   * @param vlong output holder for the decoded long and its encoded length
+   * @see WritableUtils#readVLong(java.io.DataInput)
+   */
+  public static void readVLong(byte[] bytes, int offset, VLong vlong) {
+    final byte head = bytes[offset];
+    final byte encodedLength = (byte) WritableUtils.decodeVIntSize(head);
+    vlong.length = encodedLength;
+
+    // A one-byte encoding carries the value directly in the first byte.
+    if (encodedLength == 1) {
+      vlong.value = head;
+      return;
+    }
+
+    // Otherwise the bytes after the first hold the payload, most significant
+    // byte first.
+    long payload = 0;
+    for (int k = 1; k < encodedLength; k++) {
+      payload = (payload << 8) | (bytes[offset + k] & 0xFF);
+    }
+
+    // For a negative-marked encoding, every bit of the payload is inverted.
+    if (WritableUtils.isNegativeVInt(head)) {
+      vlong.value = payload ^ -1L;
+    } else {
+      vlong.value = payload;
+    }
+  }
+
+  /**
+   * Holder for a decoded zero-compressed int together with the number of
+   * bytes its encoding occupied.
+   *
+   * @see WritableUtils#readVInt(java.io.DataInput)
+   */
+  public static class VInt {
+    /** The decoded value. */
+    public int value = 0;
+    /** Length in bytes of the encoded form. */
+    public byte length = 0;
+
+    /** Creates a VInt with both fields set to zero. */
+    public VInt() {
+    }
+  }
+
+  /**
+   * Decodes a zero-compressed int from a byte array.
+   *
+   * The result is not returned; the decoded value and the number of bytes
+   * consumed are stored into {@code vInt}.
+   *
+   * @param bytes the byte array
+   * @param offset index in {@code bytes} of the first encoded byte
+   * @param vInt output holder for the decoded int and its encoded length
+   * @see WritableUtils#readVInt(java.io.DataInput)
+   */
+  public static void readVInt(byte[] bytes, int offset, VInt vInt) {
+    final byte head = bytes[offset];
+    final byte encodedLength = (byte) WritableUtils.decodeVIntSize(head);
+    vInt.length = encodedLength;
+
+    // A one-byte encoding carries the value directly in the first byte.
+    if (encodedLength == 1) {
+      vInt.value = head;
+      return;
+    }
+
+    // Otherwise the bytes after the first hold the payload, most significant
+    // byte first.
+    int payload = 0;
+    for (int k = 1; k < encodedLength; k++) {
+      payload = (payload << 8) | (bytes[offset + k] & 0xFF);
+    }
+
+    // For a negative-marked encoding, every bit of the payload is inverted.
+    if (WritableUtils.isNegativeVInt(head)) {
+      vInt.value = payload ^ -1;
+    } else {
+      vInt.value = payload;
+    }
+  }
+
+  /**
+   * Appends a zero-compressed encoded int to a byte stream.
+   *
+   * The int is widened to long and written by
+   * {@link LazyBinaryUtils#writeVLong(Output, long)}.
+   *
+   * @param byteStream the byte array/stream to append to
+   * @param i the int to encode
+   * @see LazyBinaryUtils#writeVLong(Output, long)
+   */
+  public static void writeVInt(Output byteStream, int i) {
+    final long widened = i;
+    LazyBinaryUtils.writeVLong(byteStream, widened);
+  }
+
+  /**
+   * Appends a zero-compressed encoded long to a byte stream.
+   *
+   * Values in [-112, 127] are written as a single byte. Any other value is
+   * written as one marker byte followed by the significant bytes of the
+   * value (of its one's complement when the value is negative), most
+   * significant byte first. The marker is -112 minus the number of payload
+   * bytes for non-negative values and -120 minus that number for negative
+   * values.
+   *
+   * @param byteStream the byte array/stream to append to
+   * @param l the long to encode
+   * @see WritableUtils#writeVLong(java.io.DataOutput, long)
+   */
+  public static void writeVLong(Output byteStream, long l) {
+    // Small values fit into the single leading byte.
+    if (l >= -112 && l <= 127) {
+      byteStream.write((byte) l);
+      return;
+    }
+
+    // Negative values are stored as their one's complement, which is
+    // non-negative, so the byte counting loop below terminates.
+    final boolean negative = l < 0;
+    final long magnitude = negative ? (l ^ -1L) : l;
+
+    // Count the significant bytes of the magnitude.
+    int payloadBytes = 0;
+    for (long rest = magnitude; rest != 0; rest = rest >> 8) {
+      payloadBytes++;
+    }
+
+    // The marker byte encodes both the sign and the payload length.
+    final int marker = (negative ? -120 : -112) - payloadBytes;
+    byteStream.write((byte) marker);
+
+    // Emit the payload, most significant byte first; the narrowing cast
+    // keeps only the eight bits that were shifted into the lowest position.
+    for (int shift = (payloadBytes - 1) * 8; shift >= 0; shift -= 8) {
+      byteStream.write((byte) (magnitude >> shift));
+    }
+  }
+
+  /** Cache of the object inspectors already built, keyed by type info. */
+  static HashMap<TypeInfo, ObjectInspector> cachedLazyBinaryObjectInspector = new HashMap<TypeInfo, ObjectInspector>();
+
+  /**
+   * Returns the object inspector that can be used to inspect a lazy binary
+   * object of the given type, building and caching it on first use.
+   *
+   * Primitive types use the standard writable object inspector. List, map
+   * and struct types get a lazy binary inspector whose nested inspectors are
+   * obtained by calling this method recursively.
+   *
+   * @param typeInfo the type to obtain an inspector for
+   * @return the inspector, or null when the category of {@code typeInfo} is
+   *         not primitive, list, map or struct
+   */
+  public static ObjectInspector getLazyBinaryObjectInspectorFromTypeInfo(TypeInfo typeInfo) {
+    ObjectInspector cached = cachedLazyBinaryObjectInspector.get(typeInfo);
+    if (cached != null) {
+      return cached;
+    }
+
+    final ObjectInspector created;
+    switch (typeInfo.getCategory()) {
+    case PRIMITIVE: {
+      PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
+      created = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(
+          primitiveTypeInfo.getPrimitiveCategory());
+      break;
+    }
+    case LIST: {
+      ListTypeInfo listTypeInfo = (ListTypeInfo) typeInfo;
+      ObjectInspector elementInspector =
+          getLazyBinaryObjectInspectorFromTypeInfo(listTypeInfo.getListElementTypeInfo());
+      created = LazyBinaryObjectInspectorFactory.getLazyBinaryListObjectInspector(elementInspector);
+      break;
+    }
+    case MAP: {
+      MapTypeInfo mapTypeInfo = (MapTypeInfo) typeInfo;
+      ObjectInspector keyInspector =
+          getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapKeyTypeInfo());
+      ObjectInspector valueInspector =
+          getLazyBinaryObjectInspectorFromTypeInfo(mapTypeInfo.getMapValueTypeInfo());
+      created = LazyBinaryObjectInspectorFactory.getLazyBinaryMapObjectInspector(keyInspector, valueInspector);
+      break;
+    }
+    case STRUCT: {
+      StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
+      List<String> names = structTypeInfo.getAllStructFieldNames();
+      List<TypeInfo> memberTypes = structTypeInfo.getAllStructFieldTypeInfos();
+      List<ObjectInspector> memberInspectors = new ArrayList<ObjectInspector>(memberTypes.size());
+      for (TypeInfo memberType : memberTypes) {
+        memberInspectors.add(getLazyBinaryObjectInspectorFromTypeInfo(memberType));
+      }
+      created = LazyBinaryObjectInspectorFactory.getLazyBinaryStructObjectInspector(names, memberInspectors);
+      break;
+    }
+    default: {
+      created = null;
+    }
+    }
+
+    // The result is stored even when it is null, matching the lookup above,
+    // which treats a null entry the same as a missing one.
+    cachedLazyBinaryObjectInspector.put(typeInfo, created);
+    return created;
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryListObjectInspector.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryArray;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * ObjectInspector for list data held in a {@link LazyBinaryArray}.
+ *
+ * Every accessor casts the data object to LazyBinaryArray and delegates to
+ * it; null data is answered without touching the array.
+ *
+ * @see LazyBinaryArray
+ */
+public class LazyBinaryListObjectInspector extends StandardListObjectInspector {
+
+  protected LazyBinaryListObjectInspector(ObjectInspector listElementObjectInspector) {
+    super(listElementObjectInspector);
+  }
+
+  /**
+   * @return the list view of the array, or null if {@code data} is null
+   */
+  @Override
+  public List<?> getList(Object data) {
+    return data == null ? null : ((LazyBinaryArray) data).getList();
+  }
+
+  /**
+   * @return the element object at {@code index} as reported by the array, or
+   *         null if {@code data} is null
+   */
+  @Override
+  public Object getListElement(Object data, int index) {
+    return data == null ? null : ((LazyBinaryArray) data).getListElementObject(index);
+  }
+
+  /**
+   * @return the length reported by the array, or -1 if {@code data} is null
+   */
+  @Override
+  public int getListLength(Object data) {
+    return data == null ? -1 : ((LazyBinaryArray) data).getListLength();
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryMapObjectInspector.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryMap;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardMapObjectInspector;
+
+/**
+ * ObjectInspector for map data held in a {@link LazyBinaryMap}.
+ *
+ * Every accessor casts the data object to LazyBinaryMap and delegates to it;
+ * null data is answered without touching the map.
+ *
+ * @see LazyBinaryMap
+ */
+public class LazyBinaryMapObjectInspector extends StandardMapObjectInspector {
+
+  protected LazyBinaryMapObjectInspector(ObjectInspector mapKeyObjectInspector,
+      ObjectInspector mapValueObjectInspector) {
+    super(mapKeyObjectInspector, mapValueObjectInspector);
+  }
+
+  /**
+   * @return the map view of the data, or null if {@code data} is null
+   */
+  @Override
+  public Map<?, ?> getMap(Object data) {
+    if (data == null) {
+      return null;
+    }
+    return ((LazyBinaryMap) data).getMap();
+  }
+
+  /**
+   * @return the size reported by the map, or -1 if {@code data} is null
+   */
+  @Override
+  public int getMapSize(Object data) {
+    if (data == null) {
+      return -1;
+    }
+    return ((LazyBinaryMap) data).getMapSize();
+  }
+
+  /**
+   * @return the value the map reports for {@code key}, or null if
+   *         {@code data} is null
+   */
+  @Override
+  public Object getMapValueElement(Object data, Object key) {
+    if (data == null) {
+      // A missing map has no value for any key. Returning -1 here (as the
+      // size accessor does) would hand callers a boxed Integer that looks
+      // like a real map value.
+      return null;
+    }
+    return ((LazyBinaryMap) data).getMapValueElement(key);
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,66 @@
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+/**
+ * LazyBinaryObjectInspectorFactory is the way to obtain the lazy binary
+ * struct, list and map ObjectInspector instances.
+ *
+ * SerDe classes should call the static functions in this class to create an
+ * ObjectInspector to return to the caller of SerDe2.getObjectInspector().
+ *
+ * Instances are cached because ObjectInspectors do not have an internal
+ * state, so ObjectInspectors with the same construction parameters should
+ * result in exactly the same ObjectInspector. Each cache is keyed by the
+ * list of construction parameters.
+ */
+public class LazyBinaryObjectInspectorFactory {
+
+  static HashMap<ArrayList<Object>, LazyBinaryStructObjectInspector> cachedLazyBinaryStructObjectInspector =
+      new HashMap<ArrayList<Object>, LazyBinaryStructObjectInspector>();
+
+  static HashMap<ArrayList<Object>, LazyBinaryListObjectInspector> cachedLazyBinaryListObjectInspector =
+      new HashMap<ArrayList<Object>, LazyBinaryListObjectInspector>();
+
+  static HashMap<ArrayList<Object>, LazyBinaryMapObjectInspector> cachedLazyBinaryMapObjectInspector =
+      new HashMap<ArrayList<Object>, LazyBinaryMapObjectInspector>();
+
+  /**
+   * Returns the cached struct inspector for the given field names and field
+   * inspectors, creating and caching it first if there is none yet.
+   */
+  public static LazyBinaryStructObjectInspector getLazyBinaryStructObjectInspector(List<String> structFieldNames,
+      List<ObjectInspector> structFieldObjectInspectors) {
+    ArrayList<Object> key = new ArrayList<Object>(2);
+    key.add(structFieldNames);
+    key.add(structFieldObjectInspectors);
+
+    LazyBinaryStructObjectInspector inspector = cachedLazyBinaryStructObjectInspector.get(key);
+    if (inspector != null) {
+      return inspector;
+    }
+    inspector = new LazyBinaryStructObjectInspector(structFieldNames, structFieldObjectInspectors);
+    cachedLazyBinaryStructObjectInspector.put(key, inspector);
+    return inspector;
+  }
+
+  /**
+   * Returns the cached list inspector for the given element inspector,
+   * creating and caching it first if there is none yet.
+   */
+  public static LazyBinaryListObjectInspector getLazyBinaryListObjectInspector(
+      ObjectInspector listElementObjectInspector) {
+    ArrayList<Object> key = new ArrayList<Object>(1);
+    key.add(listElementObjectInspector);
+
+    LazyBinaryListObjectInspector inspector = cachedLazyBinaryListObjectInspector.get(key);
+    if (inspector != null) {
+      return inspector;
+    }
+    inspector = new LazyBinaryListObjectInspector(listElementObjectInspector);
+    cachedLazyBinaryListObjectInspector.put(key, inspector);
+    return inspector;
+  }
+
+  /**
+   * Returns the cached map inspector for the given key and value inspectors,
+   * creating and caching it first if there is none yet.
+   */
+  public static LazyBinaryMapObjectInspector getLazyBinaryMapObjectInspector(
+      ObjectInspector mapKeyObjectInspector, ObjectInspector mapValueObjectInspector) {
+    ArrayList<Object> key = new ArrayList<Object>(2);
+    key.add(mapKeyObjectInspector);
+    key.add(mapValueObjectInspector);
+
+    LazyBinaryMapObjectInspector inspector = cachedLazyBinaryMapObjectInspector.get(key);
+    if (inspector != null) {
+      return inspector;
+    }
+    inspector = new LazyBinaryMapObjectInspector(mapKeyObjectInspector, mapValueObjectInspector);
+    cachedLazyBinaryMapObjectInspector.put(key, inspector);
+    return inspector;
+  }
+}
Added: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java (added)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryStructObjectInspector.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,63 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary.objectinspector;
+
+import java.util.List;
+
+import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+
+/**
+ * ObjectInspector for struct data held in a {@link LazyBinaryStruct}.
+ *
+ * Both accessors cast the data object to LazyBinaryStruct and delegate to
+ * it; null data yields null.
+ *
+ * @see LazyBinaryStruct
+ */
+public class LazyBinaryStructObjectInspector extends StandardStructObjectInspector {
+
+  protected LazyBinaryStructObjectInspector(List<String> structFieldNames, List<ObjectInspector> structFieldObjectInspectors) {
+    super(structFieldNames, structFieldObjectInspectors);
+  }
+
+  protected LazyBinaryStructObjectInspector(List<StructField> fields) {
+    super(fields);
+  }
+
+  /**
+   * Looks up one field of the struct by the id carried in {@code fieldRef}.
+   *
+   * @return the field object reported by the struct, or null if
+   *         {@code data} is null
+   */
+  @Override
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data == null) {
+      return null;
+    }
+    LazyBinaryStruct lazyStruct = (LazyBinaryStruct) data;
+    int fieldID = ((MyField) fieldRef).getFieldID();
+    // Sanity check only; active when assertions are enabled.
+    assert (fieldID >= 0 && fieldID < fields.size());
+    return lazyStruct.getField(fieldID);
+  }
+
+  /**
+   * @return all fields as reported by the struct, or null if {@code data}
+   *         is null
+   */
+  @Override
+  public List<Object> getStructFieldsDataAsList(Object data) {
+    return data == null ? null : ((LazyBinaryStruct) data).getFieldsAsList();
+  }
+}
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorUtils.java Thu Aug 6 02:19:01 2009
@@ -462,7 +462,7 @@
case LIST: {
ListObjectInspector loi1 = (ListObjectInspector)oi1;
ListObjectInspector loi2 = (ListObjectInspector)oi2;
- int minimum = Math.min(loi1.getListLength(o1), loi1.getListLength(o2));
+ int minimum = Math.min(loi1.getListLength(o1), loi2.getListLength(o2));
for (int i=0; i<minimum; i++) {
int r = compare(
loi1.getListElement(o1, i),
@@ -471,7 +471,7 @@
loi2.getListElementObjectInspector());
if (r != 0) return r;
}
- return loi1.getListLength(o1) - loi1.getListLength(o2);
+ return loi1.getListLength(o1) - loi2.getListLength(o2);
}
case MAP: {
throw new RuntimeException("Compare on map type not supported!");
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardListObjectInspector.java Thu Aug 6 02:19:01 2009
@@ -27,7 +27,7 @@
* Always use the ObjectInspectorFactory to create new ObjectInspector objects, instead
* of directly creating an instance of this class.
*/
-class StandardListObjectInspector implements ListObjectInspector {
+public class StandardListObjectInspector implements ListObjectInspector {
ObjectInspector listElementObjectInspector;
Modified: hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java (original)
+++ hadoop/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/StandardMapObjectInspector.java Thu Aug 6 02:19:01 2009
@@ -32,7 +32,7 @@
* Always use the ObjectInspectorFactory to create new ObjectInspector objects, instead
* of directly creating an instance of this class.
*/
-class StandardMapObjectInspector implements MapObjectInspector {
+public class StandardMapObjectInspector implements MapObjectInspector {
ObjectInspector mapKeyObjectInspector;
ObjectInspector mapValueObjectInspector;
Modified: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java (original)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestClass.java Thu Aug 6 02:19:01 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hive.serde2.binarysortable;
import java.util.List;
@@ -12,4 +29,21 @@
String myString;
MyTestInnerStruct myStruct;
List<Integer> myList;
+
+  /** Creates an instance without assigning any field. */
+  public MyTestClass() {
+  }
+
+  /**
+   * Creates an instance and assigns each argument to the corresponding field.
+   */
+  public MyTestClass(Byte b, Short s, Integer i, Long l,
+      Float f, Double d, String st, MyTestInnerStruct is,
+      List<Integer> li) {
+    myByte = b;
+    myShort = s;
+    myInt = i;
+    myLong = l;
+    myFloat = f;
+    myDouble = d;
+    myString = st;
+    myStruct = is;
+    myList = li;
+  }
}
Modified: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java (original)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/MyTestInnerStruct.java Thu Aug 6 02:19:01 2009
@@ -1,3 +1,20 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
package org.apache.hadoop.hive.serde2.binarysortable;
public class MyTestInnerStruct {
Modified: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java?rev=801497&r1=801496&r2=801497&view=diff
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java (original)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/binarysortable/TestBinarySortableSerDe.java Thu Aug 6 02:19:01 2009
@@ -47,7 +47,7 @@
return r;
}
- public String hexString(BytesWritable bytes) {
+ public static String hexString(BytesWritable bytes) {
StringBuilder sb = new StringBuilder();
for (int i=0; i<bytes.getSize(); i++) {
byte b = bytes.get()[i];
@@ -55,7 +55,7 @@
sb.append(String.format("x%02x", v));
}
return sb.toString();
- }
+ }
private SerDe getSerDe(String fieldNames, String fieldTypes, String order) throws Throwable {
Properties schema = new Properties();
@@ -117,7 +117,7 @@
}
}
- static String getRandString(Random r) {
+ public static String getRandString(Random r) {
int length = r.nextInt(10);
StringBuilder sb = new StringBuilder();
for(int i=0; i<length; i++) {
@@ -126,7 +126,7 @@
return sb.toString();
}
- static List<Integer> getRandIntegerArray(Random r) {
+ public static List<Integer> getRandIntegerArray(Random r) {
int length = r.nextInt(10);
ArrayList<Integer> result = new ArrayList<Integer>(length);
for(int i=0; i<length; i++) {
Added: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java (added)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassBigger.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,54 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+
+/**
+ * Test row type with ten fields: seven boxed primitive/String fields, one
+ * nested struct, one integer list and one map from String to a list of
+ * structs.
+ *
+ * The declaration order of the fields is kept as is; TestLazyBinarySerDe
+ * looks up the map field by position (index 9) on a reflection-based
+ * object inspector built from this class.
+ */
+public class MyTestClassBigger {
+  Byte myByte;
+  Short myShort;
+  Integer myInt;
+  Long myLong;
+  Float myFloat;
+  Double myDouble;
+  String myString;
+  MyTestInnerStruct myStruct;
+  List<Integer> myList;
+  Map<String, List<MyTestInnerStruct>> myMap;
+
+  /** Creates a row with every field left at null. */
+  public MyTestClassBigger() {
+  }
+
+  /**
+   * Creates a row from explicit field values; any argument may be null.
+   *
+   * @param b  value for myByte
+   * @param s  value for myShort
+   * @param i  value for myInt
+   * @param l  value for myLong
+   * @param f  value for myFloat
+   * @param d  value for myDouble
+   * @param st value for myString
+   * @param is value for myStruct
+   * @param li value for myList
+   * @param mp value for myMap
+   */
+  public MyTestClassBigger(Byte b, Short s, Integer i, Long l,
+      Float f, Double d, String st, MyTestInnerStruct is,
+      List<Integer> li, Map<String, List<MyTestInnerStruct>> mp) {
+    // Scalar fields
+    myByte = b;
+    myShort = s;
+    myInt = i;
+    myLong = l;
+    myFloat = f;
+    myDouble = d;
+    myString = st;
+    // Complex fields
+    myStruct = is;
+    myList = li;
+    myMap = mp;
+  }
+}
Added: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java (added)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/MyTestClassSmaller.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+
+/**
+ * Test row type with eight fields: seven boxed primitive/String fields
+ * followed by one nested struct. It has the same leading fields as
+ * MyTestClassBigger but no list and no map.
+ */
+public class MyTestClassSmaller {
+  Byte myByte;
+  Short myShort;
+  Integer myInt;
+  Long myLong;
+  Float myFloat;
+  Double myDouble;
+  String myString;
+  MyTestInnerStruct myStruct;
+
+  /** Creates a row with every field left at null. */
+  public MyTestClassSmaller() {
+  }
+
+  /**
+   * Creates a row from explicit field values; any argument may be null.
+   *
+   * @param b  value for myByte
+   * @param s  value for myShort
+   * @param i  value for myInt
+   * @param l  value for myLong
+   * @param f  value for myFloat
+   * @param d  value for myDouble
+   * @param st value for myString
+   * @param is value for myStruct
+   */
+  public MyTestClassSmaller(Byte b, Short s, Integer i, Long l,
+      Float f, Double d, String st, MyTestInnerStruct is) {
+    // Scalar fields
+    myByte = b;
+    myShort = s;
+    myInt = i;
+    myLong = l;
+    myFloat = f;
+    myDouble = d;
+    myString = st;
+    // Nested struct
+    myStruct = is;
+  }
+}
Added: hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java?rev=801497&view=auto
==============================================================================
--- hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java (added)
+++ hadoop/hive/trunk/serde/src/test/org/apache/hadoop/hive/serde2/lazybinary/TestLazyBinarySerDe.java Thu Aug 6 02:19:01 2009
@@ -0,0 +1,468 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.lazybinary;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Properties;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.serde.Constants;
+import org.apache.hadoop.hive.serde2.SerDe;
+import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestClass;
+import org.apache.hadoop.hive.serde2.binarysortable.MyTestInnerStruct;
+import org.apache.hadoop.hive.serde2.binarysortable.TestBinarySortableSerDe;
+import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryMapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.io.BytesWritable;
+
+import junit.framework.TestCase;
+
+public class TestLazyBinarySerDe extends TestCase {
+
+ /**
+ * Generate a random struct array
+ * @param r random number generator
+ * @return an struct array
+ */
+ static List<MyTestInnerStruct> getRandStructArray(Random r) {
+ int length = r.nextInt(10);
+ ArrayList<MyTestInnerStruct> result = new ArrayList<MyTestInnerStruct>(length);
+ for(int i=0; i<length; i++) {
+ MyTestInnerStruct ti = new MyTestInnerStruct(r.nextInt(), r.nextInt());
+ result.add(ti);
+ }
+ return result;
+ }
+
+ /**
+ * Initialize the LazyBinarySerDe
+ * @param fieldNames table field names
+ * @param fieldTypes table field types
+ * @return the initialized LazyBinarySerDe
+ * @throws Throwable
+ */
+ private SerDe getSerDe(String fieldNames, String fieldTypes) throws Throwable {
+ Properties schema = new Properties();
+ schema.setProperty(Constants.LIST_COLUMNS, fieldNames);
+ schema.setProperty(Constants.LIST_COLUMN_TYPES, fieldTypes);
+
+ LazyBinarySerDe serde = new LazyBinarySerDe();
+ serde.initialize(new Configuration(), schema);
+ return serde;
+ }
+
+ /**
+ * Test the LazyBinarySerDe.
+ * @param rows array of structs to be serialized
+ * @param rowOI array of struct object inspectors
+ * @param serde the serde
+ * @throws Throwable
+ */
+ private void testLazyBinarySerDe(Object[] rows,
+ ObjectInspector rowOI, SerDe serde) throws Throwable{
+
+ ObjectInspector serdeOI = serde.getObjectInspector();
+
+ // Try to serialize
+ BytesWritable bytes[] = new BytesWritable[rows.length];
+ for (int i=0; i<rows.length; i++) {
+ BytesWritable s = (BytesWritable)serde.serialize(rows[i], rowOI);
+ bytes[i] = new BytesWritable();
+ bytes[i].set(s);
+ }
+
+ // Try to deserialize
+ Object[] deserialized = new Object[rows.length];
+ for (int i=0; i<rows.length; i++) {
+ deserialized[i] = serde.deserialize(bytes[i]);
+ if (0 != ObjectInspectorUtils.compare(rows[i], rowOI, deserialized[i], serdeOI)) {
+ System.out.println("structs[" + i + "] = " + SerDeUtils.getJSONString(rows[i], rowOI));
+ System.out.println("deserialized[" + i + "] = " + SerDeUtils.getJSONString(deserialized[i], serdeOI));
+ System.out.println("serialized[" + i + "] = " + TestBinarySortableSerDe.hexString(bytes[i]));
+ assertEquals(rows[i], deserialized[i]);
+ }
+ }
+ }
+
+ /**
+ * Compare two structs that have different number of fields.
+ * We just compare the first few common fields, ignoring the
+ * fields existing in one struct but not the other.
+ *
+ * @see ObjectInspectorUtils#compare(Object, ObjectInspector, Object, ObjectInspector)
+ */
+ int compareDiffSizedStructs(Object o1, ObjectInspector oi1, Object o2, ObjectInspector oi2) {
+ StructObjectInspector soi1 = (StructObjectInspector)oi1;
+ StructObjectInspector soi2 = (StructObjectInspector)oi2;
+ List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
+ List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
+ int minimum = Math.min(fields1.size(), fields2.size());
+ for (int i=0; i<minimum; i++) {
+ int result = ObjectInspectorUtils.compare(
+ soi1.getStructFieldData(o1, fields1.get(i)),
+ fields1.get(i).getFieldObjectInspector(),
+ soi2.getStructFieldData(o2, fields2.get(i)),
+ fields2.get(i).getFieldObjectInspector());
+ if (result != 0) {
+ return result;
+ }
+ }
+ return 0;
+ }
+
+
+  /**
+   * Test shorter schema deserialization, where a bigger struct is serialized
+   * and then deserialized with a smaller struct.
+   * Here the serialized struct (MyTestClassBigger) has 10 fields and it is
+   * deserialized into a struct of 9 fields (MyTestClass). Only the common
+   * leading fields are compared.
+   *
+   * @param r random number generator used to build the test rows
+   * @throws Throwable anything raised by the serdes or by the comparison
+   */
+  private void testShorterSchemaDeserialization(Random r) throws Throwable {
+
+    StructObjectInspector rowOI1 = (StructObjectInspector) ObjectInspectorFactory
+        .getReflectionObjectInspector(MyTestClassBigger.class,
+            ObjectInspectorOptions.JAVA);
+    String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+    String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+    SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+
+    StructObjectInspector rowOI2 = (StructObjectInspector) ObjectInspectorFactory
+        .getReflectionObjectInspector(MyTestClass.class,
+            ObjectInspectorOptions.JAVA);
+    String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+    String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+    SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+    ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+    int num = 100;
+    for (int itest = 0; itest < num; itest++) {
+      // randField is in 0..10; the field at index k is null when randField > k.
+      int randField = r.nextInt(11);
+      Byte b = randField > 0 ? null : Byte.valueOf((byte) r.nextInt());
+      Short s = randField > 1 ? null : Short.valueOf((short) r.nextInt());
+      Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+      Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+      Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+      Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+      String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+      MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5) - 2, r.nextInt(5) - 2);
+      List<Integer> li = randField > 8 ? null : TestBinarySortableSerDe.getRandIntegerArray(r);
+
+      Map<String, List<MyTestInnerStruct>> mp = new HashMap<String, List<MyTestInnerStruct>>();
+      String key = TestBinarySortableSerDe.getRandString(r);
+      // The map is the field at index 9, so its first value follows the same
+      // "randField > index" rule. Comparing against 10 could never be true
+      // for nextInt(11), which left the null branch unreachable.
+      List<MyTestInnerStruct> value = randField > 9 ? null : getRandStructArray(r);
+      mp.put(key, value);
+      // One entry that always has a null value ...
+      String key1 = TestBinarySortableSerDe.getRandString(r);
+      mp.put(key1, null);
+      // ... and one entry that always has a non-null value.
+      String key2 = TestBinarySortableSerDe.getRandString(r);
+      List<MyTestInnerStruct> value2 = getRandStructArray(r);
+      mp.put(key2, value2);
+
+      MyTestClassBigger input = new MyTestClassBigger(b, s, n, l, f, d, st, is, li, mp);
+      BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+      Object output = serde2.deserialize(bw);
+
+      if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+        System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+        System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+        System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+        assertEquals(input, output);
+      }
+    }
+  }
+
+ /**
+ * Test shorter schema deserialization where a bigger struct is serialized
+ * and it is then deserialized with a smaller struct.
+ * Here the serialized struct has 9 fields and we deserialized to a
+ * struct of 8 fields.
+ */
+ private void testShorterSchemaDeserialization1(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassSmaller.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(10);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+
+ MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if(0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ /**
+ * Test longer schema deserialization where a smaller struct is serialized
+ * and it is then deserialized with a bigger struct
+ * Here the serialized struct has 9 fields and we deserialized to a
+ * struct of 10 fields.
+ */
+ void testLongerSchemaDeserialization(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassBigger.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(10);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+ List<Integer> li = randField > 8 ? null: TestBinarySortableSerDe.getRandIntegerArray(r);
+
+ MyTestClass input = new MyTestClass(b,s,n,l,f,d,st,is,li);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ /**
+ * Test longer schema deserialization where a smaller struct is serialized
+ * and it is then deserialized with a bigger struct
+ * Here the serialized struct has 8 fields and we deserialized to a
+ * struct of 9 fields.
+ */
+ void testLongerSchemaDeserialization1(Random r) throws Throwable{
+
+ StructObjectInspector rowOI1 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassSmaller.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames1 = ObjectInspectorUtils.getFieldNames(rowOI1);
+ String fieldTypes1 = ObjectInspectorUtils.getFieldTypes(rowOI1);
+ SerDe serde1 = getSerDe(fieldNames1, fieldTypes1);
+ ObjectInspector serdeOI1 = serde1.getObjectInspector();
+
+ StructObjectInspector rowOI2 = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClass.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames2 = ObjectInspectorUtils.getFieldNames(rowOI2);
+ String fieldTypes2 = ObjectInspectorUtils.getFieldTypes(rowOI2);
+ SerDe serde2 = getSerDe(fieldNames2, fieldTypes2);
+ ObjectInspector serdeOI2 = serde2.getObjectInspector();
+
+ int num = 100;
+ for (int itest=0; itest<num; itest++) {
+ int randField = r.nextInt(9);
+ Byte b = randField > 0 ? null : Byte.valueOf((byte)r.nextInt());
+ Short s = randField > 1 ? null : Short.valueOf((short)r.nextInt());
+ Integer n = randField > 2 ? null : Integer.valueOf(r.nextInt());
+ Long l = randField > 3 ? null : Long.valueOf(r.nextLong());
+ Float f = randField > 4 ? null : Float.valueOf(r.nextFloat());
+ Double d = randField > 5 ? null : Double.valueOf(r.nextDouble());
+ String st = randField > 6 ? null : TestBinarySortableSerDe.getRandString(r);
+ MyTestInnerStruct is = randField > 7 ? null : new MyTestInnerStruct(r.nextInt(5)-2, r.nextInt(5)-2);
+
+ MyTestClassSmaller input = new MyTestClassSmaller(b,s,n,l,f,d,st,is);
+ BytesWritable bw = (BytesWritable) serde1.serialize(input, rowOI1);
+ Object output = serde2.deserialize(bw);
+
+ if (0 != compareDiffSizedStructs(input, rowOI1, output, serdeOI2)) {
+ System.out.println("structs = " + SerDeUtils.getJSONString(input, rowOI1));
+ System.out.println("deserialized = " + SerDeUtils.getJSONString(output, serdeOI2));
+ System.out.println("serialized = " + TestBinarySortableSerDe.hexString(bw));
+ assertEquals(input, output);
+ }
+ }
+ }
+
+ void testLazyBinaryMap(Random r) throws Throwable {
+
+ StructObjectInspector rowOI = (StructObjectInspector)ObjectInspectorFactory
+ .getReflectionObjectInspector(MyTestClassBigger.class,
+ ObjectInspectorOptions.JAVA);
+ String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
+ String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+ SerDe serde = getSerDe(fieldNames, fieldTypes);
+ ObjectInspector serdeOI = serde.getObjectInspector();
+
+ StructObjectInspector soi1 = (StructObjectInspector)serdeOI;
+ List<? extends StructField> fields1 = soi1.getAllStructFieldRefs();
+ LazyBinaryMapObjectInspector lazympoi = (LazyBinaryMapObjectInspector) fields1.get(9).getFieldObjectInspector();
+ ObjectInspector lazympkeyoi = lazympoi.getMapKeyObjectInspector();
+ ObjectInspector lazympvalueoi = lazympoi.getMapValueObjectInspector();
+
+ StructObjectInspector soi2 = (StructObjectInspector)rowOI;
+ List<? extends StructField> fields2 = soi2.getAllStructFieldRefs();
+ MapObjectInspector inputmpoi = (MapObjectInspector) fields2.get(9).getFieldObjectInspector();
+ ObjectInspector inputmpkeyoi = inputmpoi.getMapKeyObjectInspector();
+ ObjectInspector inputmpvalueoi = inputmpoi.getMapValueObjectInspector();
+
+ int num = 100;
+ for (int testi=0; testi<num; testi++) {
+
+ Map<String, List<MyTestInnerStruct>> mp = new LinkedHashMap<String, List<MyTestInnerStruct>>();
+
+ int randFields = r.nextInt(10);
+ for (int i=0; i<randFields; i++) {
+ String key = TestBinarySortableSerDe.getRandString(r);
+ int randField = r.nextInt(10);
+ List<MyTestInnerStruct> value = randField > 4 ? null: getRandStructArray(r);
+ mp.put(key, value);
+ }
+
+ MyTestClassBigger input = new MyTestClassBigger(null,null,null,null,null,null,null,null,null,mp);
+ BytesWritable bw = (BytesWritable) serde.serialize(input, rowOI);
+ Object output = serde.deserialize(bw);
+ Object lazyobj = soi1.getStructFieldData(output, fields1.get(9));
+ Map<?, ?> outputmp = lazympoi.getMap(lazyobj);
+
+ if (outputmp.size() != mp.size()) {
+ throw new RuntimeException("Map size changed from " + mp.size() + " to " + outputmp.size() + " after serialization!");
+ }
+
+ for (Map.Entry<?, ?> entryinput: mp.entrySet()) {
+ boolean bEqual = false;
+ for (Map.Entry<?, ?> entryoutput: outputmp.entrySet()) {
+ // find the same key
+ if (0 == ObjectInspectorUtils.compare(entryoutput.getKey(), lazympkeyoi, entryinput.getKey(), inputmpkeyoi)) {
+ if(0 != ObjectInspectorUtils.compare(entryoutput.getValue(), lazympvalueoi, entryinput.getValue(), inputmpvalueoi)) {
+ assertEquals(entryoutput.getValue(), entryinput.getValue());
+ } else {
+ bEqual = true;
+ }
+ break;
+ }
+ }
+ if(!bEqual)
+ throw new RuntimeException("Could not find matched key in deserialized map : " + entryinput.getKey());
+ }
+ }
+ }
+
+  /**
+   * JUnit entry point. Generates 1000 random MyTestClass rows from a fixed
+   * seed, runs the plain round-trip test on them, and then runs the map
+   * test and the four schema-mismatch tests with the same random generator.
+   * Any Throwable is printed and rethrown.
+   *
+   * @throws Throwable anything raised by one of the sub-tests
+   */
+  public void testLazyBinarySerDe() throws Throwable {
+    try {
+
+      System.out.println("Beginning Test TestLazyBinarySerDe:");
+
+      // Generate the data; the field at index k gets a value only when
+      // randField <= k.
+      final int rowCount = 1000;
+      final Random rng = new Random(1234);
+      final MyTestClass[] rows = new MyTestClass[rowCount];
+      int idx = 0;
+      while (idx < rowCount) {
+        final int randField = rng.nextInt(10);
+        final Byte byteVal = randField <= 0 ? Byte.valueOf((byte) rng.nextInt()) : null;
+        final Short shortVal = randField <= 1 ? Short.valueOf((short) rng.nextInt()) : null;
+        final Integer intVal = randField <= 2 ? Integer.valueOf(rng.nextInt()) : null;
+        final Long longVal = randField <= 3 ? Long.valueOf(rng.nextLong()) : null;
+        final Float floatVal = randField <= 4 ? Float.valueOf(rng.nextFloat()) : null;
+        final Double doubleVal = randField <= 5 ? Double.valueOf(rng.nextDouble()) : null;
+        final String strVal = randField <= 6 ? TestBinarySortableSerDe.getRandString(rng) : null;
+        final MyTestInnerStruct structVal = randField <= 7
+            ? new MyTestInnerStruct(rng.nextInt(5) - 2, rng.nextInt(5) - 2) : null;
+        final List<Integer> listVal = randField <= 8
+            ? TestBinarySortableSerDe.getRandIntegerArray(rng) : null;
+        rows[idx] = new MyTestClass(byteVal, shortVal, intVal, longVal,
+            floatVal, doubleVal, strVal, structVal, listVal);
+        idx++;
+      }
+
+      final StructObjectInspector rowOI = (StructObjectInspector) ObjectInspectorFactory
+          .getReflectionObjectInspector(MyTestClass.class, ObjectInspectorOptions.JAVA);
+
+      final String fieldNames = ObjectInspectorUtils.getFieldNames(rowOI);
+      final String fieldTypes = ObjectInspectorUtils.getFieldTypes(rowOI);
+
+      // 1/ plain round trip through LazyBinarySerDe
+      final SerDe serde = getSerDe(fieldNames, fieldTypes);
+      testLazyBinarySerDe(rows, rowOI, serde);
+      // 2/ map handling
+      testLazyBinaryMap(rng);
+      // 3/ 10-field writer, 9-field reader
+      testShorterSchemaDeserialization(rng);
+      // 4/ 9-field writer, 10-field reader
+      testLongerSchemaDeserialization(rng);
+      // 5/ 9-field writer, 8-field reader
+      testShorterSchemaDeserialization1(rng);
+      // 6/ 8-field writer, 9-field reader
+      testLongerSchemaDeserialization1(rng);
+
+      System.out.println("Test TestLazyBinarySerDe passed!");
+    } catch (Throwable e) {
+      e.printStackTrace();
+      throw e;
+    }
+  }
+}