Posted to commits@hive.apache.org by br...@apache.org on 2014/09/09 17:18:30 UTC

svn commit: r1623845 [3/3] - in /hive/trunk: hbase-handler/ hbase-handler/if/ hbase-handler/if/test/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/ hbase-handler/src/test/org/apache/hado...

Added: hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java (added)
+++ hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,194 @@
+/**
+ * Autogenerated by Avro
+ * 
+ * DO NOT EDIT DIRECTLY
+ */
+package org.apache.hadoop.hive.hbase.avro;  
+@SuppressWarnings("all")
+@org.apache.avro.specific.AvroGenerated
+public class OfficePhone extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
+  public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OfficePhone\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]}");
+  public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
+  @Deprecated public long areaCode;
+  @Deprecated public long number;
+
+  /**
+   * Default constructor.  Note that this does not initialize fields
+   * to their default values from the schema.  If that is desired then
+   * one should use <code>newBuilder()</code>. 
+   */
+  public OfficePhone() {}
+
+  /**
+   * All-args constructor.
+   */
+  public OfficePhone(java.lang.Long areaCode, java.lang.Long number) {
+    this.areaCode = areaCode;
+    this.number = number;
+  }
+
+  public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+  // Used by DatumWriter.  Applications should not call. 
+  public java.lang.Object get(int field$) {
+    switch (field$) {
+    case 0: return areaCode;
+    case 1: return number;
+    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    }
+  }
+  // Used by DatumReader.  Applications should not call. 
+  @SuppressWarnings(value="unchecked")
+  public void put(int field$, java.lang.Object value$) {
+    switch (field$) {
+    case 0: areaCode = (java.lang.Long)value$; break;
+    case 1: number = (java.lang.Long)value$; break;
+    default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+    }
+  }
+
+  /**
+   * Gets the value of the 'areaCode' field.
+   */
+  public java.lang.Long getAreaCode() {
+    return areaCode;
+  }
+
+  /**
+   * Sets the value of the 'areaCode' field.
+   * @param value the value to set.
+   */
+  public void setAreaCode(java.lang.Long value) {
+    this.areaCode = value;
+  }
+
+  /**
+   * Gets the value of the 'number' field.
+   */
+  public java.lang.Long getNumber() {
+    return number;
+  }
+
+  /**
+   * Sets the value of the 'number' field.
+   * @param value the value to set.
+   */
+  public void setNumber(java.lang.Long value) {
+    this.number = value;
+  }
+
+  /** Creates a new OfficePhone RecordBuilder */
+  public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder() {
+    return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder();
+  }
+  
+  /** Creates a new OfficePhone RecordBuilder by copying an existing Builder */
+  public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) {
+    return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other);
+  }
+  
+  /** Creates a new OfficePhone RecordBuilder by copying an existing OfficePhone instance */
+  public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) {
+    return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other);
+  }
+  
+  /**
+   * RecordBuilder for OfficePhone instances.
+   */
+  public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<OfficePhone>
+    implements org.apache.avro.data.RecordBuilder<OfficePhone> {
+
+    private long areaCode;
+    private long number;
+
+    /** Creates a new Builder */
+    private Builder() {
+      super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$);
+    }
+    
+    /** Creates a Builder by copying an existing Builder */
+    private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) {
+      super(other);
+      if (isValidValue(fields()[0], other.areaCode)) {
+        this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode);
+        fieldSetFlags()[0] = true;
+      }
+      if (isValidValue(fields()[1], other.number)) {
+        this.number = data().deepCopy(fields()[1].schema(), other.number);
+        fieldSetFlags()[1] = true;
+      }
+    }
+    
+    /** Creates a Builder by copying an existing OfficePhone instance */
+    private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) {
+            super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$);
+      if (isValidValue(fields()[0], other.areaCode)) {
+        this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode);
+        fieldSetFlags()[0] = true;
+      }
+      if (isValidValue(fields()[1], other.number)) {
+        this.number = data().deepCopy(fields()[1].schema(), other.number);
+        fieldSetFlags()[1] = true;
+      }
+    }
+
+    /** Gets the value of the 'areaCode' field */
+    public java.lang.Long getAreaCode() {
+      return areaCode;
+    }
+    
+    /** Sets the value of the 'areaCode' field */
+    public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setAreaCode(long value) {
+      validate(fields()[0], value);
+      this.areaCode = value;
+      fieldSetFlags()[0] = true;
+      return this; 
+    }
+    
+    /** Checks whether the 'areaCode' field has been set */
+    public boolean hasAreaCode() {
+      return fieldSetFlags()[0];
+    }
+    
+    /** Clears the value of the 'areaCode' field */
+    public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearAreaCode() {
+      fieldSetFlags()[0] = false;
+      return this;
+    }
+
+    /** Gets the value of the 'number' field */
+    public java.lang.Long getNumber() {
+      return number;
+    }
+    
+    /** Sets the value of the 'number' field */
+    public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setNumber(long value) {
+      validate(fields()[1], value);
+      this.number = value;
+      fieldSetFlags()[1] = true;
+      return this; 
+    }
+    
+    /** Checks whether the 'number' field has been set */
+    public boolean hasNumber() {
+      return fieldSetFlags()[1];
+    }
+    
+    /** Clears the value of the 'number' field */
+    public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearNumber() {
+      fieldSetFlags()[1] = false;
+      return this;
+    }
+
+    @Override
+    public OfficePhone build() {
+      try {
+        OfficePhone record = new OfficePhone();
+        record.areaCode = fieldSetFlags()[0] ? this.areaCode : (java.lang.Long) defaultValue(fields()[0]);
+        record.number = fieldSetFlags()[1] ? this.number : (java.lang.Long) defaultValue(fields()[1]);
+        return record;
+      } catch (Exception e) {
+        throw new org.apache.avro.AvroRuntimeException(e);
+      }
+    }
+  }
+}

Modified: hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java (original)
+++ hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java Tue Sep  9 15:18:29 2014
@@ -37,6 +37,8 @@ public class serdeConstants {
 
   public static final String SERIALIZATION_CLASS = "serialization.class";
 
+  public static final String SERIALIZATION_TYPE = "serialization.type";
+
   public static final String SERIALIZATION_FORMAT = "serialization.format";
 
   public static final String SERIALIZATION_DDL = "serialization.ddl";

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java Tue Sep  9 15:18:29 2014
@@ -17,6 +17,7 @@
  */
 package org.apache.hadoop.hive.serde2.avro;
 
+import java.io.ByteArrayInputStream;
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.DataOutputStream;
@@ -25,6 +26,7 @@ import java.io.InputStream;
 import java.rmi.server.UID;
 
 import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
 import org.apache.avro.generic.GenericData;
 import org.apache.avro.generic.GenericDatumReader;
 import org.apache.avro.generic.GenericDatumWriter;
@@ -101,6 +103,28 @@ public class AvroGenericRecordWritable i
     GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
     record = gdr.read(record, binaryDecoder);
   }
+  
+  public void readFields(byte[] bytes, int offset, int length, Schema writerSchema, Schema readerSchema) throws IOException {
+    fileSchema = writerSchema;
+    record = new GenericData.Record(writerSchema);
+    binaryDecoder =
+        DecoderFactory.get().binaryDecoder(bytes, offset, length - offset,
+	          binaryDecoder);
+    GenericDatumReader<GenericRecord> gdr =
+        new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
+    record = gdr.read(null, binaryDecoder);
+  }
+
+  public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
+    fileSchema = writerSchema;
+    record = new GenericData.Record(writerSchema);
+    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
+    gdr.setExpected(readerSchema);
+    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
+    DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr);
+    record = dfr.next(record);
+    dfr.close();
+  }
 
   public UID getRecordReaderID() {
     return recordReaderID;
@@ -117,5 +141,4 @@ public class AvroGenericRecordWritable i
   public void setFileSchema(Schema originalSchema) {
     this.fileSchema = originalSchema;
   }
-
 }
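
For readers following along, here is a minimal round-trip sketch (not part of the commit) of the new offset-aware readFields overload. The OfficePhone schema literal is the one from the generated test class above; getRecord() is assumed to be the existing accessor on AvroGenericRecordWritable, and the class and values are illustrative only.

package org.apache.hadoop.hive.serde2.avro;

import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.BinaryEncoder;
import org.apache.avro.io.EncoderFactory;

public class AvroGenericRecordWritableRoundTrip {

  public static void main(String[] args) throws IOException {
    Schema schema = new Schema.Parser().parse(
        "{\"type\":\"record\",\"name\":\"OfficePhone\","
        + "\"namespace\":\"org.apache.hadoop.hive.hbase.avro\","
        + "\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},"
        + "{\"name\":\"number\",\"type\":\"long\"}]}");

    // Encode a single datum with the plain binary encoding (no container header)
    GenericRecord phone = new GenericData.Record(schema);
    phone.put("areaCode", 415L);
    phone.put("number", 5550123L);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    BinaryEncoder encoder = EncoderFactory.get().binaryEncoder(out, null);
    new GenericDatumWriter<GenericRecord>(schema).write(phone, encoder);
    encoder.flush();
    byte[] payload = out.toByteArray();

    // Decode it back through the new overload; writer and reader schema happen
    // to be identical here, and the payload starts at offset 0
    AvroGenericRecordWritable writable = new AvroGenericRecordWritable();
    writable.readFields(payload, 0, payload.length, schema, schema);
    System.out.println(writable.getRecord());
  }
}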

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,506 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.commons.lang.ClassUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyArray;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyMap;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.lazy.LazyUnion;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Lazy ObjectInspector for Avro serialization
+ * */
+public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector {
+
+  /**
+   * Reader {@link Schema} for the avro data
+   * */
+  private Schema readerSchema;
+
+  /**
+   * {@link AvroSchemaRetriever} to retrieve avro schema
+   * */
+  private AvroSchemaRetriever schemaRetriever;
+
+  /**
+   * LOGGER
+   * */
+  public static final Log LOG = LogFactory.getLog(AvroLazyObjectInspector.class);
+
+  /**
+   * Constructor
+   *
+   * @param structFieldNames fields within the given Avro record
+   * @param structFieldObjectInspectors object inspectors for the fields
+   * @param structFieldComments comments for the given fields
+   * @param separator separator between different fields
+   * @param nullSequence sequence to represent null value
+   * @param lastColumnTakesRest whether the last column of the struct should take the rest of the
+   *          row if there are extra fields.
+   * @param escaped whether the data is escaped or not
+   * @param escapeChar if escaped is true, the escape character
+   * */
+  public AvroLazyObjectInspector(List<String> structFieldNames,
+      List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
+      byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+      byte escapeChar) {
+    super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator,
+        nullSequence, lastColumnTakesRest, escaped, escapeChar);
+  }
+
+  /**
+   * Set the reader schema for the {@link AvroLazyObjectInspector} to the given schema
+   * */
+  public void setReaderSchema(Schema readerSchema) {
+    this.readerSchema = readerSchema;
+  }
+
+  /**
+   * Set the {@link AvroSchemaRetriever} for the {@link AvroLazyObjectInspector}
+   *
+   * @param schemaRetriever the schema retriever to be set
+   * */
+  public void setSchemaRetriever(AvroSchemaRetriever schemaRetriever) {
+    this.schemaRetriever = schemaRetriever;
+  }
+
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data == null) {
+      return null;
+    }
+
+    if (!(fieldRef instanceof MyField)) {
+      throw new IllegalArgumentException("fieldRef has to be of MyField");
+    }
+
+    MyField f = (MyField) fieldRef;
+    int fieldID = f.getFieldID();
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Getting struct field data for field: [" + f.getFieldName() + "] on data ["
+          + data.getClass() + "]");
+    }
+
+    if (data instanceof LazyStruct) {
+      LazyStruct row = (LazyStruct) data;
+
+      // get the field out of struct
+      Object rowField = row.getField(fieldID);
+
+      if (rowField instanceof LazyStruct) {
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Deserializing struct [" + rowField.getClass() + "]");
+        }
+
+        return deserializeStruct(rowField, f.getFieldName());
+
+      } else if (rowField instanceof LazyMap) {
+        // We have found a map. Deserialize each value of the map and return the
+        // resulting map
+        LazyMap lazyMap = (LazyMap) rowField;
+
+        for (Entry<Object, Object> entry : lazyMap.getMap().entrySet()) {
+          Object _key = entry.getKey();
+          Object _value = entry.getValue();
+
+          if (_value instanceof LazyStruct) {
+            lazyMap.getMap().put(_key, deserializeStruct(_value, f.getFieldName()));
+          }
+        }
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Returning a lazy map for field [" + f.getFieldName() + "]");
+        }
+
+        return lazyMap;
+
+      } else {
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Returning [" + rowField.toString() + "] for field [" + f.getFieldName() + "]");
+        }
+
+        // Just return the object. We need no further operation on it
+        return rowField;
+      }
+    } else {
+
+      // The Avro deserializer deserializes our object and returns a list of objects that
+      // Hive can operate on. Here we should be getting that same list back.
+      if (!(data instanceof List)) {
+        throw new IllegalArgumentException("data should be an instance of list");
+      }
+
+      if (!(fieldID < ((List<Object>) data).size())) {
+        return null;
+      }
+
+      // lookup the field corresponding to the given field ID and return
+      Object field = ((List<Object>) data).get(fieldID);
+
+      if (field == null) {
+        return null;
+      }
+
+      // convert to a lazy object and return
+      return toLazyObject(field, fieldRef.getFieldObjectInspector());
+    }
+  }
+
+  @Override
+  public List<Object> getStructFieldsDataAsList(Object data) {
+    if (data == null) {
+      return null;
+    }
+
+    List<Object> result = new ArrayList<Object>(fields.size());
+
+    for (int i = 0; i < fields.size(); i++) {
+      result.add(getStructFieldData(data, fields.get(i)));
+    }
+
+    return result;
+  }
+
+  /**
+   * Deserialize the given struct object
+   *
+   * @param struct the object to deserialize
+   * @param fieldName name of the field we are currently operating on
+   * @return a deserialized object that Hive can further operate on
+   * @throws AvroObjectInspectorException if something goes wrong during deserialization
+   * */
+  private Object deserializeStruct(Object struct, String fieldName) {
+    byte[] data = ((LazyStruct) struct).getBytes();
+    AvroDeserializer deserializer = new AvroDeserializer();
+
+    if (data == null) {
+      return null;
+    }
+
+    if (readerSchema == null && schemaRetriever == null) {
+      throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field ["
+          + fieldName + "]");
+    }
+
+    Schema ws = null;
+    Schema rs = null;
+    int offset = 0;
+
+    AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
+
+    if (readerSchema == null) {
+
+      rs = schemaRetriever.retrieveReaderSchema(data);
+
+      if (rs == null) {
+        // still nothing; raise an exception
+        throw new IllegalStateException(
+            "A valid reader schema could not be retrieved either directly or from the schema retriever for field ["
+                + fieldName + "]");
+      }
+
+      ws = schemaRetriever.retrieveWriterSchema(data);
+
+      if (ws == null) {
+        throw new IllegalStateException(
+            "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+      }
+
+      // adjust the data bytes according to any possible offset that was provided
+      offset = schemaRetriever.getOffset();
+
+      if (data.length < offset) {
+        throw new IllegalArgumentException("Data size cannot be less than [" + offset
+            + "]. Found [" + data.length + "]");
+      }
+
+      if (LOG.isDebugEnabled()) {
+        LOG.debug("Retrieved writer Schema: " + ws.toString());
+        LOG.debug("Retrieved reader Schema: " + rs.toString());
+      }
+
+      try {
+        avroWritable.readFields(data, offset, data.length, ws, rs);
+      } catch (IOException ioe) {
+        throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+      }
+    } else {
+      // a reader schema was provided
+      if (schemaRetriever != null) {
+        // a schema retriever has been provided as well. Attempt to read the writer schema from
+        // the retriever
+        ws = schemaRetriever.retrieveWriterSchema(data);
+
+        if (ws == null) {
+          throw new IllegalStateException(
+              "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+        }
+      } else {
+        // attempt retrieving the schema from the data
+        ws = retrieveSchemaFromBytes(data);
+      }
+
+      rs = readerSchema;
+
+      try {
+        avroWritable.readFields(data, ws, rs);
+      } catch (IOException ioe) {
+        throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+      }
+    }
+
+    AvroObjectInspectorGenerator oiGenerator = null;
+    Object deserializedObject = null;
+
+    try {
+      oiGenerator = new AvroObjectInspectorGenerator(rs);
+      deserializedObject =
+          deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(),
+              avroWritable, rs);
+    } catch (SerDeException se) {
+      throw new AvroObjectInspectorException("Error deserializing avro payload", se);
+    }
+
+    return deserializedObject;
+  }
+
+  /**
+   * Retrieve schema from the given bytes
+   *
+   * @param data the serialized Avro payload to read the schema from
+   * @return the retrieved {@link Schema schema}
+   * */
+  private Schema retrieveSchemaFromBytes(byte[] data) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(data);
+    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
+
+    Schema schema = null;
+
+    try {
+      // dfs is Closeable but only wraps an in-memory byte array, so it is not closed here
+      @SuppressWarnings("resource")
+      DataFileStream<GenericRecord> dfs = new DataFileStream<GenericRecord>(bais, reader);
+      schema = dfs.getSchema();
+    } catch (IOException ioe) {
+      throw new AvroObjectInspectorException("An error occurred retrieving schema from bytes", ioe);
+    }
+
+    return schema;
+  }
+
+  /**
+   * Converts the given field to a lazy object
+   *
+   * @param field to be converted to a lazy object
+   * @param fieldOI {@link ObjectInspector} for the given field
+   * @return returns the converted lazy object
+   * */
+  private Object toLazyObject(Object field, ObjectInspector fieldOI) {
+    if (isPrimitive(field.getClass())) {
+      return toLazyPrimitiveObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyListObjectInspector) {
+      return toLazyListObject(field, fieldOI);
+    } else if (field instanceof StandardUnion) {
+      return toLazyUnionObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyMapObjectInspector) {
+      return toLazyMapObject(field, fieldOI);
+    } else {
+      return field;
+    }
+  }
+
+  /**
+   * Convert the given object to a lazy object using the given {@link ObjectInspector}
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param oi ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}
+   * */
+  private LazyObject<? extends ObjectInspector> toLazyPrimitiveObject(Object obj, ObjectInspector oi) {
+    if (obj == null) {
+      return null;
+    }
+
+    LazyObject<? extends ObjectInspector> lazyObject = LazyFactory.createLazyObject(oi);
+    ByteArrayRef ref = new ByteArrayRef();
+
+    String objAsString = obj.toString().trim();
+
+    ref.setData(objAsString.getBytes());
+
+    // initialize the lazy object
+    lazyObject.init(ref, 0, ref.getData().length);
+
+    return lazyObject;
+  }
+
+  /**
+   * Convert the given list to a lazy list using the given {@link ObjectInspector}
+   *
+   * @param obj list to be converted to a lazy list
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created lazy list
+   * */
+  private Object toLazyListObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    List<?> listObj = (List<?>) obj;
+
+    LazyArray retList = (LazyArray) LazyFactory.createLazyObject(objectInspector);
+
+    List<Object> lazyList = retList.getList();
+
+    ObjectInspector listElementOI =
+        ((ListObjectInspector) objectInspector).getListElementObjectInspector();
+
+    for (int i = 0; i < listObj.size(); i++) {
+      lazyList.add(toLazyObject(listObj.get(i), listElementOI));
+    }
+
+    return retList;
+  }
+
+  /**
+   * Convert the given map to a lazy map using the given {@link ObjectInspector}
+   *
+   * @param obj map to be converted to a lazy map
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created lazy map
+   * */
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  private Object toLazyMapObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    // Avro guarantees that the map keys are strings, so we just need to worry about
+    // deserializing the values here
+
+    LazyMap lazyMap = (LazyMap) LazyFactory.createLazyObject(objectInspector);
+
+    Map map = lazyMap.getMap();
+
+    Map<Object, Object> origMap = (Map) obj;
+
+    ObjectInspector keyObjectInspector =
+        ((MapObjectInspector) objectInspector).getMapKeyObjectInspector();
+    ObjectInspector valueObjectInspector =
+        ((MapObjectInspector) objectInspector).getMapValueObjectInspector();
+
+    for (Entry entry : origMap.entrySet()) {
+      Object value = entry.getValue();
+
+      map.put(toLazyPrimitiveObject(entry.getKey(), keyObjectInspector),
+          toLazyObject(value, valueObjectInspector));
+    }
+
+    return lazyMap;
+  }
+
+  /**
+   * Convert the given union to a lazy union using the given {@link ObjectInspector}
+   *
+   * @param obj union to be converted to a {@link LazyUnion}
+   * @param objectInspector ObjectInspector used for the conversion
+   * @return the created {@link LazyUnion lazy union}
+   * */
+  private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    if (!(objectInspector instanceof LazyUnionObjectInspector)) {
+      throw new IllegalArgumentException(
+          "Invalid objectinspector found. Expected LazyUnionObjectInspector, Found "
+              + objectInspector.getClass());
+    }
+
+    StandardUnion standardUnion = (StandardUnion) obj;
+
+    // Grab the tag and the field
+    byte tag = standardUnion.getTag();
+    Object field = standardUnion.getObject();
+
+    ObjectInspector fieldOI =
+        ((LazyUnionObjectInspector) objectInspector).getObjectInspectors().get(tag);
+
+    // convert to lazy object
+    Object convertedObj = null;
+
+    if (field != null) {
+      convertedObj = toLazyObject(field, fieldOI);
+    }
+
+    if (convertedObj == null) {
+      return null;
+    }
+
+    LazyUnion lazyUnion = (LazyUnion) LazyFactory.createLazyObject(objectInspector);
+
+    lazyUnion.setField(convertedObj);
+    lazyUnion.setTag(tag);
+
+    return lazyUnion;
+  }
+
+  /**
+   * Determines if the given class is a primitive or a wrapper of a primitive. Note that even
+   * though a <code>String</code> is not a primitive in the traditional sense, it is considered
+   * one here as it is <i>not</i> a struct.
+   *
+   * @param clazz input class
+   * @return true, if the object is a primitive or a wrapper to a primitive, false otherwise.
+   * */
+  private boolean isPrimitive(Class<?> clazz) {
+    return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null
+        || clazz.getSimpleName().equals("String");
+  }
+}
\ No newline at end of file
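
The inspector above is only usable once it knows where schemas come from: deserializeStruct() requires a fixed reader schema, an AvroSchemaRetriever, or both. Below is a minimal configuration helper, illustrative only, assuming the caller has already obtained the inspector (e.g. from LazyObjectInspectorFactory with ObjectInspectorOptions.AVRO, see further down) together with the schema or retriever.

package org.apache.hadoop.hive.serde2.avro;

import org.apache.avro.Schema;

public class AvroLazyObjectInspectorConfig {

  /**
   * Wire up an AvroLazyObjectInspector. With a fixed reader schema the writer
   * schema is taken from the retriever when one is present, otherwise it is read
   * from the serialized bytes themselves (Avro container format). Without a
   * reader schema the retriever must supply both schemas and, optionally, a byte
   * offset at which the Avro payload starts.
   */
  public static void configure(AvroLazyObjectInspector inspector, Schema readerSchema,
      AvroSchemaRetriever retriever) {
    if (readerSchema == null && retriever == null) {
      throw new IllegalArgumentException(
          "either a reader schema or a schema retriever is required");
    }
    if (readerSchema != null) {
      inspector.setReaderSchema(readerSchema);
    }
    if (retriever != null) {
      inspector.setSchemaRetriever(retriever);
    }
  }
}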

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,25 @@
+package org.apache.hadoop.hive.serde2.avro;
+
+/**
+ * Exception for the {@link AvroLazyObjectInspector}
+ * */
+public class AvroObjectInspectorException extends RuntimeException {
+
+  private static final long serialVersionUID = 1L;
+
+  public AvroObjectInspectorException() {
+    super();
+  }
+
+  public AvroObjectInspectorException(String message) {
+    super(message);
+  }
+
+  public AvroObjectInspectorException(Throwable cause) {
+    super(cause);
+  }
+
+  public AvroObjectInspectorException(String message, Throwable cause) {
+    super(message, cause);
+  }
+}
\ No newline at end of file

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java Tue Sep  9 15:18:29 2014
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.typ
  *   * A list of those fields equivalent types in Hive
  *   * An ObjectInspector capable of working with an instance of that datum.
  */
-class AvroObjectInspectorGenerator {
+public class AvroObjectInspectorGenerator {
   final private List<String> columnNames;
   final private List<TypeInfo> columnTypes;
   final private ObjectInspector oi;

Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+
+/**
+ * Retrieves the Avro schema from the given source. "Source" is used loosely here: it can be
+ * anything from an HDFS URL pointing to the schema to something as simple as a
+ * {@link java.util.Properties properties} file with a key-value mapping to the schema. For cases
+ * where the {@link Schema schema} is part of the serialized data itself, "source" refers to the
+ * data bytes from which the {@link Schema schema} has to be retrieved.
+ *
+ * */
+public abstract class AvroSchemaRetriever {
+
+  /**
+   * Retrieve the writer avro schema from the given source
+   *
+   * @param source source from which the schema has to be retrieved
+   * @return the retrieved writer {@link Schema}
+   * */
+  public abstract Schema retrieveWriterSchema(Object source);
+
+  /**
+   * Retrieve the reader avro schema from the given source
+   *
+   * @param source source from which the schema has to be retrieved
+   * @return the retrieved reader {@link Schema}
+   * */
+  public Schema retrieveReaderSchema(Object source) {
+    return null;
+  }
+
+  /**
+   * Possible offset associated with the schema. This is useful when the schema is stored inline
+   * along with the data.
+   *
+   * <p>
+   * Defaults to zero. Consumers can override this value to provide a custom offset.
+   * </p>
+   * */
+  public int getOffset() {
+    return 0;
+  }
+}
\ No newline at end of file
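
As a concrete (purely hypothetical) example of the contract above: a retriever for a layout where every cell value carries a 4-byte schema id in front of the binary-encoded Avro payload could look like the sketch below. The registry map and the framing are assumptions of the sketch, not something this commit defines, and how the retriever gets instantiated (the new avro.schema.retriever property) is outside its scope.

package org.apache.hadoop.hive.serde2.avro;

import java.nio.ByteBuffer;
import java.util.Map;

import org.apache.avro.Schema;

public class InlineSchemaIdRetriever extends AvroSchemaRetriever {

  /** Maps the schema id found at the start of the payload to its schema. */
  private final Map<Integer, Schema> registry;

  public InlineSchemaIdRetriever(Map<Integer, Schema> registry) {
    this.registry = registry;
  }

  @Override
  public Schema retrieveWriterSchema(Object source) {
    byte[] data = (byte[]) source;
    int schemaId = ByteBuffer.wrap(data, 0, 4).getInt();
    return registry.get(schemaId);
  }

  @Override
  public Schema retrieveReaderSchema(Object source) {
    // No projection is needed here, so read with the writer schema
    return retrieveWriterSchema(source);
  }

  @Override
  public int getOffset() {
    // Skip the 4-byte schema id before the Avro payload is decoded
    return 4;
  }
}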

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java Tue Sep  9 15:18:29 2014
@@ -57,6 +57,7 @@ public class AvroSerdeUtils {
   public static final String EXCEPTION_MESSAGE = "Neither " + SCHEMA_LITERAL + " nor "
           + SCHEMA_URL + " specified, can't determine table schema";
   public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema";
+  public static final String SCHEMA_RETRIEVER = "avro.schema.retriever";
 
   /**
   * Determine the schema that's been provided for Avro serde work.

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java Tue Sep  9 15:18:29 2014
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.laz
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
 import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -217,11 +218,11 @@ public final class LazyFactory {
    */
   public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
       byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
-      byte escapeChar) throws SerDeException {
+      byte escapeChar, ObjectInspectorOptions option) throws SerDeException {
     return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
-        escaped, escapeChar, false);
+        escaped, escapeChar, false, option);
   }
-
+  
   /**
    * Create a hierarchical ObjectInspector for LazyObject with the given
    * typeInfo.
@@ -236,13 +237,54 @@ public final class LazyFactory {
    *          delimiting entries, the second one for delimiting key and values.
    * @param nullSequence
    *          The sequence of bytes representing NULL.
+   * @return The ObjectInspector
+   * @throws SerDeException
+   */
+  public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+      byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+      byte escapeChar) throws SerDeException {
+    return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
+        escaped, escapeChar, false, ObjectInspectorOptions.JAVA);
+  }
+
+  /**
+   * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+   *
+   * @param typeInfo The type information for the LazyObject
+   * @param separator The array of separators for delimiting each level
+   * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+   *          separator, and map uses 2 levels: the first one for delimiting entries, the second one
+   *          for delimiting key and values.
+   * @param nullSequence The sequence of bytes representing NULL.
    * @param extendedBooleanLiteral whether extended boolean literal set is legal
+   * @param option the {@link ObjectInspectorOptions}
    * @return The ObjectInspector
    * @throws SerDeException
    */
   public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
       byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
       byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
+    return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped,
+        escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA);
+  }
+  
+  /**
+   * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+   *
+   * @param typeInfo The type information for the LazyObject
+   * @param separator The array of separators for delimiting each level
+   * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+   *          separator, and map uses 2 levels: the first one for delimiting entries, the second one
+   *          for delimiting key and values.
+   * @param nullSequence The sequence of bytes representing NULL.
+   * @param extendedBooleanLiteral whether extended boolean literal set is legal
+   * @param option the {@link ObjectInspectorOptions}
+   * @return The ObjectInspector
+   * @throws SerDeException
+   */
+  public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+      byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+      byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException {
     ObjectInspector.Category c = typeInfo.getCategory();
     switch (c) {
     case PRIMITIVE:
@@ -252,9 +294,9 @@ public final class LazyFactory {
       return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
           createLazyObjectInspector(((MapTypeInfo) typeInfo)
           .getMapKeyTypeInfo(), separator, separatorIndex + 2,
-          nullSequence, escaped, escapeChar, extendedBooleanLiteral), createLazyObjectInspector(
+          nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector(
           ((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
-          separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral),
+          separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option),
           LazyUtils.getSeparator(separator, separatorIndex),
           LazyUtils.getSeparator(separator, separatorIndex+1),
           nullSequence, escaped, escapeChar);
@@ -262,7 +304,7 @@ public final class LazyFactory {
       return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
           createLazyObjectInspector(((ListTypeInfo) typeInfo)
           .getListElementTypeInfo(), separator, separatorIndex + 1,
-          nullSequence, escaped, escapeChar, extendedBooleanLiteral), LazyUtils.getSeparator(separator, separatorIndex),
+          nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex),
           nullSequence, escaped, escapeChar);
     case STRUCT:
       StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
@@ -274,19 +316,20 @@ public final class LazyFactory {
       for (int i = 0; i < fieldTypeInfos.size(); i++) {
         fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
             .get(i), separator, separatorIndex + 1, nullSequence, escaped,
-            escapeChar, extendedBooleanLiteral));
+            escapeChar, extendedBooleanLiteral, option));
       }
       return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
           fieldNames, fieldObjectInspectors,
           LazyUtils.getSeparator(separator, separatorIndex),
-          nullSequence, false, escaped, escapeChar);
+          nullSequence, false, escaped, escapeChar, option);
     case UNION:
       UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
       List<ObjectInspector> lazyOIs = new ArrayList<ObjectInspector>();
       for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) {
         lazyOIs.add(createLazyObjectInspector(uti, separator,
             separatorIndex + 1, nullSequence, escaped,
-            escapeChar, extendedBooleanLiteral));
+            escapeChar, extendedBooleanLiteral, option));
       }
       return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs,
           LazyUtils.getSeparator(separator, separatorIndex),
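
To see what the new option parameter buys, a small sketch (illustrative only; the separators, null sequence and escape settings are arbitrary): asking for ObjectInspectorOptions.AVRO makes struct levels come back as AvroLazyObjectInspector instances, while the pre-existing signatures keep returning the plain Java inspectors.

package org.apache.hadoop.hive.serde2.lazy;

import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;

public class LazyFactoryAvroOptionExample {

  public static void main(String[] args) throws SerDeException {
    TypeInfo rowType =
        TypeInfoUtils.getTypeInfoFromTypeString("struct<areacode:bigint,number:bigint>");
    byte[] separators = new byte[] {(byte) 1, (byte) 2, (byte) 3};
    Text nullSequence = new Text("\\N");

    // New overload: the option selects the struct inspector implementation
    ObjectInspector avroAware = LazyFactory.createLazyObjectInspector(rowType, separators, 0,
        nullSequence, false, (byte) 0, false, ObjectInspectorOptions.AVRO);

    // The old signature is unchanged and keeps defaulting to ObjectInspectorOptions.JAVA
    ObjectInspector plain = LazyFactory.createLazyObjectInspector(rowType, separators, 0,
        nullSequence, false, (byte) 0);

    System.out.println(avroAware instanceof AvroLazyObjectInspector); // true
    System.out.println(plain instanceof AvroLazyObjectInspector);     // false
  }
}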

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java Tue Sep  9 15:18:29 2014
@@ -342,4 +342,13 @@ public class LazyStruct extends LazyNonP
     }
     return indexes;
   }
-}
+
+  /**
+   * Return the raw bytes backing this struct. This is useful specifically in cases where the
+   * data is stored in a serialized format such as protobuf or thrift and needs a custom
+   * deserializer to be deserialized.
+   * */
+  public byte[] getBytes() {
+    return bytes.getData();
+  }
+}
\ No newline at end of file

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java Tue Sep  9 15:18:29 2014
@@ -18,7 +18,6 @@
 package org.apache.hadoop.hive.serde2.lazy;
 
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -26,8 +25,7 @@ import org.apache.hadoop.io.Text;
  * non-primitive.
  *
  */
-public class LazyUnion extends
-    LazyNonPrimitive<LazyUnionObjectInspector> {
+public class LazyUnion extends LazyNonPrimitive<LazyUnionObjectInspector> {
   /**
    * Whether the data is already parsed or not.
    */
@@ -41,7 +39,7 @@ public class LazyUnion extends
   /**
    * The object of the union.
    */
-  private LazyObject<? extends ObjectInspector> field;
+  private Object field;
 
   /**
    * Tag of the Union
@@ -54,6 +52,16 @@ public class LazyUnion extends
   private boolean fieldInited = false;
 
   /**
+   * Whether the tag has been set or not
+   * */
+  private boolean tagSet = false;
+
+  /**
+   * Whether the field has been set or not
+   * */
+  private boolean fieldSet = false;
+
+  /**
    * Construct a LazyUnion object with the ObjectInspector.
    */
   public LazyUnion(LazyUnionObjectInspector oi) {
@@ -123,6 +131,7 @@ public class LazyUnion extends
    *
    * @return The value of the field
    */
+  @SuppressWarnings("rawtypes")
   private Object uncheckedGetField() {
     Text nullSequence = oi.getNullSequence();
     int fieldLength = start + length - startPosition;
@@ -134,9 +143,9 @@ public class LazyUnion extends
 
     if (!fieldInited) {
       fieldInited = true;
-      field.init(bytes, startPosition, fieldLength);
+      ((LazyObject) field).init(bytes, startPosition, fieldLength);
     }
-    return field.getObject();
+    return ((LazyObject) field).getObject();
   }
 
   /**
@@ -145,6 +154,10 @@ public class LazyUnion extends
    * @return The field as a LazyObject
    */
   public Object getField() {
+    if (fieldSet) {
+      return field;
+    }
+
     if (!parsed) {
       parse();
     }
@@ -157,9 +170,33 @@ public class LazyUnion extends
    * @return The tag byte
    */
   public byte getTag() {
+    if (tagSet) {
+      return tag;
+    }
+
     if (!parsed) {
       parse();
     }
     return tag;
   }
-}
+
+  /**
+   * Set the field of the union
+   *
+   * @param field the field to be set
+   * */
+  public void setField(Object field) {
+    this.field = field;
+    fieldSet = true;
+  }
+
+  /**
+   * Set the tag for the union
+   *
+   * @param tag the tag to be set
+   * */
+  public void setTag(byte tag) {
+    this.tag = tag;
+    tagSet = true;
+  }
+}
\ No newline at end of file

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java Tue Sep  9 15:18:29 2014
@@ -22,7 +22,9 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.ConcurrentHashMap;
 
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
 import org.apache.hadoop.io.Text;
 
 /**
@@ -48,14 +50,34 @@ public final class LazyObjectInspectorFa
       byte escapeChar) {
     return getLazySimpleStructObjectInspector(structFieldNames,
       structFieldObjectInspectors, null, separator, nullSequence,
-      lastColumnTakesRest, escaped, escapeChar);
+      lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA);
+  }
+  
+  public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+      List<String> structFieldNames,
+      List<ObjectInspector> structFieldObjectInspectors, byte separator,
+      Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+      byte escapeChar, ObjectInspectorOptions option) {
+    return getLazySimpleStructObjectInspector(structFieldNames,
+      structFieldObjectInspectors, null, separator, nullSequence,
+      lastColumnTakesRest, escaped, escapeChar, option);
   }
 
   public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
       List<String> structFieldNames,
       List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
       byte separator, Text nullSequence, boolean lastColumnTakesRest,
-      boolean escaped,byte escapeChar) {
+      boolean escaped, byte escapeChar) {
+    return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors,
+      structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar,
+      ObjectInspectorOptions.JAVA);
+  }
+  
+  public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+      List<String> structFieldNames,
+      List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
+      byte separator, Text nullSequence, boolean lastColumnTakesRest,
+      boolean escaped, byte escapeChar, ObjectInspectorOptions option) {
     ArrayList<Object> signature = new ArrayList<Object>();
     signature.add(structFieldNames);
     signature.add(structFieldObjectInspectors);
@@ -64,15 +86,30 @@ public final class LazyObjectInspectorFa
     signature.add(Boolean.valueOf(lastColumnTakesRest));
     signature.add(Boolean.valueOf(escaped));
     signature.add(Byte.valueOf(escapeChar));
+    signature.add(option);
     if(structFieldComments != null) {
       signature.add(structFieldComments);
     }
     LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector
         .get(signature);
     if (result == null) {
-      result = new LazySimpleStructObjectInspector(structFieldNames,
-          structFieldObjectInspectors, structFieldComments, separator,
-          nullSequence, lastColumnTakesRest, escaped, escapeChar);
+      switch (option) {
+      case JAVA:
+        result =
+            new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors,
+                structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped,
+                escapeChar);
+        break;
+      case AVRO:
+        result =
+            new AvroLazyObjectInspector(structFieldNames, structFieldObjectInspectors,
+                structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped,
+                escapeChar);
+        break;
+      default:
+        throw new IllegalArgumentException("Illegal ObjectInspector type [" + option + "]");
+      }
+
       cachedLazySimpleStructObjectInspector.put(signature, result);
     }
     return result;

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java Tue Sep  9 15:18:29 2014
@@ -18,10 +18,13 @@
 
 package org.apache.hadoop.hive.serde2.lazy.objectinspector;
 
+import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.hadoop.hive.serde2.BaseStructObjectInspector;
 import org.apache.hadoop.hive.serde2.StructObject;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.io.Text;
@@ -100,6 +103,20 @@ public class LazySimpleStructObjectInspe
     int fieldID = f.getFieldID();
     assert (fieldID >= 0 && fieldID < fields.size());
 
+    ObjectInspector oi = f.getFieldObjectInspector();
+
+    if (oi instanceof AvroLazyObjectInspector) {
+      return ((AvroLazyObjectInspector) oi).getStructFieldData(data, fieldRef);
+    }
+
+    if (oi instanceof MapObjectInspector) {
+      ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
+
+      if (valueOI instanceof AvroLazyObjectInspector) {
+        return ((AvroLazyObjectInspector) valueOI).getStructFieldData(data, fieldRef);
+      }
+    }
+
     return struct.getField(fieldID);
   }
 
@@ -108,8 +125,15 @@ public class LazySimpleStructObjectInspe
     if (data == null) {
       return null;
     }
-    StructObject struct = (StructObject) data;
-    return struct.getFieldsAsList();
+
+    // Iterate over all the fields picking up the nested structs within them
+    List<Object> result = new ArrayList<Object>(fields.size());
+
+    for (MyField myField : fields) {
+      result.add(getStructFieldData(data, myField));
+    }
+
+    return result;
   }
 
   // For LazyStruct

Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Tue Sep  9 15:18:29 2014
@@ -57,7 +57,7 @@ public final class ObjectInspectorFactor
    * for the same Java type.
    */
   public enum ObjectInspectorOptions {
-    JAVA, THRIFT, PROTOCOL_BUFFERS
+    JAVA, THRIFT, PROTOCOL_BUFFERS, AVRO
   };
 
   private static ConcurrentHashMap<Type, ObjectInspector> objectInspectorCache = new ConcurrentHashMap<Type, ObjectInspector>();