You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/09/09 17:18:30 UTC
svn commit: r1623845 [3/3] - in /hive/trunk: hbase-handler/
hbase-handler/if/ hbase-handler/if/test/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/
hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
hbase-handler/src/test/org/apache/hado...
Added: hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java (added)
+++ hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,194 @@
+/**
+ * Autogenerated by Avro
+ *
+ * DO NOT EDIT DIRECTLY
+ */
+package org.apache.hadoop.hive.hbase.avro;
+@SuppressWarnings("all")
+@org.apache.avro.specific.AvroGenerated
+public class OfficePhone extends org.apache.avro.specific.SpecificRecordBase implements org.apache.avro.specific.SpecificRecord {
+ public static final org.apache.avro.Schema SCHEMA$ = new org.apache.avro.Schema.Parser().parse("{\"type\":\"record\",\"name\":\"OfficePhone\",\"namespace\":\"org.apache.hadoop.hive.hbase.avro\",\"fields\":[{\"name\":\"areaCode\",\"type\":\"long\"},{\"name\":\"number\",\"type\":\"long\"}]}");
+ public static org.apache.avro.Schema getClassSchema() { return SCHEMA$; }
+ @Deprecated public long areaCode;
+ @Deprecated public long number;
+
+ /**
+ * Default constructor. Note that this does not initialize fields
+ * to their default values from the schema. If that is desired then
+ * one should use <code>newBuilder()</code>.
+ */
+ public OfficePhone() {}
+
+ /**
+ * All-args constructor.
+ *
+ * <p>The parameters are boxed {@code Long}s while the backing fields are primitive
+ * {@code long}s, so each argument is auto-unboxed on assignment. Passing {@code null}
+ * for either argument therefore throws a {@link NullPointerException}.
+ */
+ public OfficePhone(java.lang.Long areaCode, java.lang.Long number) {
+ this.areaCode = areaCode;
+ this.number = number;
+ }
+
+ public org.apache.avro.Schema getSchema() { return SCHEMA$; }
+ // Used by DatumWriter. Applications should not call.
+ public java.lang.Object get(int field$) {
+ switch (field$) {
+ case 0: return areaCode;
+ case 1: return number;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+ }
+ }
+ // Used by DatumReader. Applications should not call.
+ // Stores value$ into the field at position field$ (0 = areaCode, 1 = number).
+ // The value is cast to java.lang.Long and then unboxed into the primitive field, so a
+ // null value fails with NullPointerException and a non-Long value with ClassCastException.
+ // Any other index is rejected with AvroRuntimeException("Bad index").
+ @SuppressWarnings(value="unchecked")
+ public void put(int field$, java.lang.Object value$) {
+ switch (field$) {
+ case 0: areaCode = (java.lang.Long)value$; break;
+ case 1: number = (java.lang.Long)value$; break;
+ default: throw new org.apache.avro.AvroRuntimeException("Bad index");
+ }
+ }
+
+ /**
+ * Gets the value of the 'areaCode' field.
+ */
+ public java.lang.Long getAreaCode() {
+ return areaCode;
+ }
+
+ /**
+ * Sets the value of the 'areaCode' field.
+ * @param value the value to set.
+ */
+ public void setAreaCode(java.lang.Long value) {
+ this.areaCode = value;
+ }
+
+ /**
+ * Gets the value of the 'number' field.
+ */
+ public java.lang.Long getNumber() {
+ return number;
+ }
+
+ /**
+ * Sets the value of the 'number' field.
+ * @param value the value to set.
+ */
+ public void setNumber(java.lang.Long value) {
+ this.number = value;
+ }
+
+ /** Creates a new OfficePhone RecordBuilder */
+ public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder() {
+ return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder();
+ }
+
+ /** Creates a new OfficePhone RecordBuilder by copying an existing Builder */
+ public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) {
+ return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other);
+ }
+
+ /** Creates a new OfficePhone RecordBuilder by copying an existing OfficePhone instance */
+ public static org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder newBuilder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) {
+ return new org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder(other);
+ }
+
+ /**
+ * RecordBuilder for OfficePhone instances.
+ */
+ public static class Builder extends org.apache.avro.specific.SpecificRecordBuilderBase<OfficePhone>
+ implements org.apache.avro.data.RecordBuilder<OfficePhone> {
+
+ private long areaCode;
+ private long number;
+
+ /** Creates a new Builder */
+ private Builder() {
+ super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$);
+ }
+
+ /** Creates a Builder by copying an existing Builder */
+ private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder other) {
+ super(other);
+ if (isValidValue(fields()[0], other.areaCode)) {
+ this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.number)) {
+ this.number = data().deepCopy(fields()[1].schema(), other.number);
+ fieldSetFlags()[1] = true;
+ }
+ }
+
+ /** Creates a Builder by copying an existing OfficePhone instance */
+ private Builder(org.apache.hadoop.hive.hbase.avro.OfficePhone other) {
+ super(org.apache.hadoop.hive.hbase.avro.OfficePhone.SCHEMA$);
+ if (isValidValue(fields()[0], other.areaCode)) {
+ this.areaCode = data().deepCopy(fields()[0].schema(), other.areaCode);
+ fieldSetFlags()[0] = true;
+ }
+ if (isValidValue(fields()[1], other.number)) {
+ this.number = data().deepCopy(fields()[1].schema(), other.number);
+ fieldSetFlags()[1] = true;
+ }
+ }
+
+ /** Gets the value of the 'areaCode' field */
+ public java.lang.Long getAreaCode() {
+ return areaCode;
+ }
+
+ /** Sets the value of the 'areaCode' field */
+ public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setAreaCode(long value) {
+ validate(fields()[0], value);
+ this.areaCode = value;
+ fieldSetFlags()[0] = true;
+ return this;
+ }
+
+ /** Checks whether the 'areaCode' field has been set */
+ public boolean hasAreaCode() {
+ return fieldSetFlags()[0];
+ }
+
+ /** Clears the value of the 'areaCode' field */
+ public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearAreaCode() {
+ fieldSetFlags()[0] = false;
+ return this;
+ }
+
+ /** Gets the value of the 'number' field */
+ public java.lang.Long getNumber() {
+ return number;
+ }
+
+ /** Sets the value of the 'number' field */
+ public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder setNumber(long value) {
+ validate(fields()[1], value);
+ this.number = value;
+ fieldSetFlags()[1] = true;
+ return this;
+ }
+
+ /** Checks whether the 'number' field has been set */
+ public boolean hasNumber() {
+ return fieldSetFlags()[1];
+ }
+
+ /** Clears the value of the 'number' field */
+ public org.apache.hadoop.hive.hbase.avro.OfficePhone.Builder clearNumber() {
+ fieldSetFlags()[1] = false;
+ return this;
+ }
+
+ @Override
+ public OfficePhone build() {
+ try {
+ OfficePhone record = new OfficePhone();
+ record.areaCode = fieldSetFlags()[0] ? this.areaCode : (java.lang.Long) defaultValue(fields()[0]);
+ record.number = fieldSetFlags()[1] ? this.number : (java.lang.Long) defaultValue(fields()[1]);
+ return record;
+ } catch (Exception e) {
+ throw new org.apache.avro.AvroRuntimeException(e);
+ }
+ }
+ }
+}
Modified: hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java (original)
+++ hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java Tue Sep 9 15:18:29 2014
@@ -37,6 +37,8 @@ public class serdeConstants {
public static final String SERIALIZATION_CLASS = "serialization.class";
+ public static final String SERIALIZATION_TYPE = "serialization.type";
+
public static final String SERIALIZATION_FORMAT = "serialization.format";
public static final String SERIALIZATION_DDL = "serialization.ddl";
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java Tue Sep 9 15:18:29 2014
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.hive.serde2.avro;
+import java.io.ByteArrayInputStream;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.DataOutputStream;
@@ -25,6 +26,7 @@ import java.io.InputStream;
import java.rmi.server.UID;
import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
import org.apache.avro.generic.GenericData;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
@@ -101,6 +103,28 @@ public class AvroGenericRecordWritable i
GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>(schema);
record = gdr.read(record, binaryDecoder);
}
+
+  /**
+   * Decodes one Avro datum from a region of the given byte array with a binary decoder and
+   * stores it as this writable's record. The writer schema is also remembered as the file schema.
+   *
+   * <p>Note that {@code length} is not the size of the region: the number of bytes handed to
+   * the decoder is {@code length - offset}, i.e. {@code length} acts as the end position of
+   * the datum inside {@code bytes}.
+   *
+   * @param bytes buffer holding the encoded datum
+   * @param offset position in {@code bytes} at which the datum starts
+   * @param length end position of the datum in {@code bytes} (see note above)
+   * @param writerSchema schema the datum was written with
+   * @param readerSchema schema the datum should be resolved to
+   * @throws IOException if the datum cannot be decoded
+   */
+  public void readFields(byte[] bytes, int offset, int length, Schema writerSchema, Schema readerSchema) throws IOException {
+    fileSchema = writerSchema;
+    record = new GenericData.Record(writerSchema);
+
+    // the previous decoder instance is handed back to the factory as the reuse argument
+    final int payloadSize = length - offset;
+    binaryDecoder = DecoderFactory.get().binaryDecoder(bytes, offset, payloadSize, binaryDecoder);
+
+    GenericDatumReader<GenericRecord> datumReader =
+        new GenericDatumReader<GenericRecord>(writerSchema, readerSchema);
+    record = datumReader.read(null, binaryDecoder);
+  }
+
+  /**
+   * Reads the first record out of the given bytes through a {@link DataFileStream} and stores
+   * it as this writable's record. The writer schema is remembered as the file schema and is
+   * used to allocate the record that is offered to the stream for reuse; the datum reader
+   * itself is only configured with the expected (reader) schema.
+   *
+   * @param bytes the complete payload to read from
+   * @param writerSchema schema the payload was written with
+   * @param readerSchema schema the record should be resolved to
+   * @throws IOException if the stream cannot be opened or read
+   */
+  public void readFields(byte[] bytes, Schema writerSchema, Schema readerSchema) throws IOException {
+    fileSchema = writerSchema;
+    record = new GenericData.Record(writerSchema);
+    GenericDatumReader<GenericRecord> gdr = new GenericDatumReader<GenericRecord>();
+    gdr.setExpected(readerSchema);
+    ByteArrayInputStream is = new ByteArrayInputStream(bytes);
+    DataFileStream<GenericRecord> dfr = new DataFileStream<GenericRecord>(is, gdr);
+    try {
+      record = dfr.next(record);
+    } finally {
+      // always release the stream, including when next() fails on a bad or empty payload
+      dfr.close();
+    }
+  }
public UID getRecordReaderID() {
return recordReaderID;
@@ -117,5 +141,4 @@ public class AvroGenericRecordWritable i
public void setFileSchema(Schema originalSchema) {
this.fileSchema = originalSchema;
}
-
}
Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,506 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileStream;
+import org.apache.avro.generic.GenericDatumReader;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumReader;
+import org.apache.commons.lang.ClassUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
+import org.apache.hadoop.hive.serde2.lazy.LazyArray;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazyMap;
+import org.apache.hadoop.hive.serde2.lazy.LazyObject;
+import org.apache.hadoop.hive.serde2.lazy.LazyStruct;
+import org.apache.hadoop.hive.serde2.lazy.LazyUnion;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyListObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyMapObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;
+import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.io.Text;
+
+/**
+ * Lazy objectinspector for avro serialization
+ * */
+public class AvroLazyObjectInspector extends LazySimpleStructObjectInspector {
+
+ /**
+ * Reader {@link Schema} for the avro data
+ * */
+ private Schema readerSchema;
+
+ /**
+ * {@link AvroSchemaRetriever} to retrieve avro schema
+ * */
+ private AvroSchemaRetriever schemaRetriever;
+
+ /**
+ * LOGGER
+ * */
+ public static final Log LOG = LogFactory.getLog(AvroLazyObjectInspector.class);
+
+ /**
+ * Constructor. All arguments are passed through unchanged to the
+ * {@link LazySimpleStructObjectInspector} super constructor.
+ *
+ * @param structFieldNames names of the fields within the given avro struct
+ * @param structFieldObjectInspectors object inspectors for the fields
+ * @param structFieldComments comments for the given fields
+ * @param separator separator between different fields
+ * @param nullSequence sequence to represent null value
+ * @param lastColumnTakesRest whether the last column of the struct should take the rest of the
+ * row if there are extra fields.
+ * @param escaped whether the data is escaped or not
+ * @param escapeChar if escaped is true, the escape character
+ * */
+ public AvroLazyObjectInspector(List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
+ byte separator, Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+ byte escapeChar) {
+ super(structFieldNames, structFieldObjectInspectors, structFieldComments, separator,
+ nullSequence, lastColumnTakesRest, escaped, escapeChar);
+ }
+
+ /**
+ * Set the reader schema for the {@link AvroLazyObjectInspector} to the given schema.
+ *
+ * <p>When a reader schema is set, struct deserialization uses it as-is and only obtains the
+ * writer schema elsewhere (from the schema retriever if one is set, otherwise from the
+ * serialized bytes).
+ *
+ * @param readerSchema the reader schema to use; may be null if a schema retriever is set
+ * */
+ public void setReaderSchema(Schema readerSchema) {
+ this.readerSchema = readerSchema;
+ }
+
+ /**
+ * Set the {@link AvroSchemaRetriever} for the {@link AvroLazyObjectInspector} to the given
+ * instance
+ *
+ * @param schemaRetriever the schema retriever to be set
+ * */
+ public void setSchemaRetriever(AvroSchemaRetriever schemaRetriever) {
+ this.schemaRetriever = schemaRetriever;
+ }
+
+  /**
+   * Returns the value of one field of the given row.
+   *
+   * <p>Two row representations are handled:
+   * <ul>
+   * <li>a {@link LazyStruct}: the field is looked up by id; a nested {@link LazyStruct} is
+   * deserialized from its avro bytes, the {@link LazyStruct} values of a {@link LazyMap} are
+   * deserialized in place, and anything else (including null) is returned untouched;</li>
+   * <li>a {@link List} of already deserialized objects: the element at the field id is
+   * converted to a lazy object, or null is returned if the list is too short or the element is
+   * null.</li>
+   * </ul>
+   *
+   * @param data the row, either a {@link LazyStruct} or a {@link List}; may be null
+   * @param fieldRef the field to read, must be a {@code MyField}
+   * @return the field value, or null if {@code data} or the field is null
+   * @throws IllegalArgumentException if {@code fieldRef} is not a {@code MyField} or
+   *           {@code data} is neither a {@link LazyStruct} nor a {@link List}
+   * */
+  @SuppressWarnings("unchecked")
+  @Override
+  public Object getStructFieldData(Object data, StructField fieldRef) {
+    if (data == null) {
+      return null;
+    }
+
+    if (!(fieldRef instanceof MyField)) {
+      throw new IllegalArgumentException("fieldRef has to be of MyField");
+    }
+
+    MyField f = (MyField) fieldRef;
+    int fieldID = f.getFieldID();
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Getting struct field data for field: [" + f.getFieldName() + "] on data ["
+          + data.getClass() + "]");
+    }
+
+    if (data instanceof LazyStruct) {
+      LazyStruct row = (LazyStruct) data;
+
+      // get the field out of struct
+      Object rowField = row.getField(fieldID);
+
+      if (rowField instanceof LazyStruct) {
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Deserializing struct [" + rowField.getClass() + "]");
+        }
+
+        return deserializeStruct(rowField, f.getFieldName());
+
+      } else if (rowField instanceof LazyMap) {
+        // We have found a map. Systematically deserialize the values of the map and return back the
+        // map
+        LazyMap lazyMap = (LazyMap) rowField;
+
+        for (Entry<Object, Object> entry : lazyMap.getMap().entrySet()) {
+          Object _value = entry.getValue();
+
+          if (_value instanceof LazyStruct) {
+            // replace through the entry so the map is never modified behind the iterator's back
+            entry.setValue(deserializeStruct(_value, f.getFieldName()));
+          }
+        }
+
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Returning a lazy map for field [" + f.getFieldName() + "]");
+        }
+
+        return lazyMap;
+
+      } else {
+        if (LOG.isDebugEnabled()) {
+          // rowField may be null here; plain concatenation avoids an NPE while logging
+          LOG.debug("Returning [" + rowField + "] for field [" + f.getFieldName() + "]");
+        }
+
+        // Just return the object. We need no further operation on it
+        return rowField;
+      }
+    } else {
+
+      // The Avro deserializer would deserialize our object and return back a list of object that
+      // hive can operate on. Here we should be getting the same object back.
+      if (!(data instanceof List)) {
+        throw new IllegalArgumentException("data should be an instance of list");
+      }
+
+      List<Object> row = (List<Object>) data;
+
+      if (!(fieldID < row.size())) {
+        return null;
+      }
+
+      // lookup the field corresponding to the given field ID and return
+      Object field = row.get(fieldID);
+
+      if (field == null) {
+        return null;
+      }
+
+      // convert to a lazy object and return
+      return toLazyObject(field, fieldRef.getFieldObjectInspector());
+    }
+  }
+
+  /**
+   * Returns the values of all fields of the given row, in field order, by delegating to
+   * {@link #getStructFieldData(Object, StructField)} for every field.
+   *
+   * @param data the row; may be null
+   * @return one value per field, or null if {@code data} is null
+   * */
+  @Override
+  public List<Object> getStructFieldsDataAsList(Object data) {
+    if (data == null) {
+      return null;
+    }
+
+    List<Object> values = new ArrayList<Object>(fields.size());
+    for (StructField field : fields) {
+      values.add(getStructFieldData(data, field));
+    }
+    return values;
+  }
+
+ /**
+ * Deserialize the given struct object
+ *
+ * <p>The raw bytes of the struct are decoded into an {@link AvroGenericRecordWritable} and then
+ * turned into hive objects by an {@link AvroDeserializer}, using column names and types
+ * generated from the reader schema. How the schemas are obtained depends on what has been
+ * configured on this inspector: without a reader schema both schemas come from the schema
+ * retriever; with a reader schema the writer schema comes from the retriever if present,
+ * otherwise from the bytes themselves.
+ *
+ * @param struct the object to deserialize, must be a {@link LazyStruct}
+ * @param fieldName name of the field on which we are currently operating on
+ * @return a deserialized object that hive can further operate on, or null if the struct has no
+ * bytes
+ * @throws IllegalArgumentException if neither a reader schema nor a schema retriever is set, or
+ * if the data is shorter than the retriever's offset
+ * @throws IllegalStateException if the schema retriever returns a null reader or writer schema
+ * @throws AvroObjectInspectorException if something goes wrong during deserialization
+ * */
+ private Object deserializeStruct(Object struct, String fieldName) {
+ byte[] data = ((LazyStruct) struct).getBytes();
+ AvroDeserializer deserializer = new AvroDeserializer();
+
+ if (data == null) {
+ return null;
+ }
+
+ if (readerSchema == null && schemaRetriever == null) {
+ throw new IllegalArgumentException("reader schema or schemaRetriever must be set for field ["
+ + fieldName + "]");
+ }
+
+ Schema ws = null;
+ Schema rs = null;
+ int offset = 0;
+
+ AvroGenericRecordWritable avroWritable = new AvroGenericRecordWritable();
+
+ if (readerSchema == null) {
+ // No reader schema configured: the retriever (non-null here, see the check above) has to
+ // supply both the reader and the writer schema.
+
+ rs = schemaRetriever.retrieveReaderSchema(data);
+
+ if (rs == null) {
+ // still nothing, Raise exception
+ throw new IllegalStateException(
+ "A valid reader schema could not be retrieved either directly or from the schema retriever for field ["
+ + fieldName + "]");
+ }
+
+ ws = schemaRetriever.retrieveWriterSchema(data);
+
+ if (ws == null) {
+ throw new IllegalStateException(
+ "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+ }
+
+ // adjust the data bytes according to any possible offset that was provided
+ offset = schemaRetriever.getOffset();
+
+ if (data.length < offset) {
+ throw new IllegalArgumentException("Data size cannot be less than [" + offset
+ + "]. Found [" + data.length + "]");
+ }
+
+ if (LOG.isDebugEnabled()) {
+ LOG.debug("Retrieved writer Schema: " + ws.toString());
+ LOG.debug("Retrieved reader Schema: " + rs.toString());
+ }
+
+ try {
+ // data.length is passed as the end position; the datum occupies [offset, data.length)
+ avroWritable.readFields(data, offset, data.length, ws, rs);
+ } catch (IOException ioe) {
+ throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+ }
+ } else {
+ // a reader schema was provided
+ if (schemaRetriever != null) {
+ // a schema retriever has been provided as well. Attempt to read the write schema from the
+ // retriever
+ ws = schemaRetriever.retrieveWriterSchema(data);
+
+ if (ws == null) {
+ throw new IllegalStateException(
+ "Null writer schema retrieved from schemaRetriever for field [" + fieldName + "]");
+ }
+ } else {
+ // attempt retrieving the schema from the data
+ ws = retrieveSchemaFromBytes(data);
+ }
+
+ rs = readerSchema;
+
+ try {
+ // NOTE(review): the retriever's offset is not consulted on this path; the whole byte
+ // array is handed to the reader.
+ avroWritable.readFields(data, ws, rs);
+ } catch (IOException ioe) {
+ throw new AvroObjectInspectorException("Error deserializing avro payload", ioe);
+ }
+ }
+
+ AvroObjectInspectorGenerator oiGenerator = null;
+ Object deserializedObject = null;
+
+ try {
+ // column names and types are derived from the reader schema
+ oiGenerator = new AvroObjectInspectorGenerator(rs);
+ deserializedObject =
+ deserializer.deserialize(oiGenerator.getColumnNames(), oiGenerator.getColumnTypes(),
+ avroWritable, rs);
+ } catch (SerDeException se) {
+ throw new AvroObjectInspectorException("Error deserializing avro payload", se);
+ }
+
+ return deserializedObject;
+ }
+
+  /**
+   * Retrieve the schema embedded in the given bytes by opening them as a
+   * {@link DataFileStream} and asking the stream for its schema.
+   *
+   * @param data the serialized bytes, expected to start with the stream header
+   * @return the retrieved {@link Schema schema}
+   * @throws AvroObjectInspectorException if the bytes cannot be opened as a data file stream
+   * */
+  private Schema retrieveSchemaFromBytes(byte[] data) {
+    ByteArrayInputStream bais = new ByteArrayInputStream(data);
+    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
+
+    DataFileStream<GenericRecord> dfs = null;
+
+    try {
+      dfs = new DataFileStream<GenericRecord>(bais, reader);
+      return dfs.getSchema();
+    } catch (IOException ioe) {
+      throw new AvroObjectInspectorException("An error occurred retrieving schema from bytes", ioe);
+    } finally {
+      // close explicitly instead of suppressing the resource warning
+      if (dfs != null) {
+        try {
+          dfs.close();
+        } catch (IOException ignored) {
+          // the schema has already been read (or the failure already reported); a close
+          // failure on an in-memory stream carries no additional information
+        }
+      }
+    }
+  }
+
+  /**
+   * Converts the given field to a lazy object. Primitives (and strings) are converted first;
+   * otherwise the conversion is chosen from the object inspector (list, map) or from the value
+   * itself (union). Values that match none of these are returned unchanged.
+   *
+   * @param field to be converted to a lazy object; may be null, e.g. a null element of a list
+   *          or a null value of a map
+   * @param fieldOI {@link ObjectInspector} for the given field
+   * @return returns the converted lazy object, or null if {@code field} is null
+   * */
+  private Object toLazyObject(Object field, ObjectInspector fieldOI) {
+    // list elements and map values are passed in unchecked and may be null; without this guard
+    // field.getClass() below would throw a NullPointerException
+    if (field == null) {
+      return null;
+    }
+
+    if (isPrimitive(field.getClass())) {
+      return toLazyPrimitiveObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyListObjectInspector) {
+      return toLazyListObject(field, fieldOI);
+    } else if (field instanceof StandardUnion) {
+      return toLazyUnionObject(field, fieldOI);
+    } else if (fieldOI instanceof LazyMapObjectInspector) {
+      return toLazyMapObject(field, fieldOI);
+    } else {
+      return field;
+    }
+  }
+
+  /**
+   * Convert the given primitive (or string) value to a lazy object using the given
+   * {@link ObjectInspector}. The value is rendered with {@code toString()}, trimmed, encoded as
+   * UTF-8 and handed to a freshly created lazy object as its backing bytes.
+   *
+   * @param obj Object to be converted to a {@link LazyObject}
+   * @param oi ObjectInspector used for the conversion
+   * @return the created {@link LazyObject lazy object}, or null if {@code obj} is null
+   * */
+  private LazyObject<? extends ObjectInspector> toLazyPrimitiveObject(Object obj, ObjectInspector oi) {
+    if (obj == null) {
+      return null;
+    }
+
+    LazyObject<? extends ObjectInspector> lazyObject = LazyFactory.createLazyObject(oi);
+    ByteArrayRef ref = new ByteArrayRef();
+
+    String objAsString = obj.toString().trim();
+
+    // Encode explicitly as UTF-8 rather than with the platform default charset, so that
+    // non-ASCII values produce the same bytes on every JVM. The lazy layer is expected to read
+    // these bytes back as UTF-8 text (the encoding used by Hadoop Text).
+    ref.setData(objAsString.getBytes(java.nio.charset.Charset.forName("UTF-8")));
+
+    // initialize the lazy object
+    lazyObject.init(ref, 0, ref.getData().length);
+
+    return lazyObject;
+  }
+
+  /**
+   * Builds a {@link LazyArray} from the given list, converting every element with the list's
+   * element object inspector.
+   *
+   * @param obj the {@link List} to be converted
+   * @param objectInspector list ObjectInspector used for the conversion
+   * @return the populated {@link LazyArray}, or null if {@code obj} is null
+   * */
+  private Object toLazyListObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    List<?> sourceElements = (List<?>) obj;
+
+    LazyArray lazyArray = (LazyArray) LazyFactory.createLazyObject(objectInspector);
+    List<Object> targetElements = lazyArray.getList();
+
+    ObjectInspector elementOI =
+        ((ListObjectInspector) objectInspector).getListElementObjectInspector();
+
+    for (Object element : sourceElements) {
+      targetElements.add(toLazyObject(element, elementOI));
+    }
+
+    return lazyArray;
+  }
+
+  /**
+   * Builds a {@link LazyMap} from the given map. Keys are converted as primitives with the
+   * map's key object inspector, values are converted with the map's value object inspector.
+   *
+   * @param obj the {@link Map} to be converted
+   * @param objectInspector map ObjectInspector used for the conversion
+   * @return the populated {@link LazyMap}, or null if {@code obj} is null
+   * */
+  @SuppressWarnings({ "rawtypes", "unchecked" })
+  private Object toLazyMapObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    // avro guarantees string keys, so keys always go through the primitive conversion and only
+    // the values need the general conversion
+    LazyMap result = (LazyMap) LazyFactory.createLazyObject(objectInspector);
+    Map target = result.getMap();
+    Map<Object, Object> source = (Map) obj;
+
+    MapObjectInspector mapOI = (MapObjectInspector) objectInspector;
+    ObjectInspector keyOI = mapOI.getMapKeyObjectInspector();
+    ObjectInspector valueOI = mapOI.getMapValueObjectInspector();
+
+    for (Entry sourceEntry : source.entrySet()) {
+      Object lazyKey = toLazyPrimitiveObject(sourceEntry.getKey(), keyOI);
+      Object lazyValue = toLazyObject(sourceEntry.getValue(), valueOI);
+      target.put(lazyKey, lazyValue);
+    }
+
+    return result;
+  }
+
+  /**
+   * Builds a {@link LazyUnion} from the given {@link StandardUnion}. The member selected by the
+   * union's tag is converted with the object inspector registered for that tag.
+   *
+   * @param obj the {@link StandardUnion} to be converted
+   * @param objectInspector ObjectInspector used for the conversion, must be a
+   *          {@link LazyUnionObjectInspector}
+   * @return the created {@link LazyUnion}, or null if {@code obj}, the union's member or the
+   *         converted member is null
+   * @throws IllegalArgumentException if the object inspector is not a
+   *           {@link LazyUnionObjectInspector}
+   * */
+  private Object toLazyUnionObject(Object obj, ObjectInspector objectInspector) {
+    if (obj == null) {
+      return null;
+    }
+
+    if (!(objectInspector instanceof LazyUnionObjectInspector)) {
+      throw new IllegalArgumentException(
+          "Invalid objectinspector found. Expected LazyUnionObjectInspector, Found "
+              + objectInspector.getClass());
+    }
+
+    StandardUnion union = (StandardUnion) obj;
+    LazyUnionObjectInspector unionOI = (LazyUnionObjectInspector) objectInspector;
+
+    // the tag selects both the member value and the inspector that describes it
+    byte tag = union.getTag();
+    Object member = union.getObject();
+    ObjectInspector memberOI = unionOI.getObjectInspectors().get(tag);
+
+    Object lazyMember = (member == null) ? null : toLazyObject(member, memberOI);
+    if (lazyMember == null) {
+      return null;
+    }
+
+    LazyUnion result = (LazyUnion) LazyFactory.createLazyObject(objectInspector);
+    result.setField(lazyMember);
+    result.setTag(tag);
+
+    return result;
+  }
+
+  /**
+   * Determines if the given class is a primitive, a wrapper to a primitive, or
+   * {@link String}. Note, even though a <code>String</code> may not be a primitive in the
+   * traditional sense, it is considered one here as it is <i>not</i> a struct.
+   *
+   * @param clazz input class
+   * @return true, if the class is a primitive, a wrapper to a primitive or
+   *         <code>java.lang.String</code>, false otherwise.
+   * */
+  private boolean isPrimitive(Class<?> clazz) {
+    // compare against the class itself: matching on the simple name would also accept any
+    // unrelated class that merely happens to be called "String"
+    return clazz.isPrimitive() || ClassUtils.wrapperToPrimitive(clazz) != null
+        || String.class.equals(clazz);
+  }
+}
\ No newline at end of file
Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,25 @@
+package org.apache.hadoop.hive.serde2.avro;
+
+/**
+ * Unchecked exception for the {@link AvroLazyObjectInspector}. Each constructor passes its
+ * arguments straight to the matching {@link RuntimeException} constructor.
+ * */
+public class AvroObjectInspectorException extends RuntimeException {
+
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Creates an exception with neither a message nor a cause.
+ * */
+ public AvroObjectInspectorException() {
+ super();
+ }
+
+ /**
+ * Creates an exception with the given message.
+ *
+ * @param message description of the failure
+ * */
+ public AvroObjectInspectorException(String message) {
+ super(message);
+ }
+
+ /**
+ * Creates an exception that wraps the given cause.
+ *
+ * @param cause the underlying failure
+ * */
+ public AvroObjectInspectorException(Throwable cause) {
+ super(cause);
+ }
+
+ /**
+ * Creates an exception with the given message that wraps the given cause.
+ *
+ * @param message description of the failure
+ * @param cause the underlying failure
+ * */
+ public AvroObjectInspectorException(String message, Throwable cause) {
+ super(message, cause);
+ }
+}
\ No newline at end of file
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java Tue Sep 9 15:18:29 2014
@@ -40,7 +40,7 @@ import org.apache.hadoop.hive.serde2.typ
* * A list of those fields equivalent types in Hive
* * An ObjectInspector capable of working with an instance of that datum.
*/
-class AvroObjectInspectorGenerator {
+public class AvroObjectInspectorGenerator {
final private List<String> columnNames;
final private List<TypeInfo> columnTypes;
final private ObjectInspector oi;
Added: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java (added)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java Tue Sep 9 15:18:29 2014
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.serde2.avro;
+
+import org.apache.avro.Schema;
+
+/**
+ * Retrieves the avro schema from the given source. "Source" is a loose term here, in the
+ * sense that it can range from an HDFS url location pointing to the schema to something as
+ * simple as a {@link java.util.Properties properties} file with a simple key-value mapping to
+ * the schema. For cases where the {@link Schema schema} is a part of the serialized data itself,
+ * "Source" would refer to the data bytes from which the {@link Schema schema} has to be
+ * retrieved.
+ *
+ * */
+public abstract class AvroSchemaRetriever {
+
+ /**
+ * Retrieve the writer avro schema from the given source
+ *
+ * @param source source from which the schema has to be retrieved
+ * @return the retrieved writer {@link Schema}
+ * */
+ public abstract Schema retrieveWriterSchema(Object source);
+
+ /**
+ * Retrieve the reader avro schema from the given source
+ *
+ * <p>
+ * This default implementation returns null. Subclasses that are meant to supply the reader
+ * schema must override it.
+ * </p>
+ *
+ * @param source source from which the schema has to be retrieved
+ * @return the retrieved reader {@link Schema}, null by default
+ * */
+ public Schema retrieveReaderSchema(Object source) {
+ return null;
+ }
+
+ /**
+ * Possible offset associated with schema. This is useful when the schema is stored inline along
+ * with the data.
+ *
+ * <p>
+ * Defaulted to zero. Consumers can choose to override this value to provide a custom offset.
+ * </p>
+ *
+ * @return the offset, zero by default
+ * */
+ public int getOffset() {
+ return 0;
+ }
+}
\ No newline at end of file
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java Tue Sep 9 15:18:29 2014
@@ -57,6 +57,7 @@ public class AvroSerdeUtils {
public static final String EXCEPTION_MESSAGE = "Neither " + SCHEMA_LITERAL + " nor "
+ SCHEMA_URL + " specified, can't determine table schema";
public static final String AVRO_SERDE_SCHEMA = "avro.serde.schema";
+ public static final String SCHEMA_RETRIEVER = "avro.schema.retriever";
/**
* Determine the schema to that's been provided for Avro serde work.
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java Tue Sep 9 15:18:29 2014
@@ -53,6 +53,7 @@ import org.apache.hadoop.hive.serde2.laz
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
@@ -217,11 +218,11 @@ public final class LazyFactory {
*/
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
- byte escapeChar) throws SerDeException {
+ byte escapeChar, ObjectInspectorOptions option) throws SerDeException {
return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
- escaped, escapeChar, false);
+ escaped, escapeChar, false, option);
}
-
+
/**
* Create a hierarchical ObjectInspector for LazyObject with the given
* typeInfo.
@@ -236,13 +237,54 @@ public final class LazyFactory {
* delimiting entries, the second one for delimiting key and values.
* @param nullSequence
* The sequence of bytes representing NULL.
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar) throws SerDeException {
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence,
+ escaped, escapeChar, false, ObjectInspectorOptions.JAVA);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param nullSequence The sequence of bytes representing NULL.
* @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * @see ObjectInspectorOptions#JAVA the option this overload delegates with
* @return The ObjectInspector
* @throws SerDeException
*/
public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
byte escapeChar, boolean extendedBooleanLiteral) throws SerDeException {
+ return createLazyObjectInspector(typeInfo, separator, separatorIndex, nullSequence, escaped,
+ escapeChar, extendedBooleanLiteral, ObjectInspectorOptions.JAVA);
+ }
+
+ /**
+ * Create a hierarchical ObjectInspector for LazyObject with the given typeInfo.
+ *
+ * @param typeInfo The type information for the LazyObject
+ * @param separator The array of separators for delimiting each level
+ * @param separatorIndex The current level (for separators). List(array), struct uses 1 level of
+ * separator, and map uses 2 levels: the first one for delimiting entries, the second one
+ * for delimiting key and values.
+ * @param nullSequence The sequence of bytes representing NULL.
+ * @param extendedBooleanLiteral whether extended boolean literal set is legal
+ * @param option the {@link ObjectInspectorOptions}
+ * @return The ObjectInspector
+ * @throws SerDeException
+ */
+ public static ObjectInspector createLazyObjectInspector(TypeInfo typeInfo,
+ byte[] separator, int separatorIndex, Text nullSequence, boolean escaped,
+ byte escapeChar, boolean extendedBooleanLiteral, ObjectInspectorOptions option) throws SerDeException {
ObjectInspector.Category c = typeInfo.getCategory();
switch (c) {
case PRIMITIVE:
@@ -252,9 +294,9 @@ public final class LazyFactory {
return LazyObjectInspectorFactory.getLazySimpleMapObjectInspector(
createLazyObjectInspector(((MapTypeInfo) typeInfo)
.getMapKeyTypeInfo(), separator, separatorIndex + 2,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), createLazyObjectInspector(
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), createLazyObjectInspector(
((MapTypeInfo) typeInfo).getMapValueTypeInfo(), separator,
- separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral),
+ separatorIndex + 2, nullSequence, escaped, escapeChar, extendedBooleanLiteral, option),
LazyUtils.getSeparator(separator, separatorIndex),
LazyUtils.getSeparator(separator, separatorIndex+1),
nullSequence, escaped, escapeChar);
@@ -262,7 +304,7 @@ public final class LazyFactory {
return LazyObjectInspectorFactory.getLazySimpleListObjectInspector(
createLazyObjectInspector(((ListTypeInfo) typeInfo)
.getListElementTypeInfo(), separator, separatorIndex + 1,
- nullSequence, escaped, escapeChar, extendedBooleanLiteral), LazyUtils.getSeparator(separator, separatorIndex),
+ nullSequence, escaped, escapeChar, extendedBooleanLiteral, option), LazyUtils.getSeparator(separator, separatorIndex),
nullSequence, escaped, escapeChar);
case STRUCT:
StructTypeInfo structTypeInfo = (StructTypeInfo) typeInfo;
@@ -274,19 +316,20 @@ public final class LazyFactory {
for (int i = 0; i < fieldTypeInfos.size(); i++) {
fieldObjectInspectors.add(createLazyObjectInspector(fieldTypeInfos
.get(i), separator, separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral));
+ escapeChar, extendedBooleanLiteral, option));
}
return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
fieldNames, fieldObjectInspectors,
LazyUtils.getSeparator(separator, separatorIndex),
- nullSequence, false, escaped, escapeChar);
+ nullSequence,
+ false, escaped, escapeChar, option);
case UNION:
UnionTypeInfo unionTypeInfo = (UnionTypeInfo) typeInfo;
List<ObjectInspector> lazyOIs = new ArrayList<ObjectInspector>();
for (TypeInfo uti : unionTypeInfo.getAllUnionObjectTypeInfos()) {
lazyOIs.add(createLazyObjectInspector(uti, separator,
separatorIndex + 1, nullSequence, escaped,
- escapeChar, extendedBooleanLiteral));
+ escapeChar, extendedBooleanLiteral, option));
}
return LazyObjectInspectorFactory.getLazyUnionObjectInspector(lazyOIs,
LazyUtils.getSeparator(separator, separatorIndex),
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java Tue Sep 9 15:18:29 2014
@@ -342,4 +342,13 @@ public class LazyStruct extends LazyNonP
}
return indexes;
}
-}
+
+ /**
+ * Return the data in bytes corresponding to this given struct. This is useful specifically in
+ * cases where the data is stored in serialized formats like protobufs or thrift and would need
+ * custom deserializers to be deserialized.
+ * */
+ public byte[] getBytes() {
+ return bytes.getData();
+ }
+}
\ No newline at end of file
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java Tue Sep 9 15:18:29 2014
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.serde2.lazy;
import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyUnionObjectInspector;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.io.Text;
/**
@@ -26,8 +25,7 @@ import org.apache.hadoop.io.Text;
* non-primitive.
*
*/
-public class LazyUnion extends
- LazyNonPrimitive<LazyUnionObjectInspector> {
+public class LazyUnion extends LazyNonPrimitive<LazyUnionObjectInspector> {
/**
* Whether the data is already parsed or not.
*/
@@ -41,7 +39,7 @@ public class LazyUnion extends
/**
* The object of the union.
*/
- private LazyObject<? extends ObjectInspector> field;
+ private Object field;
/**
* Tag of the Union
@@ -54,6 +52,16 @@ public class LazyUnion extends
private boolean fieldInited = false;
/**
+ * Whether the tag has been set or not
+ * */
+ private boolean tagSet = false;
+
+ /**
+ * Whether the field has been set or not
+ * */
+ private boolean fieldSet = false;
+
+ /**
* Construct a LazyUnion object with the ObjectInspector.
*/
public LazyUnion(LazyUnionObjectInspector oi) {
@@ -123,6 +131,7 @@ public class LazyUnion extends
*
* @return The value of the field
*/
+ @SuppressWarnings("rawtypes")
private Object uncheckedGetField() {
Text nullSequence = oi.getNullSequence();
int fieldLength = start + length - startPosition;
@@ -134,9 +143,9 @@ public class LazyUnion extends
if (!fieldInited) {
fieldInited = true;
- field.init(bytes, startPosition, fieldLength);
+ ((LazyObject) field).init(bytes, startPosition, fieldLength);
}
- return field.getObject();
+ return ((LazyObject) field).getObject();
}
/**
@@ -145,6 +154,10 @@ public class LazyUnion extends
* @return The field as a LazyObject
*/
public Object getField() {
+ if (fieldSet) {
+ return field;
+ }
+
if (!parsed) {
parse();
}
@@ -157,9 +170,33 @@ public class LazyUnion extends
* @return The tag byte
*/
public byte getTag() {
+ if (tagSet) {
+ return tag;
+ }
+
if (!parsed) {
parse();
}
return tag;
}
-}
+
+ /**
+ * Set the field of the union
+ *
+ * @param field the field to be set
+ * */
+ public void setField(Object field) {
+ this.field = field;
+ fieldSet = true;
+ }
+
+ /**
+ * Set the tag for the union
+ *
+ * @param tag the tag to be set
+ * */
+ public void setTag(byte tag) {
+ this.tag = tag;
+ tagSet = true;
+ }
+}
\ No newline at end of file
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java Tue Sep 9 15:18:29 2014
@@ -22,7 +22,9 @@ import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
import org.apache.hadoop.io.Text;
/**
@@ -48,14 +50,34 @@ public final class LazyObjectInspectorFa
byte escapeChar) {
return getLazySimpleStructObjectInspector(structFieldNames,
structFieldObjectInspectors, null, separator, nullSequence,
- lastColumnTakesRest, escaped, escapeChar);
+ lastColumnTakesRest, escaped, escapeChar, ObjectInspectorOptions.JAVA);
+ }
+
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+ List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors, byte separator,
+ Text nullSequence, boolean lastColumnTakesRest, boolean escaped,
+ byte escapeChar, ObjectInspectorOptions option) {
+ return getLazySimpleStructObjectInspector(structFieldNames,
+ structFieldObjectInspectors, null, separator, nullSequence,
+ lastColumnTakesRest, escaped, escapeChar, option);
}
public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
List<String> structFieldNames,
List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
byte separator, Text nullSequence, boolean lastColumnTakesRest,
- boolean escaped,byte escapeChar) {
+ boolean escaped, byte escapeChar) {
+ return getLazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors,
+ structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped, escapeChar,
+ ObjectInspectorOptions.JAVA);
+ }
+
+ public static LazySimpleStructObjectInspector getLazySimpleStructObjectInspector(
+ List<String> structFieldNames,
+ List<ObjectInspector> structFieldObjectInspectors, List<String> structFieldComments,
+ byte separator, Text nullSequence, boolean lastColumnTakesRest,
+ boolean escaped,byte escapeChar, ObjectInspectorOptions option) {
ArrayList<Object> signature = new ArrayList<Object>();
signature.add(structFieldNames);
signature.add(structFieldObjectInspectors);
@@ -64,15 +86,30 @@ public final class LazyObjectInspectorFa
signature.add(Boolean.valueOf(lastColumnTakesRest));
signature.add(Boolean.valueOf(escaped));
signature.add(Byte.valueOf(escapeChar));
+ signature.add(option);
if(structFieldComments != null) {
signature.add(structFieldComments);
}
LazySimpleStructObjectInspector result = cachedLazySimpleStructObjectInspector
.get(signature);
if (result == null) {
- result = new LazySimpleStructObjectInspector(structFieldNames,
- structFieldObjectInspectors, structFieldComments, separator,
- nullSequence, lastColumnTakesRest, escaped, escapeChar);
+ switch (option) {
+ case JAVA:
+ result =
+ new LazySimpleStructObjectInspector(structFieldNames, structFieldObjectInspectors,
+ structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped,
+ escapeChar);
+ break;
+ case AVRO:
+ result =
+ new AvroLazyObjectInspector(structFieldNames, structFieldObjectInspectors,
+ structFieldComments, separator, nullSequence, lastColumnTakesRest, escaped,
+ escapeChar);
+ break;
+ default:
+ throw new IllegalArgumentException("Illegal ObjectInspector type [" + option + "]");
+ }
+
cachedLazySimpleStructObjectInspector.put(signature, result);
}
return result;
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java Tue Sep 9 15:18:29 2014
@@ -18,10 +18,13 @@
package org.apache.hadoop.hive.serde2.lazy.objectinspector;
+import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.hive.serde2.BaseStructObjectInspector;
import org.apache.hadoop.hive.serde2.StructObject;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.io.Text;
@@ -100,6 +103,20 @@ public class LazySimpleStructObjectInspe
int fieldID = f.getFieldID();
assert (fieldID >= 0 && fieldID < fields.size());
+ ObjectInspector oi = f.getFieldObjectInspector();
+
+ if (oi instanceof AvroLazyObjectInspector) {
+ return ((AvroLazyObjectInspector) oi).getStructFieldData(data, fieldRef);
+ }
+
+ if (oi instanceof MapObjectInspector) {
+ ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
+
+ if (valueOI instanceof AvroLazyObjectInspector) {
+ return ((AvroLazyObjectInspector) valueOI).getStructFieldData(data, fieldRef);
+ }
+ }
+
return struct.getField(fieldID);
}
@@ -108,8 +125,15 @@ public class LazySimpleStructObjectInspe
if (data == null) {
return null;
}
- StructObject struct = (StructObject) data;
- return struct.getFieldsAsList();
+
+ // Iterate over all the fields picking up the nested structs within them
+ List<Object> result = new ArrayList<Object>(fields.size());
+
+ for (MyField myField : fields) {
+ result.add(getStructFieldData(data, myField));
+ }
+
+ return result;
}
// For LazyStruct
Modified: hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java (original)
+++ hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java Tue Sep 9 15:18:29 2014
@@ -57,7 +57,7 @@ public final class ObjectInspectorFactor
* for the same Java type.
*/
public enum ObjectInspectorOptions {
- JAVA, THRIFT, PROTOCOL_BUFFERS
+ JAVA, THRIFT, PROTOCOL_BUFFERS, AVRO
};
private static ConcurrentHashMap<Type, ObjectInspector> objectInspectorCache = new ConcurrentHashMap<Type, ObjectInspector>();