You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by br...@apache.org on 2014/09/09 17:18:30 UTC

svn commit: r1623845 [1/3] - in /hive/trunk: hbase-handler/ hbase-handler/if/ hbase-handler/if/test/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/ hbase-handler/src/test/org/apache/hado...

Author: brock
Date: Tue Sep  9 15:18:29 2014
New Revision: 1623845

URL: http://svn.apache.org/r1623845
Log:
HIVE-6147 - Support avro data stored in HBase columns (Swarnim Kulkarni via Brock)

Added:
    hive/trunk/hbase-handler/if/
    hive/trunk/hbase-handler/if/test/
    hive/trunk/hbase-handler/if/test/avro_test.avpr
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Address.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/ContactInfo.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Employee.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/EmployeeAvro.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/Gender.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/HomePhone.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/OfficePhone.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
Modified:
    hive/trunk/hbase-handler/pom.xml
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
    hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
    hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
    hive/trunk/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
    hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java

Added: hive/trunk/hbase-handler/if/test/avro_test.avpr
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/if/test/avro_test.avpr?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/if/test/avro_test.avpr (added)
+++ hive/trunk/hbase-handler/if/test/avro_test.avpr Tue Sep  9 15:18:29 2014
@@ -0,0 +1,144 @@
+{
+"protocol": "EmployeeAvro",
+"namespace": "org.apache.hadoop.hive.hbase.avro",
+"types": [
+{
+"type": "enum",
+"name": "Gender",
+"symbols": [
+"MALE",
+"FEMALE"
+]
+},
+{
+"type": "record",
+"name": "HomePhone",
+"fields": [
+{
+"name": "areaCode",
+"type": "long"
+},
+{
+"name": "number",
+"type": "long"
+}
+]
+},
+{
+"type": "record",
+"name": "OfficePhone",
+"fields": [
+{
+"name": "areaCode",
+"type": "long"
+},
+{
+"name": "number",
+"type": "long"
+}
+]
+},
+{
+"type": "record",
+"name": "Address",
+"fields": [
+{
+"name": "address1",
+"type": "string"
+},
+{
+"name": "address2",
+"type": "string"
+},
+{
+"name": "city",
+"type": "string"
+},
+{
+"name": "zipcode",
+"type": "long"
+},
+{
+"name": "county",
+"type": [
+"HomePhone",
+"OfficePhone",
+"string",
+"null"
+]
+},
+{
+"name": "aliases",
+"type": [
+{
+"type": "array",
+"items": "string"
+},
+"null"
+]
+},
+{
+"name": "metadata",
+"type": [
+"null",
+{
+"type": "map",
+"values": "string"
+}
+]
+}
+]
+},
+{
+"type": "record",
+"name": "ContactInfo",
+"fields": [
+{
+"name": "address",
+"type": [
+{
+"type": "array",
+"items": "Address"
+},
+"null"
+]
+},
+{
+"name": "homePhone",
+"type": "HomePhone"
+},
+{
+"name": "officePhone",
+"type": "OfficePhone"
+}
+]
+},
+{
+"type": "record",
+"name": "Employee",
+"fields": [
+{
+"name": "employeeName",
+"type": "string"
+},
+{
+"name": "employeeID",
+"type": "long"
+},
+{
+"name": "age",
+"type": "long"
+},
+{
+"name": "gender",
+"type": "Gender"
+},
+{
+"name": "contactInfo",
+"type": "ContactInfo"
+}
+]
+}
+],
+"messages": { }
+}

Modified: hive/trunk/hbase-handler/pom.xml
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/pom.xml?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/pom.xml (original)
+++ hive/trunk/hbase-handler/pom.xml Tue Sep  9 15:18:29 2014
@@ -57,6 +57,11 @@
       <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.7.6</version>
+	</dependency>
   </dependencies>
 
   <profiles>
@@ -101,6 +106,12 @@
           <version>${hbase.hadoop1.version}</version>
         </dependency>
         <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop-20S.version}</version>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
           <groupId>org.apache.hbase</groupId>
           <artifactId>hbase-common</artifactId>
           <version>${hbase.hadoop1.version}</version>
@@ -134,11 +145,25 @@
         </dependency>
         <dependency>
           <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-23.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
           <artifactId>hadoop-mapreduce-client-core</artifactId>
           <version>${hadoop-23.version}</version>
           <optional>true</optional>
         </dependency>
         <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop-23.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
           <groupId>org.apache.hbase</groupId>
           <artifactId>hbase-hadoop2-compat</artifactId>
           <version>${hbase.hadoop2.version}</version>
@@ -190,6 +215,12 @@
           <type>test-jar</type>
           <scope>test</scope>
         </dependency>
+        <dependency>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-servlet</artifactId>
+          <version>${jersey.version}</version>
+          <scope>test</scope>
+       </dependency>
       </dependencies>
     </profile>
   </profiles>
@@ -209,6 +240,42 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-maven-plugin</artifactId>
+        <version>1.7.6</version>
+        <executions>
+           <execution>
+               <phase>generate-test-sources</phase>
+               <goals>
+                  <goal>protocol</goal>
+               </goals>
+               <configuration>
+                  <testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
+                  <testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
+               </configuration>
+           </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <version>1.7</version>
+        <executions>
+           <execution>
+               <id>add-test-sources</id>
+               <phase>generate-test-sources</phase>
+               <goals>
+                  <goal>add-test-source</goal>
+               </goals>
+               <configuration>
+                  <sources>
+                     <source>${project.basedir}/src/gen/avro/gen-java</source>
+                  </sources>
+               </configuration>
+           </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java Tue Sep  9 15:18:29 2014
@@ -23,16 +23,21 @@
 
 package org.apache.hadoop.hive.hbase;
 
-import com.google.common.collect.Iterators;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Iterators;
 
 public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
 
@@ -53,24 +58,41 @@ public class ColumnMappings implements I
     return columnsMapping.length;
   }
 
-  String toTypesString() {
+  String toNamesString(Properties tbl, String autogenerate) {
+    if (autogenerate != null && autogenerate.equals("true")) {
+      StringBuilder sb = new StringBuilder();
+      HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
+      return sb.toString();
+    }
+
+    return StringUtils.EMPTY; // return empty string
+  }
+
+  String toTypesString(Properties tbl, Configuration conf, String autogenerate)
+      throws SerDeException {
     StringBuilder sb = new StringBuilder();
-    for (ColumnMapping colMap : columnsMapping) {
-      if (sb.length() > 0) {
-        sb.append(":");
-      }
-      if (colMap.hbaseRowKey) {
-        // the row key column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
-      } else if (colMap.qualifierName == null) {
-        // a column family become a MAP
-        sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
-            + serdeConstants.STRING_TYPE_NAME + ">");
-      } else {
-        // an individual column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
+
+    if (autogenerate != null && autogenerate.equals("true")) {
+      HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
+    } else {
+      for (ColumnMapping colMap : columnsMapping) {
+        if (sb.length() > 0) {
+          sb.append(":");
+        }
+        if (colMap.hbaseRowKey) {
+          // the row key column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        } else if (colMap.qualifierName == null) {
+          // a column family become a MAP
+          sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+              + serdeConstants.STRING_TYPE_NAME + ">");
+        } else {
+          // an individual column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        }
       }
     }
+
     return sb.toString();
   }
 

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java Tue Sep  9 15:18:29 2014
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.hbase;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public class HBaseCompositeKey extends L
 
     return lazyObject;
   }
+
+  /**
+   * Return the different parts of the key. By default, this returns an empty map. Consumers can
+   * choose to override this to provide their own names and types of parts of the key.
+   *
+   * @return map of part names to their types
+   * */
+  public Map<String, String> getParts() {
+    return Collections.emptyMap();
+  }
 }

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java Tue Sep  9 15:18:29 2014
@@ -18,32 +18,31 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import java.util.ArrayList;
-import java.util.List;
-
 // Does same thing with LazyFactory#createLazyObjectInspector except that this replaces
 // original keyOI with OI which is create by HBaseKeyFactory provided by serde property for hbase
 public class HBaseLazyObjectFactory {
 
   public static ObjectInspector createLazyHBaseStructInspector(
-      SerDeParameters serdeParams, int index, HBaseKeyFactory factory) throws SerDeException {
+      SerDeParameters serdeParams, int index, HBaseKeyFactory keyFactory, List<HBaseValueFactory> valueFactories) throws SerDeException {
     List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
     ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
         columnTypes.size());
     for (int i = 0; i < columnTypes.size(); i++) {
       if (i == index) {
-        columnObjectInspectors.add(factory.createKeyObjectInspector(columnTypes.get(i)));
+        columnObjectInspectors.add(keyFactory.createKeyObjectInspector(columnTypes.get(i)));
       } else {
-        columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
-            columnTypes.get(i), serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
-            serdeParams.isEscaped(), serdeParams.getEscapeChar()));
+        columnObjectInspectors.add(valueFactories.get(i).createValueObjectInspector(
+            columnTypes.get(i)));
       }
     }
     return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
@@ -51,4 +50,4 @@ public class HBaseLazyObjectFactory {
         serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
         serdeParams.isEscaped(), serdeParams.getEscapeChar());
   }
-}
+}
\ No newline at end of file

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java Tue Sep  9 15:18:29 2014
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.serde2.ByteStream;
@@ -34,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
 public class HBaseRowSerializer {
 
   private final HBaseKeyFactory keyFactory;
@@ -279,6 +279,10 @@ public class HBaseRowSerializer {
           }
         }
         return true;
+       case UNION: {
+        // union type currently not totally supported. See HIVE-2390
+        return false;
+       }
       default:
         throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
     }

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Tue Sep  9 15:18:29 2014
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -25,6 +29,7 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -33,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
 /**
  * HBaseSerDe can be used to serialize object into an HBase table and
  * deserialize objects from an HBase table.
@@ -50,15 +51,21 @@ public class HBaseSerDe extends Abstract
   public static final String HBASE_KEY_COL = ":key";
   public static final String HBASE_PUT_TIMESTAMP = "hbase.put.timestamp";
   public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
+  public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
   public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
   public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
   public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
   public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
+  public static final String HBASE_AUTOGENERATE_STRUCT = "hbase.struct.autogenerate";
   /**
-   *  Determines whether a regex matching should be done on the columns or not. Defaults to true.
-   *  <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
+   * Determines whether a regex matching should be done on the columns or not. Defaults to true.
+   * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
    */
   public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching";
+  /**
+   * Defines the type for a column.
+   **/
+  public static final String SERIALIZATION_TYPE = "serialization.type";
 
   private ObjectInspector cachedObjectInspector;
   private LazyHBaseRow cachedHBaseRow;
@@ -83,8 +90,11 @@ public class HBaseSerDe extends Abstract
       throws SerDeException {
     serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName());
 
-    cachedObjectInspector = HBaseLazyObjectFactory.createLazyHBaseStructInspector(
-        serdeParams.getSerdeParams(), serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+    cachedObjectInspector =
+        HBaseLazyObjectFactory
+            .createLazyHBaseStructInspector(serdeParams.getSerdeParams(),
+                serdeParams.getKeyIndex(), serdeParams.getKeyFactory(),
+                serdeParams.getValueFactories());
 
     cachedHBaseRow = new LazyHBaseRow(
         (LazySimpleStructObjectInspector) cachedObjectInspector,

Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,557 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase;
+
+import static org.apache.hadoop.hive.hbase.HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE;
+
+import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroObjectInspectorGenerator;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.util.StringUtils;
+
+/**
+ * Helper class for {@link HBaseSerDe}
+ * */
+public class HBaseSerDeHelper {
+
+  /**
+   * Logger
+   * */
+  public static final Log LOG = LogFactory.getLog(HBaseSerDeHelper.class);
+
+  /**
+   * Autogenerates the columns from the given serialization class
+   * 
+   * @param tbl the hive table properties
+   * @param columnsMapping the hbase columns mapping determining hbase column families and
+   *          qualifiers
+   * @param sb StringBuilder to form the list of columns
+   * @throws IllegalArgumentException if any of the given arguments was null
+   * */
+  public static void generateColumns(Properties tbl, List<ColumnMapping> columnsMapping,
+      StringBuilder sb) {
+    // Generate the columns according to the column mapping provided
+    // Note: The generated column names are the same as the
+    // family_name.qualifier_name. If the qualifier
+    // name is null, each column is familyname_col[i] where i is the index of
+    // the column ranging
+    // from 0 to n-1 where n is the size of the column mapping. The filter
+    // function removes any
+    // special characters other than letters and numbers from the column
+    // family and qualifier name
+    // as the only special character allowed in a column name is "_" which is
+    // used as a separator
+    // between the column family and qualifier name.
+
+    if (columnsMapping == null) {
+      throw new IllegalArgumentException("columnsMapping cannot be null");
+    }
+
+    if (sb == null) {
+      throw new IllegalArgumentException("StringBuilder cannot be null");
+    }
+
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey) {
+        sb.append("key").append(StringUtils.COMMA_STR);
+      } else if (colMap.qualifierName == null) {
+        // this corresponds to a map<string,?>
+
+        if (colMap.qualifierPrefix != null) {
+          sb.append(filter(colMap.familyName)).append("_")
+              .append(filter(colMap.qualifierPrefix) + i).append(StringUtils.COMMA_STR);
+        } else {
+          sb.append(filter(colMap.familyName)).append("_").append("col" + i)
+              .append(StringUtils.COMMA_STR);
+        }
+      } else {
+        // just an individual column
+        sb.append(filter(colMap.familyName)).append("_").append(filter(colMap.qualifierName))
+            .append(StringUtils.COMMA_STR);
+      }
+    }
+
+    // trim off the ending ",", if any
+    trim(sb);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Generated columns: [" + sb.toString() + "]");
+    }
+  }
+
+  /**
+   * Autogenerates the column types from the given serialization class
+   * 
+   * @param tbl the hive table properties
+   * @param columnsMapping the hbase columns mapping determining hbase column families and
+   *          qualifiers
+   * @param sb StringBuilder to form the list of columns
+   * @param conf configuration
+   * @throws IllegalArgumentException if any of the given arguments was null
+   * @throws SerDeException if there was an error generating the column types
+   * */
+  public static void generateColumnTypes(Properties tbl, List<ColumnMapping> columnsMapping,
+      StringBuilder sb, Configuration conf) throws SerDeException {
+
+    if (tbl == null) {
+      throw new IllegalArgumentException("tbl cannot be null");
+    }
+
+    if (columnsMapping == null) {
+      throw new IllegalArgumentException("columnsMapping cannot be null");
+    }
+
+    if (sb == null) {
+      throw new IllegalArgumentException("StringBuilder cannot be null");
+    }
+
+    // Generate the columns according to the column mapping provided
+    for (int i = 0; i < columnsMapping.size(); i++) {
+      if (sb.length() > 0) {
+        sb.append(":");
+      }
+
+      ColumnMapping colMap = columnsMapping.get(i);
+
+      if (colMap.hbaseRowKey) {
+
+        Map<String, String> compositeKeyParts = getCompositeKeyParts(tbl);
+        StringBuilder keyStruct = new StringBuilder();
+
+        if (compositeKeyParts == null || compositeKeyParts.isEmpty()) {
+          String compKeyClass = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+          String compKeyTypes = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_TYPES);
+
+          if (compKeyTypes == null) {
+
+            if (compKeyClass != null) {
+              // a composite key class was provided, but neither the types
+              // property was set nor
+              // was the getParts() method of HBaseCompositeKey
+              // overridden in the
+              // implementation. Flag exception.
+              throw new SerDeException(
+                  "Either the hbase.composite.key.types property should be set or the getParts method must be overridden in "
+                      + compKeyClass);
+            }
+
+            // the row key column becomes a STRING
+            sb.append(serdeConstants.STRING_TYPE_NAME);
+          } else {
+            generateKeyStruct(compKeyTypes, keyStruct);
+          }
+        } else {
+          generateKeyStruct(compositeKeyParts, keyStruct);
+        }
+        sb.append(keyStruct);
+      } else if (colMap.qualifierName == null) {
+
+        String serClassName = null;
+        String serType = null;
+        String schemaLiteral = null;
+        String schemaUrl = null;
+
+        if (colMap.qualifierPrefix != null) {
+
+          serType =
+              tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                  + HBaseSerDe.SERIALIZATION_TYPE);
+
+          if (serType == null) {
+            throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+                + " property not provided for column family [" + colMap.familyName
+                + "] and prefix [" + colMap.qualifierPrefix + "]");
+          }
+
+          // we are provided with a prefix
+          serClassName =
+              tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                  + serdeConstants.SERIALIZATION_CLASS);
+
+          if (serClassName == null) {
+            if (serType.equalsIgnoreCase(HBaseSerDeParameters.AVRO_SERIALIZATION_TYPE)) {
+              // for avro type, the serialization class parameter is optional
+              schemaLiteral =
+                  tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                      + AvroSerdeUtils.SCHEMA_LITERAL);
+              schemaUrl =
+                  tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                      + AvroSerdeUtils.SCHEMA_URL);
+
+              if (schemaLiteral == null && schemaUrl == null) {
+                // either schema literal, schema url or serialization class must
+                // be provided
+                throw new SerDeException("For an avro schema, either "
+                    + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or "
+                    + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+              }
+
+              if (schemaUrl != null) {
+                schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+              }
+
+            } else {
+              throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+                  + " property not provided for column family [" + colMap.familyName
+                  + "] and prefix [" + colMap.qualifierPrefix + "]");
+            }
+          }
+        } else {
+          serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+          if (serType == null) {
+            throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+                + " property not provided for column family [" + colMap.familyName + "]");
+          }
+
+          serClassName =
+              tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+          if (serClassName == null) {
+
+            if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+              // for avro type, the serialization class parameter is optional
+              schemaLiteral =
+                  tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+              schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+
+              if (schemaLiteral == null && schemaUrl == null) {
+                // either schema literal or serialization class must be provided
+                throw new SerDeException("For an avro schema, either "
+                    + AvroSerdeUtils.SCHEMA_LITERAL + " property or "
+                    + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+              }
+
+              if (schemaUrl != null) {
+                schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+              }
+            } else {
+              throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+                  + " property not provided for column family [" + colMap.familyName + "]");
+            }
+          }
+        }
+
+        StringBuilder generatedStruct = new StringBuilder();
+
+        // generate struct for each of the given prefixes
+        generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
+
+        // a column family becomes a MAP
+        sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+            + generatedStruct + ">");
+
+      } else {
+
+        String qualifierName = colMap.qualifierName;
+
+        if (colMap.qualifierName.endsWith("*")) {
+          // we are provided with a prefix
+          qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+        }
+
+        String serType =
+            tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+                + HBaseSerDe.SERIALIZATION_TYPE);
+
+        if (serType == null) {
+          throw new SerDeException(HBaseSerDe.SERIALIZATION_TYPE
+              + " property not provided for column family [" + colMap.familyName
+              + "] and qualifier [" + qualifierName + "]");
+        }
+
+        String serClassName =
+            tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+                + serdeConstants.SERIALIZATION_CLASS);
+
+        String schemaLiteral = null;
+        String schemaUrl = null;
+
+        if (serClassName == null) {
+
+          if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+            // for avro type, the serialization class parameter is optional
+            schemaLiteral =
+                tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+                    + AvroSerdeUtils.SCHEMA_LITERAL);
+            schemaUrl =
+                tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+                    + AvroSerdeUtils.SCHEMA_URL);
+
+            if (schemaLiteral == null && schemaUrl == null) {
+              // either schema literal, schema url or serialization class must
+              // be provided
+              throw new SerDeException("For an avro schema, either "
+                  + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL + " or "
+                  + serdeConstants.SERIALIZATION_CLASS + " property must be set.");
+            }
+
+            if (schemaUrl != null) {
+              schemaLiteral = getSchemaFromFS(schemaUrl, conf).toString();
+            }
+          } else {
+            throw new SerDeException(serdeConstants.SERIALIZATION_CLASS
+                + " property not provided for column family [" + colMap.familyName
+                + "] and qualifier [" + qualifierName + "]");
+          }
+        }
+
+        StringBuilder generatedStruct = new StringBuilder();
+
+        generateColumnStruct(serType, serClassName, schemaLiteral, colMap, generatedStruct);
+
+        sb.append(generatedStruct);
+      }
+    }
+
+    // trim off ending ",", if any
+    trim(sb);
+
+    if (LOG.isDebugEnabled()) {
+      LOG.debug("Generated column types: [" + sb.toString() + "]");
+    }
+  }
+
+  /**
+   * Reads an Avro {@link Schema} from the given filesystem URL (typically an HDFS path).
+   *
+   * @param schemaFSUrl URL of the schema file, resolvable via {@link FileSystem#get}
+   * @param conf configuration used to obtain the filesystem
+   * @return the parsed schema
+   * @throws SerDeException if the URL is malformed or the schema cannot be read
+   * */
+  public static Schema getSchemaFromFS(String schemaFSUrl, Configuration conf)
+      throws SerDeException {
+    FSDataInputStream in = null;
+    FileSystem fs = null;
+    try {
+      fs = FileSystem.get(new URI(schemaFSUrl), conf);
+      in = fs.open(new Path(schemaFSUrl));
+      // NOTE(review): Schema.parse(InputStream) is deprecated in newer Avro releases;
+      // consider new Schema.Parser().parse(in) when the Avro dependency allows — confirm.
+      Schema s = Schema.parse(in);
+      return s;
+    } catch (URISyntaxException e) {
+      throw new SerDeException("Failure reading schema from filesystem", e);
+    } catch (IOException e) {
+      throw new SerDeException("Failure reading schema from filesystem", e);
+    } finally {
+      // in may still be null if open() failed; closeQuietly tolerates that
+      IOUtils.closeQuietly(in);
+    }
+  }
+
+  /**
+   * Builds a Hive struct type string ("struct&lt;name:type,...&gt;") from the composite key parts.
+   *
+   * @param compositeKeyParts map of composite key part name to its Hive type, usually supplied
+   *          by a custom {@link HBaseCompositeKey composite key} implementation
+   * @param sb StringBuilder that receives the generated struct text
+   * */
+  private static void generateKeyStruct(Map<String, String> compositeKeyParts, StringBuilder sb) {
+    sb.append("struct<");
+
+    // emit each part as "name:type", comma separated
+    String separator = "";
+    for (Entry<String, String> part : compositeKeyParts.entrySet()) {
+      sb.append(separator).append(part.getKey()).append(":").append(part.getValue());
+      separator = ",";
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Builds a Hive struct type string for a composite key whose part types are given as a
+   * comma separated list; parts are named col0, col1, ... in order of appearance.
+   *
+   * @param compositeKeyTypes comma separated list of composite key part types, in key order
+   * @param sb StringBuilder that receives the generated struct text
+   * */
+  private static void generateKeyStruct(String compositeKeyTypes, StringBuilder sb) {
+    sb.append("struct<");
+
+    // the property value lists the types of the key parts in the order in which
+    // they appear within the row key
+    String[] partTypes = compositeKeyTypes.split(",");
+
+    for (int idx = 0; idx < partTypes.length; idx++) {
+      if (idx > 0) {
+        sb.append(StringUtils.COMMA_STR);
+      }
+      sb.append("col").append(idx).append(":").append(partTypes[idx]);
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Auto-generates the Hive struct for a column's value.
+   *
+   * @param serType serialization type (only "avro" is currently supported)
+   * @param serClassName serialization class name, may be null
+   * @param schemaLiteral schema string, used when no serialization class is given
+   * @param colMap hbase column mapping, used for error reporting
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if the serialization type is unsupported
+   * */
+  private static void generateColumnStruct(String serType, String serClassName,
+      String schemaLiteral, ColumnMapping colMap, StringBuilder sb) throws SerDeException {
+
+    // only the avro serialization type is understood here
+    if (!serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+      throw new SerDeException("Unknown " + HBaseSerDe.SERIALIZATION_TYPE
+          + " found for column family [" + colMap.familyName + "]");
+    }
+
+    // prefer the serialization class when configured; otherwise use the schema literal
+    if (serClassName == null) {
+      generateAvroStructFromSchema(schemaLiteral, sb);
+    } else {
+      generateAvroStructFromClass(serClassName, sb);
+    }
+  }
+
+  /**
+   * Auto-generates the avro struct by reflecting over the given serialization class.
+   *
+   * @param serClassName fully qualified name of the serialization class
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if the class cannot be loaded or the struct cannot be generated
+   * */
+  private static void generateAvroStructFromClass(String serClassName, StringBuilder sb)
+      throws SerDeException {
+    Class<?> clazz;
+    try {
+      clazz = Class.forName(serClassName);
+    } catch (ClassNotFoundException cnfe) {
+      throw new SerDeException("Error obtaining descriptor for " + serClassName, cnfe);
+    }
+
+    // derive the avro schema reflectively from the class definition, then delegate
+    generateAvroStructFromSchema(ReflectData.get().getSchema(clazz), sb);
+  }
+
+  /**
+   * Auto-generates the avro struct from a schema given as a string literal.
+   *
+   * @param schemaLiteral schema for the avro struct as string
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if something goes wrong while generating the struct
+   * */
+  private static void generateAvroStructFromSchema(String schemaLiteral, StringBuilder sb)
+      throws SerDeException {
+    // parse the literal into a Schema and delegate to the Schema-based overload
+    generateAvroStructFromSchema(Schema.parse(schemaLiteral), sb);
+  }
+
+  /**
+   * Auto-generates the Hive struct ("struct&lt;name:type,...&gt;") for the given avro schema.
+   *
+   * @param schema schema for the avro struct
+   * @param sb StringBuilder to hold the generated struct
+   * @throws SerDeException if something goes wrong while generating the struct
+   * */
+  private static void generateAvroStructFromSchema(Schema schema, StringBuilder sb)
+      throws SerDeException {
+    AvroObjectInspectorGenerator generator = new AvroObjectInspectorGenerator(schema);
+
+    // column names and their corresponding types, as derived from the schema
+    List<String> names = generator.getColumnNames();
+    List<TypeInfo> types = generator.getColumnTypes();
+
+    if (names.size() != types.size()) {
+      throw new AssertionError("The number of column names should be the same as column types");
+    }
+
+    sb.append("struct<");
+
+    for (int idx = 0; idx < names.size(); idx++) {
+      if (idx != 0) {
+        sb.append(",");
+      }
+      sb.append(names.get(idx)).append(":").append(types.get(idx).getTypeName());
+    }
+
+    sb.append(">");
+  }
+
+  /**
+   * Trims by removing the trailing "," if any.
+   *
+   * @param sb StringBuilder to trim
+   * @return the same StringBuilder, with any trailing comma removed
+   * */
+  private static StringBuilder trim(StringBuilder sb) {
+    // guard against an empty builder: charAt(length - 1) would throw
+    // StringIndexOutOfBoundsException when length == 0
+    if (sb.length() > 0 && sb.charAt(sb.length() - 1) == StringUtils.COMMA) {
+      return sb.deleteCharAt(sb.length() - 1);
+    }
+
+    return sb;
+  }
+
+  /**
+   * Filters the given name by removing every character that is not an ASCII letter or digit,
+   * then converting the result to lowercase.
+   * NOTE(review): toLowerCase() uses the JVM default locale; for locale-independent output
+   * (e.g. under a Turkish locale, 'I' lowercases to a dotless i) toLowerCase(Locale.ROOT)
+   * would be safer — confirm intent.
+   * */
+  private static String filter(String name) {
+    return name.replaceAll("[^a-zA-Z0-9]+", "").toLowerCase();
+  }
+
+  /**
+   * Return the parts of the composite key, as reported by the configured
+   * {@link HBaseSerDe#HBASE_COMPOSITE_KEY_CLASS} implementation.
+   * 
+   * @param tbl Properties for the table
+   * @return map of composite key part name to its type, or {@code null} when no custom
+   *         composite key class is configured
+   * @throws SerDeException if something goes wrong while getting the composite key parts
+   * */
+  @SuppressWarnings("unchecked")
+  private static Map<String, String> getCompositeKeyParts(Properties tbl) throws SerDeException {
+    String compKeyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+
+    if (compKeyClassName == null) {
+      // no custom composite key class provided. return null
+      return null;
+    }
+
+    CompositeHBaseKeyFactory<HBaseCompositeKey> keyFactory = null;
+
+    Class<?> keyClass;
+    try {
+      keyClass = Class.forName(compKeyClassName);
+      // raw-type instantiation; the unchecked warning is suppressed on the method
+      keyFactory = new CompositeHBaseKeyFactory(keyClass);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    // NOTE(review): createKey(null) is presumed safe because only the part names/types are
+    // queried afterwards — confirm against the key factory implementation.
+    HBaseCompositeKey compKey = keyFactory.createKey(null);
+    return compKey.getParts();
+  }
+}
\ No newline at end of file

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Tue Sep  9 15:18:29 2014
@@ -18,13 +18,20 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -37,10 +44,12 @@ import org.apache.hadoop.util.Reflection
  */
 public class HBaseSerDeParameters {
 
+  public static final String AVRO_SERIALIZATION_TYPE = "avro";
+  public static final String STRUCT_SERIALIZATION_TYPE = "struct";
+
   private final SerDeParameters serdeParams;
 
   private final Configuration job;
-  private final Properties tbl;
 
   private final String columnMappingString;
   private final ColumnMappings columnMappings;
@@ -48,57 +57,50 @@ public class HBaseSerDeParameters {
 
   private final long putTimestamp;
   private final HBaseKeyFactory keyFactory;
+  private final List<HBaseValueFactory> valueFactories;
 
   HBaseSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException {
     this.job = job;
-    this.tbl = tbl;
-    this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
-    this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
 
     // Read configuration parameters
     columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
-    doColumnRegexMatching = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+    doColumnRegexMatching =
+        Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
     // Parse and initialize the HBase columns mapping
     columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching);
-    columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), serdeParams.getColumnTypes());
-
-    // Precondition: make sure this is done after the rest of the SerDe initialization is done.
-    String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
-    columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
 
     // Build the type property string if not supplied
     String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
-    if (columnTypeProperty == null) {
-      tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMappings.toTypesString());
-    }
+    String autogenerate = tbl.getProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT);
 
-    this.keyFactory = initKeyFactory(job, tbl);
-  }
+    if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
+      String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+      if (columnNameProperty == null || columnNameProperty.isEmpty()) {
+        if (autogenerate == null || autogenerate.isEmpty()) {
+          throw new IllegalArgumentException("Either the columns must be specified or the "
+              + HBaseSerDe.HBASE_AUTOGENERATE_STRUCT + " property must be set to true.");
+        }
 
-  private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
-    try {
-      HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
-      if (keyFactory != null) {
-        keyFactory.init(this, tbl);
+        tbl.setProperty(serdeConstants.LIST_COLUMNS,
+            columnMappings.toNamesString(tbl, autogenerate));
       }
-      return keyFactory;
-    } catch (Exception e) {
-      throw new SerDeException(e);
-    }
-  }
 
-  private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) throws Exception {
-    String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
-    if (factoryClassName != null) {
-      Class<?> factoryClazz = Class.forName(factoryClassName);
-      return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+      tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+          columnMappings.toTypesString(tbl, job, autogenerate));
     }
-    String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
-    if (keyClassName != null) {
-      Class<?> keyClass = Class.forName(keyClassName);
-      return new CompositeHBaseKeyFactory(keyClass);
-    }
-    return new DefaultHBaseKeyFactory();
+
+    this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
+    this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
+
+    columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(),
+        serdeParams.getColumnTypes());
+
+    // Precondition: make sure this is done after the rest of the SerDe initialization is done.
+    String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
+    columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
+
+    this.keyFactory = initKeyFactory(job, tbl);
+    this.valueFactories = initValueFactories(job, tbl);
   }
 
   public List<String> getColumnNames() {
@@ -133,6 +135,10 @@ public class HBaseSerDeParameters {
     return keyFactory;
   }
 
+  public List<HBaseValueFactory> getValueFactories() {
+    return valueFactories;
+  }
+
   public Configuration getBaseConfiguration() {
     return job;
   }
@@ -151,4 +157,190 @@ public class HBaseSerDeParameters {
   public String toString() {
     return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]";
   }
-}
+
+  /**
+   * Creates the key factory from the table properties and initializes it, if one was created.
+   * */
+  private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
+    try {
+      HBaseKeyFactory factory = createKeyFactory(conf, tbl);
+      if (factory == null) {
+        return null;
+      }
+      factory.init(this, tbl);
+      return factory;
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /**
+   * Instantiates the key factory: an explicitly configured factory class wins over a
+   * composite key class; otherwise the default factory is used.
+   * */
+  private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl)
+      throws Exception {
+    String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
+    if (factoryClassName != null) {
+      return (HBaseKeyFactory) ReflectionUtils.newInstance(Class.forName(factoryClassName), job);
+    }
+
+    String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+    if (keyClassName != null) {
+      return new CompositeHBaseKeyFactory(Class.forName(keyClassName));
+    }
+
+    return new DefaultHBaseKeyFactory();
+  }
+
+  /**
+   * Creates the per-column value factories and initializes each one before returning them.
+   * */
+  private List<HBaseValueFactory> initValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    List<HBaseValueFactory> factories = createValueFactories(conf, tbl);
+
+    // give every factory a chance to configure itself from the serde parameters
+    for (HBaseValueFactory factory : factories) {
+      factory.init(this, conf, tbl);
+    }
+
+    return factories;
+  }
+
+  /**
+   * Creates one {@link HBaseValueFactory} per column mapping: an Avro factory for columns
+   * whose serialization type is "avro", the default factory otherwise.
+   *
+   * @param conf the job configuration
+   * @param tbl the table properties
+   * @return list of value factories, parallel to the column mappings
+   * @throws SerDeException if the serialization type or schema cannot be determined
+   * */
+  private List<HBaseValueFactory> createValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    List<HBaseValueFactory> valueFactories = new ArrayList<HBaseValueFactory>();
+
+    try {
+      for (int i = 0; i < columnMappings.size(); i++) {
+        String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]);
+
+        // use equalsIgnoreCase for consistency with every other serialization-type check
+        // (getSchema, HBaseSerDeHelper); a case-sensitive equals() would silently fall back
+        // to the default factory for "Avro"/"AVRO"
+        if (serType != null && serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+          Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]);
+          valueFactories.add(new AvroHBaseValueFactory(schema));
+        } else {
+          valueFactories.add(new DefaultHBaseValueFactory());
+        }
+      }
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    return valueFactories;
+  }
+
+  /**
+   * Looks up the configured serialization type for the given {@link ColumnMapping colMap}.
+   * Returns null when no serialization.type property is set, or when the mapping is the
+   * hbase row key (which carries no serialization type).
+   * */
+  private String getSerializationType(Configuration conf, Properties tbl,
+      ColumnMapping colMap) throws Exception {
+    if (colMap.qualifierName == null) {
+      // family-only mapping: the property may be scoped by a qualifier prefix
+      String scope = (colMap.qualifierPrefix != null)
+          ? colMap.familyName + "." + colMap.qualifierPrefix
+          : colMap.familyName;
+      return tbl.getProperty(scope + "." + HBaseSerDe.SERIALIZATION_TYPE);
+    }
+
+    if (colMap.hbaseRowKey) {
+      // the row key has no serialization type associated with it
+      return null;
+    }
+
+    // individual qualifier, or a prefix ending in "*": strip the wildcard before the lookup
+    String qualifierName = colMap.qualifierName;
+    if (qualifierName.endsWith("*")) {
+      qualifierName = qualifierName.substring(0, qualifierName.length() - 1);
+    }
+
+    return tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+        + HBaseSerDe.SERIALIZATION_TYPE);
+  }
+
+  /**
+   * Resolves the Avro {@link Schema} for the given column mapping from the table properties:
+   * a schema literal, a schema URL, or reflection over the configured serialization class.
+   * Returns null when a schema retriever class is configured (the retriever supplies schemas
+   * instead) or when the serialization type is not avro.
+   *
+   * @param conf the job configuration
+   * @param tbl the table properties
+   * @param colMap the column mapping to resolve the schema for
+   * @return the resolved schema, or null as described above
+   * @throws Exception if the serialization class cannot be loaded or the schema cannot be read
+   * */
+  private Schema getSchema(Configuration conf, Properties tbl, ColumnMapping colMap)
+      throws Exception {
+    String serType = null;
+    String serClassName = null;
+    String schemaLiteral = null;
+    String schemaUrl = null;
+
+    if (colMap.qualifierName == null) {
+      // only a column family
+
+      if (colMap.qualifierPrefix != null) {
+        serType =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_URL);
+      } else {
+        serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+      }
+    } else if (!colMap.hbaseRowKey) {
+      // not an hbase row key. This should either be a prefix or an individual qualifier
+      String qualifierName = colMap.qualifierName;
+
+      if (colMap.qualifierName.endsWith("*")) {
+        qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+      }
+
+      serType =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + HBaseSerDe.SERIALIZATION_TYPE);
+
+      serClassName =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + serdeConstants.SERIALIZATION_CLASS);
+
+      schemaLiteral =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + AvroSerdeUtils.SCHEMA_LITERAL);
+
+      schemaUrl =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroSerdeUtils.SCHEMA_URL);
+    }
+
+    String avroSchemaRetClass = tbl.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+    if (schemaLiteral == null && serClassName == null && schemaUrl == null
+        && avroSchemaRetClass == null) {
+      throw new IllegalArgumentException("serialization.type was set to [" + serType
+          + "] but neither " + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL
+          + ", serialization.class or " + AvroSerdeUtils.SCHEMA_RETRIEVER + " property was set");
+    }
+
+    Class<?> deserializerClass = null;
+
+    if (serClassName != null) {
+      deserializerClass = conf.getClassByName(serClassName);
+    }
+
+    Schema schema = null;
+
+    // only worry about getting schema if we are dealing with Avro.
+    // The constant is on the left so a missing serialization.type property (serType == null,
+    // e.g. for a row-key mapping) cannot cause a NullPointerException here.
+    if (AVRO_SERIALIZATION_TYPE.equalsIgnoreCase(serType)) {
+      if (avroSchemaRetClass == null) {
+        // bother about generating a schema only if a schema retriever class wasn't provided
+        if (schemaLiteral != null) {
+          schema = Schema.parse(schemaLiteral);
+        } else if (schemaUrl != null) {
+          schema = HBaseSerDeHelper.getSchemaFromFS(schemaUrl, conf);
+        } else if (deserializerClass != null) {
+          schema = ReflectData.get().getSchema(deserializerClass);
+        }
+      }
+    }
+
+    return schema;
+  }
+}
\ No newline at end of file

Modified: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java?rev=1623845&r1=1623844&r2=1623845&view=diff
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java (original)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java Tue Sep  9 15:18:29 2014
@@ -149,8 +149,16 @@ public class LazyHBaseCellMap extends La
       }
       if (keyI.equals(key)) {
         // Got a match, return the value
-        LazyObject<?> v = (LazyObject<?>) entry.getValue();
-        return v == null ? v : v.getObject();
+        Object _value = entry.getValue();
+
+        // If the given value is a type of LazyObject, then only try and convert it to that form.
+        // Else return it as it is.
+        if (_value instanceof LazyObject) {
+          LazyObject<?> v = (LazyObject<?>) entry.getValue();
+          return v == null ? v : v.getObject();
+        } else {
+          return _value;
+        }
       }
     }
 

Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/AvroHBaseValueFactory.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,161 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroLazyObjectInspector;
+import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Avro specific implementation of the {@link HBaseValueFactory}. Creates lazy object
+ * inspectors for Avro-serialized HBase column values and wires the reader schema (and an
+ * optional {@link AvroSchemaRetriever}) into every nested {@link AvroLazyObjectInspector}.
+ * */
+public class AvroHBaseValueFactory extends DefaultHBaseValueFactory {
+
+  // optional pluggable schema source, configured via AvroSerdeUtils.SCHEMA_RETRIEVER
+  private AvroSchemaRetriever avroSchemaRetriever;
+  // reader schema for the column value; may be null when a schema retriever is configured
+  private Schema schema;
+
+  /**
+   * Constructor
+   * 
+   * @param schema the associated {@link Schema schema}; may be null when a schema
+   *          retriever class will be used instead
+   * */
+  public AvroHBaseValueFactory(Schema schema) {
+    this.schema = schema;
+  }
+
+  /**
+   * Initializes the base factory state and, when the table properties name a schema
+   * retriever class, loads and instantiates it.
+   *
+   * @throws SerDeException if the schema retriever class cannot be found or constructed
+   * */
+  @Override
+  public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
+      throws SerDeException {
+    super.init(hbaseParams, conf, properties);
+    String avroSchemaRetClass = properties.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+    if (avroSchemaRetClass != null) {
+      Class<?> avroSchemaRetrieverClass = null;
+      try {
+        avroSchemaRetrieverClass = conf.getClassByName(avroSchemaRetClass);
+      } catch (ClassNotFoundException e) {
+        throw new SerDeException(e);
+      }
+
+      initAvroSchemaRetriever(avroSchemaRetrieverClass, conf, properties);
+    }
+  }
+
+  /**
+   * Creates a lazy object inspector for the given type (with AVRO object inspector options)
+   * and initializes any avro inspectors it contains with the schema and retriever.
+   * */
+  @Override
+  public ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException {
+    ObjectInspector oi =
+        LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(), 1,
+        serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar(),
+        ObjectInspectorOptions.AVRO);
+
+    // initialize the object inspectors
+    initInternalObjectInspectors(oi);
+
+    return oi;
+  }
+
+  @Override
+  public byte[] serializeValue(Object object, StructField field) throws IOException {
+    // Explicit avro serialization not supported yet. Revert to default
+    return super.serializeValue(object, field);
+  }
+  
+  /**
+   * Initialize the instance for {@link AvroSchemaRetriever}. The implementation class must
+   * declare a (Configuration, Properties) constructor.
+   *
+   * @throws SerDeException if the expected constructor is missing or instantiation fails
+   * */
+  private void initAvroSchemaRetriever(Class<?> avroSchemaRetrieverClass, Configuration conf,
+      Properties tbl) throws SerDeException {
+
+    try {
+      avroSchemaRetriever = (AvroSchemaRetriever) avroSchemaRetrieverClass.getDeclaredConstructor(
+          Configuration.class, Properties.class).newInstance(
+          conf, tbl);
+    } catch (NoSuchMethodException e) {
+      // the constructor wasn't defined in the implementation class. Flag error
+      throw new SerDeException("Constructor not defined in schema retriever class [" + avroSchemaRetrieverClass.getName() + "]", e);
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  /**
+   * Initialize the internal object inspectors: either the inspector itself when it is avro,
+   * or the value inspector of a map (the column-family-as-MAP case).
+   * */
+  private void initInternalObjectInspectors(ObjectInspector oi) {
+    if (oi instanceof AvroLazyObjectInspector) {
+      initAvroObjectInspector(oi);
+    } else if (oi instanceof MapObjectInspector) {
+      // we found a map objectinspector. Grab the objectinspector for the value and initialize it
+      // aptly
+      ObjectInspector valueOI = ((MapObjectInspector) oi).getMapValueObjectInspector();
+
+      if (valueOI instanceof AvroLazyObjectInspector) {
+        initAvroObjectInspector(valueOI);
+      }
+    }
+  }
+
+  /**
+   * Recursively initialize the {@link AvroLazyObjectInspector} and all its nested ois with
+   * this factory's schema and schema retriever.
+   *
+   * @param oi ObjectInspector to be recursively initialized
+   * */
+  private void initAvroObjectInspector(ObjectInspector oi) {
+    // Check for a list. If found, recursively init its members
+    if (oi instanceof ListObjectInspector) {
+      ListObjectInspector loi = (ListObjectInspector) oi;
+
+      initAvroObjectInspector(loi.getListElementObjectInspector());
+      return;
+    }
+
+    // Check for a nested message. If found, set the schema, else return.
+    if (!(oi instanceof AvroLazyObjectInspector)) {
+      return;
+    }
+
+    AvroLazyObjectInspector aoi = (AvroLazyObjectInspector) oi;
+
+    aoi.setSchemaRetriever(avroSchemaRetriever);
+    aoi.setReaderSchema(schema);
+
+    // call the method recursively over all the internal fields of the given avro
+    // objectinspector
+    for (StructField field : aoi.getAllStructFieldRefs()) {
+      initAvroObjectInspector(field.getFieldObjectInspector());
+    }
+  }
+}
\ No newline at end of file

Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/DefaultHBaseValueFactory.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Default implementation of the {@link HBaseValueFactory}. Creates standard lazy object
+ * inspectors from the serde parameters supplied at {@link #init} time.
+ * */
+public class DefaultHBaseValueFactory implements HBaseValueFactory {
+
+  protected LazySimpleSerDe.SerDeParameters serdeParams;
+  protected HBaseSerDeParameters hbaseParams;
+  protected Properties properties;
+  protected Configuration conf;
+
+  @Override
+  public void init(HBaseSerDeParameters hbaseParams, Configuration conf, Properties properties)
+      throws SerDeException {
+    this.hbaseParams = hbaseParams;
+    this.serdeParams = hbaseParams.getSerdeParams();
+    this.properties = properties;
+    this.conf = conf;
+  }
+
+  @Override
+  public ObjectInspector createValueObjectInspector(TypeInfo type)
+      throws SerDeException {
+    // Delegate to the standard lazy factory; separator level 1 is used here
+    // (presumably level 0 separates the hbase columns themselves — confirm against callers)
+    return LazyFactory.createLazyObjectInspector(type, serdeParams.getSeparators(),
+        1, serdeParams.getNullSequence(), serdeParams.isEscaped(), serdeParams.getEscapeChar());
+  }
+
+  @Override
+  public byte[] serializeValue(Object object, StructField field)
+      throws IOException {
+    // TODO Add support for serialization of values here
+    return null;
+  }
+}
\ No newline at end of file

Added: hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java (added)
+++ hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/HBaseValueFactory.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,64 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.hbase.struct;
+
+import java.io.IOException;
+import java.util.Properties;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.hbase.HBaseSerDeParameters;
+import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.StructField;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+
+/**
+ * Provides capability to plug in custom implementations for querying of data stored in HBase.
+ * */
+public interface HBaseValueFactory {
+
+  /**
+   * Initialize factory with properties
+   *
+   * @param hbaseParam the {@link HBaseSerDeParameters hbase serde parameters}
+   * @param conf the hadoop {@link Configuration configuration}
+   * @param properties the custom {@link Properties}
+   * @throws SerDeException if there was an issue initializing the factory
+   */
+  void init(HBaseSerDeParameters hbaseParam, Configuration conf, Properties properties)
+      throws SerDeException;
+
+  /**
+   * Create a custom object inspector for the value
+   *
+   * @param type type information for the value
+   * @return the {@link ObjectInspector object inspector} for the value
+   * @throws SerDeException if there was an issue creating the {@link ObjectInspector object inspector}
+   */
+  ObjectInspector createValueObjectInspector(TypeInfo type) throws SerDeException;
+
+  /**
+   * Serialize the given hive object
+   *
+   * @param object the object to be serialized
+   * @param field the {@link StructField}
+   * @return the serialized value
+   * @throws IOException if there was an issue serializing the value
+   */
+  byte[] serializeValue(Object object, StructField field) throws IOException;
+}
\ No newline at end of file

Added: hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
URL: http://svn.apache.org/viewvc/hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java?rev=1623845&view=auto
==============================================================================
--- hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java (added)
+++ hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java Tue Sep  9 15:18:29 2014
@@ -0,0 +1,66 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.hbase;
+
+import java.util.Properties;
+
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hive.serde2.avro.AvroSchemaRetriever;
+
+/**
+ * Mock {@link AvroSchemaRetriever} implementation for tests. Both the reader and the
+ * writer schema are reflectively derived from the test Employee class.
+ * */
+public class HBaseTestAvroSchemaRetriever extends AvroSchemaRetriever {
+
+  private static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");
+
+  /** Fully qualified name of the test class whose reflected schema is returned. */
+  private static final String EMPLOYEE_CLASS = "org.apache.hadoop.hive.hbase.avro.Employee";
+
+  public HBaseTestAvroSchemaRetriever(Configuration conf, Properties tbl) {
+  }
+
+  @Override
+  public Schema retrieveWriterSchema(Object source) {
+    return reflectEmployeeSchema();
+  }
+
+  @Override
+  public Schema retrieveReaderSchema(Object source) {
+    return reflectEmployeeSchema();
+  }
+
+  @Override
+  public int getOffset() {
+    return TEST_BYTE_ARRAY.length;
+  }
+
+  /**
+   * Builds the avro {@link Schema} for the Employee test class via reflection,
+   * wrapping a missing class in a {@link RuntimeException}.
+   */
+  private static Schema reflectEmployeeSchema() {
+    Class<?> clazz;
+    try {
+      clazz = Class.forName(EMPLOYEE_CLASS);
+    } catch (ClassNotFoundException e) {
+      throw new RuntimeException(e);
+    }
+
+    return ReflectData.get().getSchema(clazz);
+  }
+}
\ No newline at end of file