Posted to commits@hive.apache.org by gu...@apache.org on 2014/09/10 23:41:17 UTC

svn commit: r1624140 [1/3] - in /hive/branches/cbo: ./ common/src/java/org/apache/hadoop/hive/conf/ hbase-handler/ hbase-handler/if/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/ hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/ hbase-...

Author: gunther
Date: Wed Sep 10 21:41:16 2014
New Revision: 1624140

URL: http://svn.apache.org/r1624140
Log:
Merge latest trunk into cbo branch. (Gunther Hagleitner)

Added:
    hive/branches/cbo/hbase-handler/if/
      - copied from r1624134, hive/trunk/hbase-handler/if/
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
      - copied unchanged from r1624134, hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeHelper.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
      - copied from r1624134, hive/trunk/hbase-handler/src/java/org/apache/hadoop/hive/hbase/struct/
    hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
      - copied unchanged from r1624134, hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/HBaseTestAvroSchemaRetriever.java
    hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
      - copied from r1624134, hive/trunk/hbase-handler/src/test/org/apache/hadoop/hive/hbase/avro/
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
      - copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnSetInfo.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
      - copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupKeyHelper.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
      - copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AbstractVectorDesc.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java
      - copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorDesc.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
      - copied unchanged from r1624134, hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/VectorGroupByDesc.java
    hive/branches/cbo/ql/src/test/queries/clientpositive/orc_ppd_boolean.q
      - copied unchanged from r1624134, hive/trunk/ql/src/test/queries/clientpositive/orc_ppd_boolean.q
    hive/branches/cbo/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q
      - copied unchanged from r1624134, hive/trunk/ql/src/test/queries/clientpositive/sort_merge_join_desc_8.q
    hive/branches/cbo/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out
      - copied unchanged from r1624134, hive/trunk/ql/src/test/results/clientpositive/orc_ppd_boolean.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out
      - copied unchanged from r1624134, hive/trunk/ql/src/test/results/clientpositive/sort_merge_join_desc_8.q.out
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
      - copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroLazyObjectInspector.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
      - copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorException.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
      - copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSchemaRetriever.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
      - copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUnion.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
      - copied unchanged from r1624134, hive/trunk/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryUnionObjectInspector.java
Removed:
    hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/service/server/TestHiveServer2Concurrency.java
Modified:
    hive/branches/cbo/   (props changed)
    hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hive/branches/cbo/hbase-handler/pom.xml   (contents, props changed)
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
    hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
    hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
    hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
    hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
    hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/QueryProperties.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/AggregateDefinition.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapper.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorHashKeyWrapperBatch.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/sarg/SearchArgumentImpl.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/AbstractSMBJoinProc.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/ColumnStatsSemanticAnalyzer.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/GlobalLimitCtx.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/plan/GroupByDesc.java
    hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
    hive/branches/cbo/ql/src/test/org/apache/hadoop/hive/ql/optimizer/physical/TestVectorizer.java
    hive/branches/cbo/ql/src/test/queries/clientpositive/dynpart_sort_opt_vectorization.q
    hive/branches/cbo/ql/src/test/queries/clientpositive/input_lazyserde.q
    hive/branches/cbo/ql/src/test/results/clientpositive/dynpart_sort_opt_vectorization.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/input_lazyserde.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/tez/dynpart_sort_opt_vectorization.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/tez/tez_join_hash.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/tez/vector_left_outer_join.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_nested_mapjoin.q.out
    hive/branches/cbo/ql/src/test/results/clientpositive/tez/vectorized_ptf.q.out
    hive/branches/cbo/serde/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/serde/serdeConstants.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/ql/io/sarg/PredicateLeaf.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroGenericRecordWritable.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroObjectInspectorGenerator.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/avro/AvroSerdeUtils.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyFactory.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyStruct.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUnion.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazyObjectInspectorFactory.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazy/objectinspector/LazySimpleStructObjectInspector.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryFactory.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinarySerDe.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/LazyBinaryUtils.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/lazybinary/objectinspector/LazyBinaryObjectInspectorFactory.java
    hive/branches/cbo/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ObjectInspectorFactory.java

Propchange: hive/branches/cbo/
------------------------------------------------------------------------------
  Merged /hive/trunk:r1623597-1624134

Modified: hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hive/branches/cbo/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Sep 10 21:41:16 2014
@@ -103,7 +103,6 @@ public class HiveConf extends Configurat
    * be recreated so that the change will take effect.
    */
   public static final HiveConf.ConfVars[] metaVars = {
-      HiveConf.ConfVars.METASTOREDIRECTORY,
       HiveConf.ConfVars.METASTOREWAREHOUSE,
       HiveConf.ConfVars.METASTOREURIS,
       HiveConf.ConfVars.METASTORETHRIFTCONNECTIONRETRIES,
@@ -346,9 +345,7 @@ public class HiveConf extends Configurat
     MAPREDSETUPCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDSETUPCLEANUPNEEDED"), false, "", true),
     MAPREDTASKCLEANUPNEEDED(ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDTASKCLEANUPNEEDED"), false, "", true),
 
-    // Metastore stuff. Be sure to update HiveConf.metaVars when you add
-    // something here!
-    METASTOREDIRECTORY("hive.metastore.metadb.dir", "", ""),
+    // Metastore stuff. Be sure to update HiveConf.metaVars when you add something here!
     METASTOREWAREHOUSE("hive.metastore.warehouse.dir", "/user/hive/warehouse",
         "location of default database for the warehouse"),
     METASTOREURIS("hive.metastore.uris", "",
@@ -1716,6 +1713,9 @@ public class HiveConf extends Configurat
     HIVE_VECTORIZATION_ENABLED("hive.vectorized.execution.enabled", false,
         "This flag should be set to true to enable vectorized mode of query execution.\n" +
         "The default value is false."),
+    HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true,
+            "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" +
+            "The default value is true."),
     HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000,
         "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."),
     HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000,

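For context, here is a minimal sketch of how a client could read the new reduce-side flag through HiveConf, assuming only the ConfVars entries shown in the hunk above; it is illustrative and not part of the commit:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class VectorizationFlags {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        // Map-side vectorization still defaults to false at this revision.
        conf.setBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true);
        // The reduce-side flag introduced above defaults to true.
        boolean reduceSide =
            conf.getBoolVar(HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_ENABLED);
        System.out.println("reduce-side vectorization: " + reduceSide);
      }
    }
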
Modified: hive/branches/cbo/hbase-handler/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/pom.xml?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/pom.xml (original)
+++ hive/branches/cbo/hbase-handler/pom.xml Wed Sep 10 21:41:16 2014
@@ -57,6 +57,11 @@
       <version>${junit.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>org.apache.avro</groupId>
+      <artifactId>avro</artifactId>
+      <version>1.7.6</version>
+    </dependency>
   </dependencies>
 
   <profiles>
@@ -101,6 +106,12 @@
           <version>${hbase.hadoop1.version}</version>
         </dependency>
         <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-test</artifactId>
+          <version>${hadoop-20S.version}</version>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
           <groupId>org.apache.hbase</groupId>
           <artifactId>hbase-common</artifactId>
           <version>${hbase.hadoop1.version}</version>
@@ -134,11 +145,25 @@
         </dependency>
         <dependency>
           <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-common</artifactId>
+          <version>${hadoop-23.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
+          <groupId>org.apache.hadoop</groupId>
           <artifactId>hadoop-mapreduce-client-core</artifactId>
           <version>${hadoop-23.version}</version>
           <optional>true</optional>
         </dependency>
         <dependency>
+          <groupId>org.apache.hadoop</groupId>
+          <artifactId>hadoop-hdfs</artifactId>
+          <version>${hadoop-23.version}</version>
+          <classifier>tests</classifier>
+          <scope>test</scope>
+        </dependency>
+        <dependency>
           <groupId>org.apache.hbase</groupId>
           <artifactId>hbase-hadoop2-compat</artifactId>
           <version>${hbase.hadoop2.version}</version>
@@ -190,6 +215,12 @@
           <type>test-jar</type>
           <scope>test</scope>
         </dependency>
+        <dependency>
+          <groupId>com.sun.jersey</groupId>
+          <artifactId>jersey-servlet</artifactId>
+          <version>${jersey.version}</version>
+          <scope>test</scope>
+        </dependency>
       </dependencies>
     </profile>
   </profiles>
@@ -209,6 +240,42 @@
           </execution>
         </executions>
       </plugin>
+      <plugin>
+        <groupId>org.apache.avro</groupId>
+        <artifactId>avro-maven-plugin</artifactId>
+        <version>1.7.6</version>
+        <executions>
+           <execution>
+               <phase>generate-test-sources</phase>
+               <goals>
+                  <goal>protocol</goal>
+               </goals>
+               <configuration>
+                  <testSourceDirectory>${project.basedir}/if/test</testSourceDirectory>
+                  <testOutputDirectory>${project.basedir}/src/test</testOutputDirectory>
+               </configuration>
+           </execution>
+        </executions>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <version>1.7</version>
+        <executions>
+           <execution>
+               <id>add-test-sources</id>
+               <phase>generate-test-sources</phase>
+               <goals>
+                  <goal>add-test-source</goal>
+               </goals>
+               <configuration>
+                  <sources>
+                     <source>${project.basedir}/src/gen/avro/gen-java</source>
+                  </sources>
+               </configuration>
+           </execution>
+        </executions>
+      </plugin>
     </plugins>
   </build>
 

Propchange: hive/branches/cbo/hbase-handler/pom.xml
------------------------------------------------------------------------------
  Merged /hive/trunk/hbase-handler/pom.xml:r1623597-1624134

Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/ColumnMappings.java Wed Sep 10 21:41:16 2014
@@ -23,16 +23,21 @@
 
 package org.apache.hadoop.hive.hbase;
 
-import com.google.common.collect.Iterators;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Properties;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
+import com.google.common.collect.Iterators;
 
 public class ColumnMappings implements Iterable<ColumnMappings.ColumnMapping> {
 
@@ -53,24 +58,41 @@ public class ColumnMappings implements I
     return columnsMapping.length;
   }
 
-  String toTypesString() {
+  String toNamesString(Properties tbl, String autogenerate) {
+    if (autogenerate != null && autogenerate.equals("true")) {
+      StringBuilder sb = new StringBuilder();
+      HBaseSerDeHelper.generateColumns(tbl, Arrays.asList(columnsMapping), sb);
+      return sb.toString();
+    }
+
+    return StringUtils.EMPTY; // autogeneration not requested
+  }
+
+  String toTypesString(Properties tbl, Configuration conf, String autogenerate)
+      throws SerDeException {
     StringBuilder sb = new StringBuilder();
-    for (ColumnMapping colMap : columnsMapping) {
-      if (sb.length() > 0) {
-        sb.append(":");
-      }
-      if (colMap.hbaseRowKey) {
-        // the row key column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
-      } else if (colMap.qualifierName == null) {
-        // a column family become a MAP
-        sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
-            + serdeConstants.STRING_TYPE_NAME + ">");
-      } else {
-        // an individual column becomes a STRING
-        sb.append(serdeConstants.STRING_TYPE_NAME);
+
+    if (autogenerate != null && autogenerate.equals("true")) {
+      HBaseSerDeHelper.generateColumnTypes(tbl, Arrays.asList(columnsMapping), sb, conf);
+    } else {
+      for (ColumnMapping colMap : columnsMapping) {
+        if (sb.length() > 0) {
+          sb.append(":");
+        }
+        if (colMap.hbaseRowKey) {
+          // the row key column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        } else if (colMap.qualifierName == null) {
+          // a column family becomes a MAP
+          sb.append(serdeConstants.MAP_TYPE_NAME + "<" + serdeConstants.STRING_TYPE_NAME + ","
+              + serdeConstants.STRING_TYPE_NAME + ">");
+        } else {
+          // an individual column becomes a STRING
+          sb.append(serdeConstants.STRING_TYPE_NAME);
+        }
       }
     }
+
     return sb.toString();
   }
 

Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseCompositeKey.java Wed Sep 10 21:41:16 2014
@@ -19,7 +19,9 @@
 package org.apache.hadoop.hive.hbase;
 
 import java.util.ArrayList;
+import java.util.Collections;
 import java.util.List;
+import java.util.Map;
 
 import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
 import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
@@ -94,4 +96,14 @@ public class HBaseCompositeKey extends L
 
     return lazyObject;
   }
+
+  /**
+   * Return the different parts of the key. By default, this returns an empty map. Consumers can
+   * choose to override this to provide their own names and types of parts of the key.
+   *
+   * @return a map of part names to their types
+   */
+  public Map<String, String> getParts() {
+    return Collections.emptyMap();
+  }
 }

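The getParts() hook added above is meant to be overridden. A minimal sketch of a consumer subclass follows, assuming the single-ObjectInspector constructor of HBaseCompositeKey; the class name, part names, and types are hypothetical:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazySimpleStructObjectInspector;

    public class OrderCompositeKey extends HBaseCompositeKey {
      public OrderCompositeKey(LazySimpleStructObjectInspector oi) {
        super(oi);
      }

      @Override
      public Map<String, String> getParts() {
        // Hypothetical key parts mapped to their Hive type names.
        Map<String, String> parts = new HashMap<String, String>();
        parts.put("customer_id", "string");
        parts.put("order_ts", "bigint");
        return parts;
      }
    }
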
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseLazyObjectFactory.java Wed Sep 10 21:41:16 2014
@@ -18,32 +18,31 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.lazy.LazyFactory;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.LazyObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 
-import java.util.ArrayList;
-import java.util.List;
-
 // Does the same thing as LazyFactory#createLazyObjectInspector, except that it replaces the
 // original key OI with one created by the HBaseKeyFactory provided via SerDe properties for HBase
 public class HBaseLazyObjectFactory {
 
   public static ObjectInspector createLazyHBaseStructInspector(
-      SerDeParameters serdeParams, int index, HBaseKeyFactory factory) throws SerDeException {
+      SerDeParameters serdeParams, int index, HBaseKeyFactory keyFactory,
+      List<HBaseValueFactory> valueFactories) throws SerDeException {
     List<TypeInfo> columnTypes = serdeParams.getColumnTypes();
     ArrayList<ObjectInspector> columnObjectInspectors = new ArrayList<ObjectInspector>(
         columnTypes.size());
     for (int i = 0; i < columnTypes.size(); i++) {
       if (i == index) {
-        columnObjectInspectors.add(factory.createKeyObjectInspector(columnTypes.get(i)));
+        columnObjectInspectors.add(keyFactory.createKeyObjectInspector(columnTypes.get(i)));
       } else {
-        columnObjectInspectors.add(LazyFactory.createLazyObjectInspector(
-            columnTypes.get(i), serdeParams.getSeparators(), 1, serdeParams.getNullSequence(),
-            serdeParams.isEscaped(), serdeParams.getEscapeChar()));
+        columnObjectInspectors.add(valueFactories.get(i).createValueObjectInspector(
+            columnTypes.get(i)));
       }
     }
     return LazyObjectInspectorFactory.getLazySimpleStructObjectInspector(
@@ -51,4 +50,4 @@ public class HBaseLazyObjectFactory {
         serdeParams.getNullSequence(), serdeParams.isLastColumnTakesRest(),
         serdeParams.isEscaped(), serdeParams.getEscapeChar());
   }
-}
+}
\ No newline at end of file

Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseRowSerializer.java Wed Sep 10 21:41:16 2014
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.serde2.ByteStream;
@@ -34,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
 import org.apache.hadoop.io.Writable;
 
-import java.io.IOException;
-import java.util.List;
-import java.util.Map;
-
 public class HBaseRowSerializer {
 
   private final HBaseKeyFactory keyFactory;
@@ -279,6 +279,10 @@ public class HBaseRowSerializer {
           }
         }
         return true;
+      case UNION: {
+        // the union type is not yet fully supported; see HIVE-2390
+        return false;
+      }
       default:
         throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
     }

Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDe.java Wed Sep 10 21:41:16 2014
@@ -18,6 +18,10 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Properties;
+
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -25,6 +29,7 @@ import org.apache.hadoop.hbase.util.Byte
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.AbstractSerDe;
+import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -33,10 +38,6 @@ import org.apache.hadoop.hive.serde2.obj
 import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.JobConf;
 
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
 /**
  * HBaseSerDe can be used to serialize object into an HBase table and
  * deserialize objects from an HBase table.
@@ -50,15 +51,21 @@ public class HBaseSerDe extends Abstract
   public static final String HBASE_KEY_COL = ":key";
   public static final String HBASE_PUT_TIMESTAMP = "hbase.put.timestamp";
   public static final String HBASE_COMPOSITE_KEY_CLASS = "hbase.composite.key.class";
+  public static final String HBASE_COMPOSITE_KEY_TYPES = "hbase.composite.key.types";
   public static final String HBASE_COMPOSITE_KEY_FACTORY = "hbase.composite.key.factory";
   public static final String HBASE_SCAN_CACHE = "hbase.scan.cache";
   public static final String HBASE_SCAN_CACHEBLOCKS = "hbase.scan.cacheblock";
   public static final String HBASE_SCAN_BATCH = "hbase.scan.batch";
+  public static final String HBASE_AUTOGENERATE_STRUCT = "hbase.struct.autogenerate";
   /**
-   *  Determines whether a regex matching should be done on the columns or not. Defaults to true.
-   *  <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
+   * Determines whether regex matching should be done on the columns. Defaults to true.
+   * <strong>WARNING: Note that currently this only supports the suffix wildcard .*</strong>
    */
   public static final String HBASE_COLUMNS_REGEX_MATCHING = "hbase.columns.mapping.regex.matching";
+  /**
+   * Defines the serialization type for a column.
+   */
+  public static final String SERIALIZATION_TYPE = "serialization.type";
 
   private ObjectInspector cachedObjectInspector;
   private LazyHBaseRow cachedHBaseRow;
@@ -83,8 +90,11 @@ public class HBaseSerDe extends Abstract
       throws SerDeException {
     serdeParams = new HBaseSerDeParameters(conf, tbl, getClass().getName());
 
-    cachedObjectInspector = HBaseLazyObjectFactory.createLazyHBaseStructInspector(
-        serdeParams.getSerdeParams(), serdeParams.getKeyIndex(), serdeParams.getKeyFactory());
+    cachedObjectInspector =
+        HBaseLazyObjectFactory
+            .createLazyHBaseStructInspector(serdeParams.getSerdeParams(),
+                serdeParams.getKeyIndex(), serdeParams.getKeyFactory(),
+                serdeParams.getValueFactories());
 
     cachedHBaseRow = new LazyHBaseRow(
         (LazySimpleStructObjectInspector) cachedObjectInspector,

Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/HBaseSerDeParameters.java Wed Sep 10 21:41:16 2014
@@ -18,13 +18,20 @@
 
 package org.apache.hadoop.hive.hbase;
 
+import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
 
+import org.apache.avro.Schema;
+import org.apache.avro.reflect.ReflectData;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
+import org.apache.hadoop.hive.hbase.struct.AvroHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.DefaultHBaseValueFactory;
+import org.apache.hadoop.hive.hbase.struct.HBaseValueFactory;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe.SerDeParameters;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -37,10 +44,12 @@ import org.apache.hadoop.util.Reflection
  */
 public class HBaseSerDeParameters {
 
+  public static final String AVRO_SERIALIZATION_TYPE = "avro";
+  public static final String STRUCT_SERIALIZATION_TYPE = "struct";
+
   private final SerDeParameters serdeParams;
 
   private final Configuration job;
-  private final Properties tbl;
 
   private final String columnMappingString;
   private final ColumnMappings columnMappings;
@@ -48,57 +57,50 @@ public class HBaseSerDeParameters {
 
   private final long putTimestamp;
   private final HBaseKeyFactory keyFactory;
+  private final List<HBaseValueFactory> valueFactories;
 
   HBaseSerDeParameters(Configuration job, Properties tbl, String serdeName) throws SerDeException {
     this.job = job;
-    this.tbl = tbl;
-    this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
-    this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
 
     // Read configuration parameters
     columnMappingString = tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING);
-    doColumnRegexMatching = Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
+    doColumnRegexMatching =
+        Boolean.valueOf(tbl.getProperty(HBaseSerDe.HBASE_COLUMNS_REGEX_MATCHING, "true"));
     // Parse and initialize the HBase columns mapping
     columnMappings = HBaseSerDe.parseColumnsMapping(columnMappingString, doColumnRegexMatching);
-    columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(), serdeParams.getColumnTypes());
-
-    // Precondition: make sure this is done after the rest of the SerDe initialization is done.
-    String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
-    columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
 
     // Build the type property string if not supplied
     String columnTypeProperty = tbl.getProperty(serdeConstants.LIST_COLUMN_TYPES);
-    if (columnTypeProperty == null) {
-      tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES, columnMappings.toTypesString());
-    }
+    String autogenerate = tbl.getProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT);
 
-    this.keyFactory = initKeyFactory(job, tbl);
-  }
+    if (columnTypeProperty == null || columnTypeProperty.isEmpty()) {
+      String columnNameProperty = tbl.getProperty(serdeConstants.LIST_COLUMNS);
+      if (columnNameProperty == null || columnNameProperty.isEmpty()) {
+        if (autogenerate == null || autogenerate.isEmpty()) {
+          throw new IllegalArgumentException("Either the columns must be specified or the "
+              + HBaseSerDe.HBASE_AUTOGENERATE_STRUCT + " property must be set to true.");
+        }
 
-  private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
-    try {
-      HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
-      if (keyFactory != null) {
-        keyFactory.init(this, tbl);
+        tbl.setProperty(serdeConstants.LIST_COLUMNS,
+            columnMappings.toNamesString(tbl, autogenerate));
       }
-      return keyFactory;
-    } catch (Exception e) {
-      throw new SerDeException(e);
-    }
-  }
 
-  private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl) throws Exception {
-    String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
-    if (factoryClassName != null) {
-      Class<?> factoryClazz = Class.forName(factoryClassName);
-      return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+      tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
+          columnMappings.toTypesString(tbl, job, autogenerate));
     }
-    String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
-    if (keyClassName != null) {
-      Class<?> keyClass = Class.forName(keyClassName);
-      return new CompositeHBaseKeyFactory(keyClass);
-    }
-    return new DefaultHBaseKeyFactory();
+
+    this.serdeParams = LazySimpleSerDe.initSerdeParams(job, tbl, serdeName);
+    this.putTimestamp = Long.valueOf(tbl.getProperty(HBaseSerDe.HBASE_PUT_TIMESTAMP, "-1"));
+
+    columnMappings.setHiveColumnDescription(serdeName, serdeParams.getColumnNames(),
+        serdeParams.getColumnTypes());
+
+    // Precondition: make sure this is done after the rest of the SerDe initialization is done.
+    String hbaseTableStorageType = tbl.getProperty(HBaseSerDe.HBASE_TABLE_DEFAULT_STORAGE_TYPE);
+    columnMappings.parseColumnStorageTypes(hbaseTableStorageType);
+
+    this.keyFactory = initKeyFactory(job, tbl);
+    this.valueFactories = initValueFactories(job, tbl);
   }
 
   public List<String> getColumnNames() {
@@ -133,6 +135,10 @@ public class HBaseSerDeParameters {
     return keyFactory;
   }
 
+  public List<HBaseValueFactory> getValueFactories() {
+    return valueFactories;
+  }
+
   public Configuration getBaseConfiguration() {
     return job;
   }
@@ -151,4 +157,190 @@ public class HBaseSerDeParameters {
   public String toString() {
     return "[" + columnMappingString + ":" + getColumnNames() + ":" + getColumnTypes() + "]";
   }
-}
+
+  private HBaseKeyFactory initKeyFactory(Configuration conf, Properties tbl) throws SerDeException {
+    try {
+      HBaseKeyFactory keyFactory = createKeyFactory(conf, tbl);
+      if (keyFactory != null) {
+        keyFactory.init(this, tbl);
+      }
+      return keyFactory;
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+  }
+
+  private static HBaseKeyFactory createKeyFactory(Configuration job, Properties tbl)
+      throws Exception {
+    String factoryClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_FACTORY);
+    if (factoryClassName != null) {
+      Class<?> factoryClazz = Class.forName(factoryClassName);
+      return (HBaseKeyFactory) ReflectionUtils.newInstance(factoryClazz, job);
+    }
+    String keyClassName = tbl.getProperty(HBaseSerDe.HBASE_COMPOSITE_KEY_CLASS);
+    if (keyClassName != null) {
+      Class<?> keyClass = Class.forName(keyClassName);
+      return new CompositeHBaseKeyFactory(keyClass);
+    }
+    return new DefaultHBaseKeyFactory();
+  }
+
+  private List<HBaseValueFactory> initValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    List<HBaseValueFactory> valueFactories = createValueFactories(conf, tbl);
+
+    for (HBaseValueFactory valueFactory : valueFactories) {
+      valueFactory.init(this, conf, tbl);
+    }
+
+    return valueFactories;
+  }
+
+  private List<HBaseValueFactory> createValueFactories(Configuration conf, Properties tbl)
+      throws SerDeException {
+    List<HBaseValueFactory> valueFactories = new ArrayList<HBaseValueFactory>();
+
+    try {
+      for (int i = 0; i < columnMappings.size(); i++) {
+        String serType = getSerializationType(conf, tbl, columnMappings.getColumnsMapping()[i]);
+
+        if (serType != null && serType.equals(AVRO_SERIALIZATION_TYPE)) {
+          Schema schema = getSchema(conf, tbl, columnMappings.getColumnsMapping()[i]);
+          valueFactories.add(new AvroHBaseValueFactory(schema));
+        } else {
+          valueFactories.add(new DefaultHBaseValueFactory());
+        }
+      }
+    } catch (Exception e) {
+      throw new SerDeException(e);
+    }
+
+    return valueFactories;
+  }
+
+  /**
+   * Get the serialization type for the given {@link ColumnMapping colMap}.
+   */
+  private String getSerializationType(Configuration conf, Properties tbl,
+      ColumnMapping colMap) throws Exception {
+    String serType = null;
+
+    if (colMap.qualifierName == null) {
+      // only a column family
+
+      if (colMap.qualifierPrefix != null) {
+        serType = tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+            + HBaseSerDe.SERIALIZATION_TYPE);
+      } else {
+        serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+      }
+    } else if (!colMap.hbaseRowKey) {
+      // not an hbase row key. This should either be a prefix or an individual qualifier
+      String qualifierName = colMap.qualifierName;
+
+      if (colMap.qualifierName.endsWith("*")) {
+        qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+      }
+
+      serType =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + HBaseSerDe.SERIALIZATION_TYPE);
+    }
+
+    return serType;
+  }
+
+  private Schema getSchema(Configuration conf, Properties tbl, ColumnMapping colMap)
+      throws Exception {
+    String serType = null;
+    String serClassName = null;
+    String schemaLiteral = null;
+    String schemaUrl = null;
+
+    if (colMap.qualifierName == null) {
+      // only a column family
+
+      if (colMap.qualifierPrefix != null) {
+        serType =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl =
+            tbl.getProperty(colMap.familyName + "." + colMap.qualifierPrefix + "."
+                + AvroSerdeUtils.SCHEMA_URL);
+      } else {
+        serType = tbl.getProperty(colMap.familyName + "." + HBaseSerDe.SERIALIZATION_TYPE);
+
+        serClassName =
+            tbl.getProperty(colMap.familyName + "." + serdeConstants.SERIALIZATION_CLASS);
+
+        schemaLiteral = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_LITERAL);
+
+        schemaUrl = tbl.getProperty(colMap.familyName + "." + AvroSerdeUtils.SCHEMA_URL);
+      }
+    } else if (!colMap.hbaseRowKey) {
+      // not an hbase row key. This should either be a prefix or an individual qualifier
+      String qualifierName = colMap.qualifierName;
+
+      if (colMap.qualifierName.endsWith("*")) {
+        qualifierName = colMap.qualifierName.substring(0, colMap.qualifierName.length() - 1);
+      }
+
+      serType =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + HBaseSerDe.SERIALIZATION_TYPE);
+
+      serClassName =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + serdeConstants.SERIALIZATION_CLASS);
+
+      schemaLiteral =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "."
+              + AvroSerdeUtils.SCHEMA_LITERAL);
+
+      schemaUrl =
+          tbl.getProperty(colMap.familyName + "." + qualifierName + "." + AvroSerdeUtils.SCHEMA_URL);
+    }
+
+    String avroSchemaRetClass = tbl.getProperty(AvroSerdeUtils.SCHEMA_RETRIEVER);
+
+    if (schemaLiteral == null && serClassName == null && schemaUrl == null
+        && avroSchemaRetClass == null) {
+      throw new IllegalArgumentException("serialization.type was set to [" + serType
+          + "] but neither " + AvroSerdeUtils.SCHEMA_LITERAL + ", " + AvroSerdeUtils.SCHEMA_URL
+          + ", serialization.class or " + AvroSerdeUtils.SCHEMA_RETRIEVER + " property was set");
+    }
+
+    Class<?> deserializerClass = null;
+
+    if (serClassName != null) {
+      deserializerClass = conf.getClassByName(serClassName);
+    }
+
+    Schema schema = null;
+
+    // only worry about getting schema if we are dealing with Avro
+    if (serType.equalsIgnoreCase(AVRO_SERIALIZATION_TYPE)) {
+      if (avroSchemaRetClass == null) {
+        // only generate a schema here if no schema retriever class was provided
+        if (schemaLiteral != null) {
+          schema = Schema.parse(schemaLiteral);
+        } else if (schemaUrl != null) {
+          schema = HBaseSerDeHelper.getSchemaFromFS(schemaUrl, conf);
+        } else if (deserializerClass != null) {
+          schema = ReflectData.get().getSchema(deserializerClass);
+        }
+      }
+    }
+
+    return schema;
+  }
+}
\ No newline at end of file

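To make the per-column property convention parsed by getSerializationType() and getSchema() concrete, here is a hedged sketch of the table properties for one Avro-typed qualifier; the family and qualifier names are illustrative and the schema literal is elided:

    import java.util.Properties;

    public class AvroColumnProps {
      public static Properties example() {
        Properties tbl = new Properties();
        tbl.setProperty("hbase.columns.mapping", ":key,info:payload");
        // "<family>.<qualifier>.serialization.type" selects the Avro value factory.
        tbl.setProperty("info.payload.serialization.type", "avro");
        // One of avro.schema.literal, avro.schema.url, serialization.class,
        // or an avro.schema.retriever must also be set for that column.
        tbl.setProperty("info.payload.avro.schema.literal", "{ /* schema JSON */ }");
        return tbl;
      }
    }
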
Modified: hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java (original)
+++ hive/branches/cbo/hbase-handler/src/java/org/apache/hadoop/hive/hbase/LazyHBaseCellMap.java Wed Sep 10 21:41:16 2014
@@ -149,8 +149,16 @@ public class LazyHBaseCellMap extends La
       }
       if (keyI.equals(key)) {
         // Got a match, return the value
-        LazyObject<?> v = (LazyObject<?>) entry.getValue();
-        return v == null ? v : v.getObject();
+        Object value = entry.getValue();
+
+        // If the value is a LazyObject, unwrap it to the underlying object.
+        // Otherwise return it as-is.
+        if (value instanceof LazyObject) {
+          LazyObject<?> v = (LazyObject<?>) value;
+          return v.getObject();
+        } else {
+          return value;
+        }
       }
     }
 

Modified: hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java (original)
+++ hive/branches/cbo/hbase-handler/src/test/org/apache/hadoop/hive/hbase/TestHBaseSerDe.java Wed Sep 10 21:41:16 2014
@@ -22,23 +22,45 @@ import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Collections;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Properties;
 
 import junit.framework.TestCase;
 
+import org.apache.avro.Schema;
+import org.apache.avro.file.DataFileWriter;
+import org.apache.avro.generic.GenericData;
+import org.apache.avro.generic.GenericDatumWriter;
+import org.apache.avro.generic.GenericRecord;
+import org.apache.avro.io.DatumWriter;
+import org.apache.avro.io.Encoder;
+import org.apache.avro.io.EncoderFactory;
+import org.apache.avro.specific.SpecificDatumWriter;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.client.Put;
 import org.apache.hadoop.hbase.client.Result;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hive.hbase.avro.Address;
+import org.apache.hadoop.hive.hbase.avro.ContactInfo;
+import org.apache.hadoop.hive.hbase.avro.Employee;
+import org.apache.hadoop.hive.hbase.avro.Gender;
+import org.apache.hadoop.hive.hbase.avro.HomePhone;
+import org.apache.hadoop.hive.hbase.avro.OfficePhone;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
+import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils;
 import org.apache.hadoop.hive.serde2.io.ByteWritable;
 import org.apache.hadoop.hive.serde2.io.DoubleWritable;
 import org.apache.hadoop.hive.serde2.io.ShortWritable;
 import org.apache.hadoop.hive.serde2.lazy.LazyPrimitive;
+import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
 import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
 import org.apache.hadoop.io.BooleanWritable;
@@ -53,6 +75,66 @@ import org.apache.thrift.TException;
  */
 public class TestHBaseSerDe extends TestCase {
 
+  static final byte[] TEST_BYTE_ARRAY = Bytes.toBytes("test");
+
+  private static final String RECORD_SCHEMA = "{\n" +
+      "  \"namespace\": \"testing.test.mctesty\",\n" +
+      "  \"name\": \"oneRecord\",\n" +
+      "  \"type\": \"record\",\n" +
+      "  \"fields\": [\n" +
+      "    {\n" +
+      "      \"name\":\"aRecord\",\n" +
+      "      \"type\":{\"type\":\"record\",\n" +
+      "              \"name\":\"recordWithinARecord\",\n" +
+      "              \"fields\": [\n" +
+      "                 {\n" +
+      "                  \"name\":\"int1\",\n" +
+      "                  \"type\":\"int\"\n" +
+      "                },\n" +
+      "                {\n" +
+      "                  \"name\":\"boolean1\",\n" +
+      "                  \"type\":\"boolean\"\n" +
+      "                },\n" +
+      "                {\n" +
+      "                  \"name\":\"long1\",\n" +
+      "                  \"type\":\"long\"\n" +
+      "                }\n" +
+      "      ]}\n" +
+      "    }\n" +
+      "  ]\n" +
+      "}";
+
+  private static final String RECORD_SCHEMA_EVOLVED = "{\n" +
+      "  \"namespace\": \"testing.test.mctesty\",\n" +
+      "  \"name\": \"oneRecord\",\n" +
+      "  \"type\": \"record\",\n" +
+      "  \"fields\": [\n" +
+      "    {\n" +
+      "      \"name\":\"aRecord\",\n" +
+      "      \"type\":{\"type\":\"record\",\n" +
+      "              \"name\":\"recordWithinARecord\",\n" +
+      "              \"fields\": [\n" +
+      "                 {\n" +
+      "                  \"name\":\"int1\",\n" +
+      "                  \"type\":\"int\"\n" +
+      "                },\n" +
+      "                {\n" +
+      "                  \"name\":\"string1\",\n" +
+      "                  \"type\":\"string\", \"default\": \"test\"\n" +
+      "                },\n" +
+      "                {\n" +
+      "                  \"name\":\"boolean1\",\n" +
+      "                  \"type\":\"boolean\"\n" +
+      "                },\n" +
+      "                {\n" +
+      "                  \"name\":\"long1\",\n" +
+      "                  \"type\":\"long\"\n" +
+      "                }\n" +
+      "      ]}\n" +
+      "    }\n" +
+      "  ]\n" +
+      "}";
+
   /**
    * Test the default behavior of the Lazy family of objects and object inspectors.
    */
@@ -551,7 +633,7 @@ public class TestHBaseSerDe extends Test
         "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
     tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
         "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
-        + "map<float,float>:map<double,double>:map<boolean,boolean>");
+            + "map<float,float>:map<double,double>:map<boolean,boolean>");
     tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
         ":key#-,cf-int:#b:b,cf-byte:#b:b,cf-short:#b:b,cf-long:#b:b,cf-float:#b:b,cf-double:#b:b," +
         "cf-bool:#b:b");
@@ -565,7 +647,7 @@ public class TestHBaseSerDe extends Test
         "key,valint,valbyte,valshort,vallong,valfloat,valdouble,valbool");
     tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
         "string:map<int,int>:map<tinyint,tinyint>:map<smallint,smallint>:map<bigint,bigint>:"
-        + "map<float,float>:map<double,double>:map<boolean,boolean>");
+            + "map<float,float>:map<double,double>:map<boolean,boolean>");
     tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
         ":key#-,cf-int:#-:-,cf-byte:#-:-,cf-short:#-:-,cf-long:#-:-,cf-float:#-:-,cf-double:#-:-," +
         "cf-bool:#-:-");
@@ -636,7 +718,7 @@ public class TestHBaseSerDe extends Test
         "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
     tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
         "string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
-        + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
+            + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
     tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
         ":key#s,cf-byte:#-:s,cf-short:#s:-,cf-int:#s:s,cf-long:#-:-,cf-float:#s:-,cf-double:#-:s," +
         "cf-string:#s:s,cf-bool:#-:-");
@@ -650,7 +732,7 @@ public class TestHBaseSerDe extends Test
         "key,valbyte,valshort,valint,vallong,valfloat,valdouble,valstring,valbool");
     tbl.setProperty(serdeConstants.LIST_COLUMN_TYPES,
         "string:map<tinyint,tinyint>:map<smallint,smallint>:map<int,int>:map<bigint,bigint>:"
-        + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
+            + "map<float,float>:map<double,double>:map<string,string>:map<boolean,boolean>");
     tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING,
         ":key#s,cf-byte:#s:s,cf-short:#s:s,cf-int:#s:s,cf-long:#s:s,cf-float:#s:s,cf-double:#s:s," +
         "cf-string:#s:s,cf-bool:#s:s");
@@ -934,6 +1016,592 @@ public class TestHBaseSerDe extends Test
     assertEquals("Serialized put:", p.toString(), put.toString());
   }
 
+  public void testHBaseSerDeWithAvroSchemaInline() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post-serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroSchemaInline();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroSchemaInline() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithForwardEvolvedSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post-serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"),
+        new String("[[42, test, true, 42432234234]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroForwardEvolvedSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroForwardEvolvedSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA_EVOLVED);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithBackwardEvolvedSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA_EVOLVED);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post-serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroBackwardEvolvedSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroBackwardEvolvedSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_LITERAL, RECORD_SCHEMA);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithAvroSerClass() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromClass1(1);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object.
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {
+        new String("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], "
+                + "[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroSerClass();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroSerClass() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithAvroSchemaUrl() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object.
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {new String("test-row1"), new String("[[42, true, 42432234234]]")};
+
+    MiniDFSCluster miniDfs = null;
+
+    try {
+      // MiniDFSCluster litters files and folders all over the place.
+      miniDfs = new MiniDFSCluster(new Configuration(), 1, true, null);
+
+      miniDfs.getFileSystem().mkdirs(new Path("/path/to/schema"));
+      FSDataOutputStream out = miniDfs.getFileSystem().create(
+          new Path("/path/to/schema/schema.avsc"));
+      out.writeBytes(RECORD_SCHEMA);
+      out.close();
+      String onHDFS = miniDfs.getFileSystem().getUri() + "/path/to/schema/schema.avsc";
+
+      // Create, initialize, and test the SerDe
+      HBaseSerDe serDe = new HBaseSerDe();
+      Configuration conf = new Configuration();
+      Properties tbl = createPropertiesForHiveAvroSchemaUrl(onHDFS);
+      serDe.initialize(conf, tbl);
+
+      deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+    } finally {
+      // Teardown the cluster
+      if (miniDfs != null) {
+        miniDfs.shutdown();
+      }
+    }
+  }
+
+  private Properties createPropertiesForHiveAvroSchemaUrl(String schemaUrl) {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty("cola.avro." + AvroSerdeUtils.SCHEMA_URL, schemaUrl);
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
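For reference, here is a minimal sketch (not part of this commit) of how a schema published at an HDFS URL like the one the test writes above could be fetched and parsed; it assumes only the Hadoop FileSystem API and Avro's Schema.Parser, and the class name is illustrative:

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SchemaUrlSketch {
  // Opens the schema file through whatever FileSystem the URL's scheme selects
  // (HDFS for the MiniDFSCluster URL above) and parses it into an Avro Schema.
  public static Schema readSchema(String schemaUrl) throws IOException {
    Path schemaPath = new Path(schemaUrl);
    FileSystem fs = schemaPath.getFileSystem(new Configuration());
    FSDataInputStream in = fs.open(schemaPath);
    try {
      return new Schema.Parser().parse(in);
    } finally {
      in.close();
    }
  }
}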
+
+  public void testHBaseSerDeWithAvroExternalSchema() throws SerDeException, IOException {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvro = "avro".getBytes();
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroData = getTestAvroBytesFromClass2(1);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object.
+    p.add(new KeyValue(rowKey, cfa, qualAvro, avroData));
+
+    Object[] expectedFieldsData = {
+        new String("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+
+    Properties tbl = createPropertiesForHiveAvroExternalSchema();
+    serDe.initialize(conf, tbl);
+
+    deserializeAndSerializeHiveAvro(serDe, r, p, expectedFieldsData);
+  }
+
+  private Properties createPropertiesForHiveAvroExternalSchema() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.avro.serialization.type", "avro");
+    tbl.setProperty(AvroSerdeUtils.SCHEMA_RETRIEVER,
+        "org.apache.hadoop.hive.hbase.HBaseTestAvroSchemaRetriever");
+    tbl.setProperty("cola.avro." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, ":key,cola:avro");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+
+    return tbl;
+  }
+
+  public void testHBaseSerDeWithHiveMapToHBaseAvroColumnFamily() throws Exception {
+    byte[] cfa = "cola".getBytes();
+
+    byte[] qualAvroA = "prefixA_avro1".getBytes();
+    byte[] qualAvroB = "prefixB_avro2".getBytes();
+    byte[] qualAvroC = "prefixB_avro3".getBytes();
+
+    List<Object> qualifiers = new ArrayList<Object>();
+    qualifiers.add(new Text("prefixA_avro1"));
+    qualifiers.add(new Text("prefixB_avro2"));
+    qualifiers.add(new Text("prefixB_avro3"));
+
+    List<Object> expectedQualifiers = new ArrayList<Object>();
+    expectedQualifiers.add(new Text("prefixB_avro2"));
+    expectedQualifiers.add(new Text("prefixB_avro3"));
+
+    byte[] rowKey = Bytes.toBytes("test-row1");
+
+    // Data
+    List<KeyValue> kvs = new ArrayList<KeyValue>();
+
+    byte[] avroDataA = getTestAvroBytesFromSchema(RECORD_SCHEMA);
+    byte[] avroDataB = getTestAvroBytesFromClass1(1);
+    byte[] avroDataC = getTestAvroBytesFromClass1(2);
+
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroA, avroDataA));
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroB, avroDataB));
+    kvs.add(new KeyValue(rowKey, cfa, qualAvroC, avroDataC));
+
+    Result r = new Result(kvs);
+
+    Put p = new Put(rowKey);
+
+    // Post serialization, separators are automatically inserted between different fields in the
+    // struct. Currently there is no way to disable that, so the workaround here is to pad the
+    // data with the separator bytes before creating a "Put" object.
+    p.add(new KeyValue(rowKey, cfa, qualAvroB, Bytes.padTail(avroDataB, 11)));
+    p.add(new KeyValue(rowKey, cfa, qualAvroC, Bytes.padTail(avroDataC, 11)));
+
+    Object[] expectedFieldsData = {
+        new Text("test-row1"),
+        new String(
+            "[Avro Employee1, 11111, 25, FEMALE, [[[Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address1, Avro Second Address1, Avro City1, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]"),
+        new String(
+            "[Avro Employee2, 11111, 25, FEMALE, [[[Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}], [Avro First Address2, Avro Second Address2, Avro City2, 123456, 0:[999, 1234567890], null, {testkey=testvalue}]], "
+                + "[999, 1234567890], [999, 1234455555]]]")};
+
+    int[] expectedMapSize = new int[] {2};
+
+    // Create, initialize, and test the SerDe
+    HBaseSerDe serDe = new HBaseSerDe();
+    Configuration conf = new Configuration();
+    Properties tbl = createPropertiesForHiveAvroColumnFamilyMap();
+    serDe.initialize(conf, tbl);
+
+    Object notPresentKey = new Text("prefixA_avro1");
+
+    deserializeAndSerializeHiveStructColumnFamily(serDe, r, p, expectedFieldsData, expectedMapSize,
+        expectedQualifiers,
+        notPresentKey);
+  }
+
+  private Properties createPropertiesForHiveAvroColumnFamilyMap() {
+    Properties tbl = new Properties();
+    tbl.setProperty("cola.prefixB_.serialization.type", "avro");
+    tbl.setProperty("cola.prefixB_." + serdeConstants.SERIALIZATION_CLASS,
+        "org.apache.hadoop.hive.hbase.avro.Employee");
+    tbl.setProperty(HBaseSerDe.HBASE_COLUMNS_MAPPING, "cola:prefixB_.*");
+    tbl.setProperty(HBaseSerDe.HBASE_AUTOGENERATE_STRUCT, "true");
+    tbl.setProperty(LazySimpleSerDe.SERIALIZATION_EXTEND_NESTING_LEVELS, "true");
+
+    return tbl;
+  }
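The "cola:prefixB_.*" mapping above pulls the column family into a single Hive map column keyed by qualifier, keeping only qualifiers that match the prefix pattern; that is why prefixA_avro1 is the notPresentKey in the test. A minimal sketch of that filtering behavior, assuming the qualifier part acts as a regular expression (class and variable names are illustrative):

import java.util.regex.Pattern;

public class QualifierPrefixSketch {
  public static void main(String[] args) {
    // Qualifier part of the mapping "cola:prefixB_.*"
    Pattern qualifierPattern = Pattern.compile("prefixB_.*");

    System.out.println(qualifierPattern.matcher("prefixB_avro2").matches()); // true: kept in the map
    System.out.println(qualifierPattern.matcher("prefixB_avro3").matches()); // true: kept in the map
    System.out.println(qualifierPattern.matcher("prefixA_avro1").matches()); // false: excluded
  }
}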
+
+  private void deserializeAndSerializeHiveAvro(HBaseSerDe serDe, Result r, Put p,
+      Object[] expectedFieldsData)
+      throws SerDeException, IOException {
+    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+    Object row = serDe.deserialize(new ResultWritable(r));
+
+    for (int j = 0; j < fieldRefs.size(); j++) {
+      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(j));
+      assertNotNull(fieldData);
+      assertEquals(expectedFieldsData[j], fieldData.toString().trim());
+    }
+
+    // Now serialize
+    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+    assertNotNull(put);
+    assertEquals(p.getFamilyCellMap(), put.getFamilyCellMap());
+  }
+
+  private void deserializeAndSerializeHiveStructColumnFamily(HBaseSerDe serDe, Result r, Put p,
+      Object[] expectedFieldsData,
+      int[] expectedMapSize, List<Object> expectedQualifiers, Object notPresentKey)
+      throws SerDeException, IOException {
+    StructObjectInspector soi = (StructObjectInspector) serDe.getObjectInspector();
+
+    List<? extends StructField> fieldRefs = soi.getAllStructFieldRefs();
+
+    Object row = serDe.deserialize(new ResultWritable(r));
+
+    int k = 0;
+
+    for (int i = 0; i < fieldRefs.size(); i++) {
+      Object fieldData = soi.getStructFieldData(row, fieldRefs.get(i));
+      assertNotNull(fieldData);
+
+      if (fieldData instanceof LazyPrimitive<?, ?>) {
+        assertEquals(expectedFieldsData[i], ((LazyPrimitive<?, ?>) fieldData).getWritableObject());
+      } else if (fieldData instanceof LazyHBaseCellMap) {
+
+        for (int j = 0; j < ((LazyHBaseCellMap) fieldData).getMapSize(); j++) {
+          assertEquals(expectedFieldsData[k + 1],
+              ((LazyHBaseCellMap) fieldData).getMapValueElement(expectedQualifiers.get(k))
+                  .toString().trim());
+          k++;
+        }
+
+        assertEquals(expectedMapSize[i - 1], ((LazyHBaseCellMap) fieldData).getMapSize());
+
+        // Make sure that the unwanted key is not present in the map
+        assertNull(((LazyHBaseCellMap) fieldData).getMapValueElement(notPresentKey));
+
+      } else {
+        fail("Error: field data not an instance of LazyPrimitive<?, ?> or LazyHBaseCellMap");
+      }
+    }
+
+    SerDeUtils.getJSONString(row, soi);
+
+    // Now serialize
+    Put put = ((PutWritable) serDe.serialize(row, soi)).getPut();
+
+    assertNotNull(put);
+  }
+
+  private byte[] getTestAvroBytesFromSchema(String schemaToUse) throws IOException {
+    Schema s = Schema.parse(schemaToUse);
+    GenericData.Record record = new GenericData.Record(s);
+    GenericData.Record innerRecord = new GenericData.Record(s.getField("aRecord").schema());
+    innerRecord.put("int1", 42);
+    innerRecord.put("boolean1", true);
+    innerRecord.put("long1", 42432234234l);
+
+    if (schemaToUse.equals(RECORD_SCHEMA_EVOLVED)) {
+      innerRecord.put("string1", "new value");
+    }
+
+    record.put("aRecord", innerRecord);
+
+    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(s);
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<GenericRecord>(datumWriter);
+    dataFileWriter.create(s, out);
+    dataFileWriter.append(record);
+    dataFileWriter.close();
+
+    byte[] data = out.toByteArray();
+
+    out.close();
+    return data;
+  }
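Because getTestAvroBytesFromSchema writes through DataFileWriter, the returned bytes are in Avro's object-container format (header, embedded schema, sync markers) rather than raw binary. A minimal read-back sketch using the matching container API (not part of this commit; the class name is illustrative):

import java.io.IOException;

import org.apache.avro.file.DataFileReader;
import org.apache.avro.file.SeekableByteArrayInput;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;

public class ContainerReadBackSketch {
  // Iterates over all records in container-format bytes; the schema comes from
  // the container header itself, so none needs to be supplied.
  public static void dump(byte[] data) throws IOException {
    DatumReader<GenericRecord> datumReader = new GenericDatumReader<GenericRecord>();
    DataFileReader<GenericRecord> reader =
        new DataFileReader<GenericRecord>(new SeekableByteArrayInput(data), datumReader);
    try {
      while (reader.hasNext()) {
        System.out.println(reader.next());
      }
    } finally {
      reader.close();
    }
  }
}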
+
+  private byte[] getTestAvroBytesFromClass1(int i) throws IOException {
+    Employee employee = new Employee();
+
+    employee.setEmployeeName("Avro Employee" + i);
+    employee.setEmployeeID(11111L);
+    employee.setGender(Gender.FEMALE);
+    employee.setAge(25L);
+
+    Address address = new Address();
+
+    address.setAddress1("Avro First Address" + i);
+    address.setAddress2("Avro Second Address" + i);
+    address.setCity("Avro City" + i);
+    address.setZipcode(123456L);
+
+    Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();
+
+    metadata.put("testkey", "testvalue");
+
+    address.setMetadata(metadata);
+
+    HomePhone hPhone = new HomePhone();
+
+    hPhone.setAreaCode(999L);
+    hPhone.setNumber(1234567890L);
+
+    OfficePhone oPhone = new OfficePhone();
+
+    oPhone.setAreaCode(999L);
+    oPhone.setNumber(1234455555L);
+
+    ContactInfo contact = new ContactInfo();
+
+    List<Address> addresses = new ArrayList<Address>();
+    address.setCounty(hPhone); // set value for the union type
+    addresses.add(address);
+    addresses.add(address);
+
+    contact.setAddress(addresses);
+
+    contact.setHomePhone(hPhone);
+    contact.setOfficePhone(oPhone);
+
+    employee.setContactInfo(contact);
+
+    DatumWriter<Employee> datumWriter = new SpecificDatumWriter<Employee>(Employee.class);
+    DataFileWriter<Employee> dataFileWriter = new DataFileWriter<Employee>(datumWriter);
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+    dataFileWriter.create(employee.getSchema(), out);
+    dataFileWriter.append(employee);
+    dataFileWriter.close();
+
+    return out.toByteArray();
+  }
+
+  private byte[] getTestAvroBytesFromClass2(int i) throws IOException {
+    Employee employee = new Employee();
+
+    employee.setEmployeeName("Avro Employee" + i);
+    employee.setEmployeeID(11111L);
+    employee.setGender(Gender.FEMALE);
+    employee.setAge(25L);
+
+    Address address = new Address();
+
+    address.setAddress1("Avro First Address" + i);
+    address.setAddress2("Avro Second Address" + i);
+    address.setCity("Avro City" + i);
+    address.setZipcode(123456L);
+
+    Map<CharSequence, CharSequence> metadata = new HashMap<CharSequence, CharSequence>();
+
+    metadata.put("testkey", "testvalue");
+
+    address.setMetadata(metadata);
+
+    HomePhone hPhone = new HomePhone();
+
+    hPhone.setAreaCode(999L);
+    hPhone.setNumber(1234567890L);
+
+    OfficePhone oPhone = new OfficePhone();
+
+    oPhone.setAreaCode(999L);
+    oPhone.setNumber(1234455555L);
+
+    ContactInfo contact = new ContactInfo();
+
+    List<Address> addresses = new ArrayList<Address>();
+    address.setCounty(hPhone); // set value for the union type
+    addresses.add(address);
+    addresses.add(address);
+
+    contact.setAddress(addresses);
+
+    contact.setHomePhone(hPhone);
+    contact.setOfficePhone(oPhone);
+
+    employee.setContactInfo(contact);
+
+    DatumWriter<Employee> employeeWriter = new SpecificDatumWriter<Employee>(Employee.class);
+
+    ByteArrayOutputStream out = new ByteArrayOutputStream();
+
+    Encoder encoder = EncoderFactory.get().binaryEncoder(out, null);
+
+    // write out a header for the payload
+    out.write(TEST_BYTE_ARRAY);
+
+    employeeWriter.write(employee, encoder);
+
+    encoder.flush();
+
+    return out.toByteArray();
+  }
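In contrast to getTestAvroBytesFromClass1, this variant emits a raw binary payload behind a custom header (TEST_BYTE_ARRAY), so no schema travels with the data; that is the case the HBaseTestAvroSchemaRetriever configured in createPropertiesForHiveAvroExternalSchema exists to handle. A hedged decoding sketch, assuming the consumer knows the header length (class and method names are illustrative):

import java.io.IOException;
import java.util.Arrays;

import org.apache.avro.io.BinaryDecoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.specific.SpecificDatumReader;
import org.apache.hadoop.hive.hbase.avro.Employee;

public class RawPayloadDecodeSketch {
  // Decodes an Employee from bytes laid out as [custom header][raw Avro binary].
  public static Employee decode(byte[] bytes, int headerLength) throws IOException {
    byte[] payload = Arrays.copyOfRange(bytes, headerLength, bytes.length);
    BinaryDecoder decoder = DecoderFactory.get().binaryDecoder(payload, null);
    SpecificDatumReader<Employee> reader = new SpecificDatumReader<Employee>(Employee.class);
    return reader.read(null, decoder);
  }
}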
+
   class TestStruct {
     String f1;
     String f2;

Modified: hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java (original)
+++ hive/branches/cbo/itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestJdbcWithMiniHS2.java Wed Sep 10 21:41:16 2014
@@ -46,6 +46,7 @@ import org.junit.Test;
 public class TestJdbcWithMiniHS2 {
   private static MiniHS2 miniHS2 = null;
   private static Path dataFilePath;
+  private static final String tmpDir = System.getProperty("test.tmp.dir");
 
   private Connection hs2Conn = null;
 
@@ -303,7 +304,7 @@ public class TestJdbcWithMiniHS2 {
    * @throws Exception
    */
   @Test
-  public void testScratchDirs() throws Exception {
+  public void testSessionScratchDirs() throws Exception {
     // Stop HiveServer2
     if (miniHS2.isStarted()) {
       miniHS2.stop();
@@ -314,7 +315,7 @@ public class TestJdbcWithMiniHS2 {
     // 1. Test with doAs=false
     conf.setBoolean("hive.server2.enable.doAs", false);
     // Set a custom prefix for hdfs scratch dir path
-    conf.set("hive.exec.scratchdir", "/tmp/hs2");
+    conf.set("hive.exec.scratchdir", tmpDir + "/hs2");
     // Set a scratch dir permission
     String fsPermissionStr = "700";
     conf.set("hive.scratch.dir.permission", fsPermissionStr);
@@ -326,19 +327,21 @@ public class TestJdbcWithMiniHS2 {
     hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
     // FS
     FileSystem fs = miniHS2.getLocalFS();
+    FsPermission expectedFSPermission = new FsPermission(HiveConf.getVar(conf,
+        HiveConf.ConfVars.SCRATCHDIRPERMISSION));
 
     // Verify scratch dir paths and permission
     // HDFS scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
-    verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
 
     // Local scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
 
     // Downloaded resources dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
 
     // 2. Test with doAs=true
     // Restart HiveServer2 with doAs=true
@@ -356,15 +359,15 @@ public class TestJdbcWithMiniHS2 {
     // Verify scratch dir paths and permission
     // HDFS scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
-    verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
 
     // Local scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
 
     // Downloaded resources dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
 
     // Test for user "trinity"
     userName = "trinity";
@@ -373,22 +376,61 @@ public class TestJdbcWithMiniHS2 {
     // Verify scratch dir paths and permission
     // HDFS scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR) + "/" + userName);
-    verifyScratchDir(conf, fs, scratchDirPath, userName, false);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
 
     // Local scratch dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.LOCALSCRATCHDIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
 
     // Downloaded resources dir
     scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
-    verifyScratchDir(conf, fs, scratchDirPath, userName, true);
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, true);
+  }
+
+  /**
+   * Tests the creation of the root HDFS scratch dir, which should be writable by all (777).
+   *
+   * @throws Exception
+   */
+  @Test
+  public void testRootScratchDir() throws Exception {
+    // Stop HiveServer2
+    if (miniHS2.isStarted()) {
+      miniHS2.stop();
+    }
+    HiveConf conf = new HiveConf();
+    String userName;
+    Path scratchDirPath;
+    conf.set("hive.exec.scratchdir", tmpDir + "/hs2");
+    // Start an instance of HiveServer2 which uses miniMR
+    miniHS2 = new MiniHS2(conf);
+    Map<String, String> confOverlay = new HashMap<String, String>();
+    miniHS2.start(confOverlay);
+    userName = System.getProperty("user.name");
+    hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
+    // FS
+    FileSystem fs = miniHS2.getLocalFS();
+    FsPermission expectedFSPermission = new FsPermission("777");
+    // Verify scratch dir paths and permission
+    // HDFS scratch dir
+    scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
+    // Test with multi-level scratch dir path
+    // Stop HiveServer2
+    if (miniHS2.isStarted()) {
+      miniHS2.stop();
+    }
+    conf.set("hive.exec.scratchdir", tmpDir + "/level1/level2/level3");
+    miniHS2 = new MiniHS2(conf);
+    miniHS2.start(confOverlay);
+    hs2Conn = getConnection(miniHS2.getJdbcURL(), userName, "password");
+    scratchDirPath = new Path(HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIR));
+    verifyScratchDir(conf, fs, scratchDirPath, expectedFSPermission, userName, false);
   }
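The expected permission in testRootScratchDir comes from FsPermission's octal-string constructor, the same one the test itself uses with "777". A tiny sketch of what that value means (illustrative only):

import org.apache.hadoop.fs.permission.FsPermission;

public class PermissionSketch {
  public static void main(String[] args) {
    // "777" parses as octal rwxrwxrwx, i.e. writable by all.
    FsPermission allWritable = new FsPermission("777");
    System.out.println(allWritable);           // rwxrwxrwx
    System.out.println(allWritable.toShort()); // 511 (decimal for octal 777)
  }
}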
 
   private void verifyScratchDir(HiveConf conf, FileSystem fs, Path scratchDirPath,
-      String userName, boolean isLocal) throws Exception {
+      FsPermission expectedFSPermission, String userName, boolean isLocal) throws Exception {
     String dirType = isLocal ? "Local" : "DFS";
-    FsPermission expectedFSPermission = new FsPermission(HiveConf.getVar(conf,
-        HiveConf.ConfVars.SCRATCHDIRPERMISSION));
     assertTrue("The expected " + dirType + " scratch dir does not exist for the user: " +
         userName, fs.exists(scratchDirPath));
     if (fs.exists(scratchDirPath) && !isLocal) {

Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1624140&r1=1624139&r2=1624140&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Wed Sep 10 21:41:16 2014
@@ -210,14 +210,6 @@ public class HiveMetaStoreClient impleme
       } catch (Exception e) {
         MetaStoreUtils.logAndThrowMetaException(e);
       }
-    } else if (conf.getVar(HiveConf.ConfVars.METASTOREDIRECTORY) != null) {
-      metastoreUris = new URI[1];
-      try {
-        metastoreUris[0] = new URI(conf
-            .getVar(HiveConf.ConfVars.METASTOREDIRECTORY));
-      } catch (URISyntaxException e) {
-        MetaStoreUtils.logAndThrowMetaException(e);
-      }
     } else {
       LOG.error("NOT getting uris from conf");
       throw new MetaException("MetaStoreURIs not found in conf file");
@@ -1289,7 +1281,7 @@ public class HiveMetaStoreClient impleme
     InvalidInputException{
     return client.update_partition_column_statistics(statsObj);
   }
-  
+
   /** {@inheritDoc} */
   public boolean setPartitionColumnStatistics(SetPartitionsStatsRequest request)
     throws NoSuchObjectException, InvalidObjectException, MetaException, TException,