Posted to commits@hive.apache.org by br...@apache.org on 2014/08/10 03:33:55 UTC
svn commit: r1617040 [10/13] - in /hive/branches/spark: ./
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ data/conf/ dat...
Modified: hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Sun Aug 10 01:33:50 2014
@@ -1008,6 +1008,26 @@ class ColumnStatistics
::Thrift::Struct.generate_accessors self
end
+class AggrStats
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ COLSTATS = 1
+ PARTSFOUND = 2
+
+ FIELDS = {
+ COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatisticsObj}},
+ PARTSFOUND => {:type => ::Thrift::Types::I64, :name => 'partsFound'}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field colStats is unset!') unless @colStats
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field partsFound is unset!') unless @partsFound
+ end
+
+ ::Thrift::Struct.generate_accessors self
+end
+
class Schema
include ::Thrift::Struct, ::Thrift::Struct_Union
FIELDSCHEMAS = 1
Modified: hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb (original)
+++ hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb Sun Aug 10 01:33:50 2014
@@ -1231,6 +1231,23 @@ module ThriftHiveMetastore
raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_partitions_statistics_req failed: unknown result')
end
+ def get_aggr_stats_for(request)
+ send_get_aggr_stats_for(request)
+ return recv_get_aggr_stats_for()
+ end
+
+ def send_get_aggr_stats_for(request)
+ send_message('get_aggr_stats_for', Get_aggr_stats_for_args, :request => request)
+ end
+
+ def recv_get_aggr_stats_for()
+ result = receive_message(Get_aggr_stats_for_result)
+ return result.success unless result.success.nil?
+ raise result.o1 unless result.o1.nil?
+ raise result.o2 unless result.o2.nil?
+ raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_aggr_stats_for failed: unknown result')
+ end
+
def delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
send_delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
return recv_delete_partition_column_statistics()
@@ -2824,6 +2841,19 @@ module ThriftHiveMetastore
write_result(result, oprot, 'get_partitions_statistics_req', seqid)
end
+ def process_get_aggr_stats_for(seqid, iprot, oprot)
+ args = read_args(iprot, Get_aggr_stats_for_args)
+ result = Get_aggr_stats_for_result.new()
+ begin
+ result.success = @handler.get_aggr_stats_for(args.request)
+ rescue ::NoSuchObjectException => o1
+ result.o1 = o1
+ rescue ::MetaException => o2
+ result.o2 = o2
+ end
+ write_result(result, oprot, 'get_aggr_stats_for', seqid)
+ end
+
def process_delete_partition_column_statistics(seqid, iprot, oprot)
args = read_args(iprot, Delete_partition_column_statistics_args)
result = Delete_partition_column_statistics_result.new()
@@ -6077,6 +6107,42 @@ module ThriftHiveMetastore
::Thrift::Struct.generate_accessors self
end
+ class Get_aggr_stats_for_args
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ REQUEST = 1
+
+ FIELDS = {
+ REQUEST => {:type => ::Thrift::Types::STRUCT, :name => 'request', :class => ::PartitionsStatsRequest}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
+ class Get_aggr_stats_for_result
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ SUCCESS = 0
+ O1 = 1
+ O2 = 2
+
+ FIELDS = {
+ SUCCESS => {:type => ::Thrift::Types::STRUCT, :name => 'success', :class => ::AggrStats},
+ O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::NoSuchObjectException},
+ O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
class Delete_partition_column_statistics_args
include ::Thrift::Struct, ::Thrift::Struct_Union
DB_NAME = 1
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java Sun Aug 10 01:33:50 2014
@@ -29,6 +29,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -156,7 +157,7 @@ public class HiveAlterHandler implements
destPath = new Path(newTblLoc);
destFs = wh.getFs(destPath);
// check that src and dest are on the same file system
- if (! equalsFileSystem(srcFs, destFs)) {
+ if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
throw new InvalidOperationException("table new location " + destPath
+ " is on a different file system than the old location "
+ srcPath + ". This operation is not supported");
@@ -251,21 +252,6 @@ public class HiveAlterHandler implements
}
}
- /**
- * @param fs1
- * @param fs2
- * @return return true if both file system arguments point to same file system
- */
- private boolean equalsFileSystem(FileSystem fs1, FileSystem fs2) {
- //When file system cache is disabled, you get different FileSystem objects
- // for same file system, so '==' can't be used in such cases
- //FileSystem api doesn't have a .equals() function implemented, so using
- //the uri for comparison. FileSystem already uses uri+Configuration for
- //equality in its CACHE .
- //Once equality has been added in HDFS-4321, we should make use of it
- return fs1.getUri().equals(fs2.getUri());
- }
-
public Partition alterPartition(final RawStore msdb, Warehouse wh, final String dbname,
final String name, final List<String> part_vals, final Partition new_part)
throws InvalidOperationException, InvalidObjectException, AlreadyExistsException,
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Sun Aug 10 01:33:50 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -423,7 +424,7 @@ public class HiveMetaStore extends Thrif
String partitionValidationRegex =
hiveConf.getVar(HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN);
- if (partitionValidationRegex != null && partitionValidationRegex != "") {
+ if (partitionValidationRegex != null && !partitionValidationRegex.isEmpty()) {
partitionValidationPattern = Pattern.compile(partitionValidationRegex);
} else {
partitionValidationPattern = null;
@@ -4008,7 +4009,7 @@ public class HiveMetaStore extends Thrif
incrementCounter("drop_role");
firePreEvent(new PreAuthorizationCallEvent(this));
if (ADMIN.equals(roleName) || PUBLIC.equals(roleName)) {
- throw new MetaException(PUBLIC + "/" + ADMIN +" role can't be dropped.");
+ throw new MetaException(PUBLIC + "," + ADMIN + " roles can't be dropped.");
}
Boolean ret = null;
try {
@@ -4078,6 +4079,7 @@ public class HiveMetaStore extends Thrif
return ret;
}
+ @Override
public GrantRevokeRoleResponse grant_revoke_role(GrantRevokeRoleRequest request)
throws MetaException, org.apache.thrift.TException {
GrantRevokeRoleResponse response = new GrantRevokeRoleResponse();
@@ -4975,6 +4977,25 @@ public class HiveMetaStore extends Thrif
return rolePrinGrantList;
}
+ @Override
+ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
+ throws NoSuchObjectException, MetaException, TException {
+ startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
+ AggrStats aggrStats = null;
+ try {
+ //TODO: We are setting the count of partitions for which stats were retrieved to the same
+ // value as the number of partitions in the request. This is not correct, but no current
+ // user of this API relies on it: StatsAnnotation, the only current user, does not care.
+ // StatsOptimizer will care, so this must be fixed before StatsOptimizer starts using it.
+ aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
+ request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+ return aggrStats;
+ } finally {
+ endFunction("get_aggr_stats_for: ", aggrStats == null, null, request.getTblName());
+ }
+
+ }
+
}
public static IHMSHandler newHMSHandler(String name, HiveConf hiveConf) throws MetaException {
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Sun Aug 10 01:33:50 2014
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -1820,4 +1821,11 @@ public class HiveMetaStoreClient impleme
NoSuchObjectException, UnsupportedOperationException {
client.drop_table_with_environment_context(dbname, name, deleteData, envContext);
}
+
+ @Override
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+ PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
+ return client.get_aggr_stats_for(req);
+ }
}
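
For illustration only (not part of this commit): a minimal sketch of how the new aggregate-stats client call might be exercised, assuming a reachable metastore and a hypothetical partitioned table "web_logs" in database "default" with a "page_views" column and "dt=..." partitions.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

public class AggrStatsExample {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    // Hypothetical table, column, and partition names.
    List<String> colNames = Arrays.asList("page_views");
    List<String> partNames = Arrays.asList("dt=2014-08-01", "dt=2014-08-02");
    AggrStats stats = client.getAggrColStatsFor("default", "web_logs", colNames, partNames);
    System.out.println("partsFound: " + stats.getPartsFound());
    for (ColumnStatisticsObj cso : stats.getColStats()) {
      System.out.println(cso.getColName() + " (" + cso.getColType() + "): " + cso.getStatsData());
    }
    client.close();
  }
}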
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java Sun Aug 10 01:33:50 2014
@@ -38,6 +38,7 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -1290,4 +1291,7 @@ public interface IMetaStoreClient {
*/
GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(
GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException;
+
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partName) throws NoSuchObjectException, MetaException, TException;
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Sun Aug 10 01:33:50 2014
@@ -309,7 +309,7 @@ class MetaStoreDirectSql {
StringBuilder partSb = new StringBuilder(sbCapacity);
// Assume db and table names are the same for all partition, that's what we're selecting for.
for (Object partitionId : sqlResult) {
- partSb.append(extractSqlLong(partitionId)).append(",");
+ partSb.append(StatObjectConverter.extractSqlLong(partitionId)).append(",");
}
String partIds = trimCommaList(partSb);
timingTrace(doTrace, queryText, start, queryTime);
@@ -346,10 +346,10 @@ class MetaStoreDirectSql {
dbName = dbName.toLowerCase();
for (Object[] fields : sqlResult2) {
// Here comes the ugly part...
- long partitionId = extractSqlLong(fields[0]);
- Long sdId = extractSqlLong(fields[1]);
- Long colId = extractSqlLong(fields[2]);
- Long serdeId = extractSqlLong(fields[3]);
+ long partitionId = StatObjectConverter.extractSqlLong(fields[0]);
+ Long sdId = StatObjectConverter.extractSqlLong(fields[1]);
+ Long colId = StatObjectConverter.extractSqlLong(fields[2]);
+ Long serdeId = StatObjectConverter.extractSqlLong(fields[3]);
// A partition must have either everything set, or nothing set if it's a view.
if (sdId == null || colId == null || serdeId == null) {
if (isView == null) {
@@ -427,6 +427,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"PART_ID\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -435,6 +436,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.addToValues((String)fields[1]);
}});
@@ -452,6 +454,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SD_ID\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -460,6 +463,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (fields[2] == null) return;
t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2])));
@@ -469,6 +473,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.addToBucketCols((String)fields[1]);
}});
@@ -479,6 +484,7 @@ class MetaStoreDirectSql {
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
boolean hasSkewedColumns =
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
t.getSkewedInfo().addToSkewedColNames((String)fields[1]);
@@ -502,6 +508,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
// Note that this is not a typical list accumulator - there's no call to finalize
@@ -511,7 +518,7 @@ class MetaStoreDirectSql {
currentListId = null;
t.getSkewedInfo().addToSkewedColValues(new ArrayList<String>());
} else {
- long fieldsListId = extractSqlLong(fields[1]);
+ long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
@@ -539,6 +546,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) {
SkewedInfo skewedInfo = new SkewedInfo();
@@ -552,7 +560,7 @@ class MetaStoreDirectSql {
currentList = new ArrayList<String>(); // left outer join produced a list with no values
currentListId = null;
} else {
- long fieldsListId = extractSqlLong(fields[1]);
+ long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
@@ -572,6 +580,7 @@ class MetaStoreDirectSql {
+ " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
+ @Override
public void apply(List<FieldSchema> t, Object[] fields) {
t.add(new FieldSchema((String)fields[2], (String)fields[3], (String)fields[1]));
}});
@@ -582,6 +591,7 @@ class MetaStoreDirectSql {
+ " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SERDE_ID\" asc";
loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
+ @Override
public void apply(SerDeInfo t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -589,14 +599,6 @@ class MetaStoreDirectSql {
return orderedResult;
}
- private Long extractSqlLong(Object obj) throws MetaException {
- if (obj == null) return null;
- if (!(obj instanceof Number)) {
- throw new MetaException("Expected numeric type but got " + obj.getClass().getName());
- }
- return ((Number)obj).longValue();
- }
-
private void timingTrace(boolean doTrace, String queryText, long start, long queryTime) {
if (!doTrace) return;
LOG.debug("Direct SQL query in " + (queryTime - start) / 1000000.0 + "ms + " +
@@ -664,7 +666,7 @@ class MetaStoreDirectSql {
if (fields == null) {
fields = iter.next();
}
- long nestedId = extractSqlLong(fields[keyIndex]);
+ long nestedId = StatObjectConverter.extractSqlLong(fields[keyIndex]);
if (nestedId < id) throw new MetaException("Found entries for unknown ID " + nestedId);
if (nestedId > id) break; // fields belong to one of the next entries
func.apply(entry.getValue(), fields);
@@ -891,19 +893,49 @@ class MetaStoreDirectSql {
return result;
}
- public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
List<String> partNames, List<String> colNames) throws MetaException {
- if (colNames.isEmpty() || partNames.isEmpty()) {
- return Lists.newArrayList();
- }
- boolean doTrace = LOG.isDebugEnabled();
- long start = doTrace ? System.nanoTime() : 0;
- String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+ + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+ + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+ + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
- + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+ + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ Query query = pm.newQuery("javax.jdo.query.SQL", qText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
+ if (qResult == null) {
+ query.closeAll();
+ return Lists.newArrayList();
+ }
+ List<Object[]> list = ensureList(qResult);
+ List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
+ for (Object[] row : list) {
+ colStats.add(prepareCSObj(row,0));
+ }
+ long end = doTrace ? System.nanoTime() : 0;
+ timingTrace(doTrace, qText, start, end);
+ query.closeAll();
+ return colStats;
+ }
+
+ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaException {
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
+ Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
+ declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+ avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
+ StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
+ llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+ return cso;
+ }
+
+ private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
+ List<String> colNames) throws MetaException {
- Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
Object[] params = new Object[colNames.size() + partNames.size() + 2];
int paramI = 0;
params[paramI++] = dbName;
@@ -914,7 +946,24 @@ class MetaStoreDirectSql {
for (String partName : partNames) {
params[paramI++] = partName;
}
- Object qResult = query.executeWithArray(params);
+
+ return params;
+ }
+
+ public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ List<String> partNames, List<String> colNames) throws MetaException {
+ if (colNames.isEmpty() || partNames.isEmpty()) {
+ return Lists.newArrayList();
+ }
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
+ + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+
+ Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
long queryTime = doTrace ? System.nanoTime() : 0;
if (qResult == null) {
query.closeAll();
@@ -960,19 +1009,10 @@ class MetaStoreDirectSql {
// LastAnalyzed is stored per column but thrift has it per several;
// get the lowest for now as nobody actually uses this field.
Object laObj = row[offset + 14];
- if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
- csd.setLastAnalyzed(extractSqlLong(laObj));
+ if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > StatObjectConverter.extractSqlLong(laObj))) {
+ csd.setLastAnalyzed(StatObjectConverter.extractSqlLong(laObj));
}
- ColumnStatisticsData data = new ColumnStatisticsData();
- // see STATS_COLLIST
- int i = offset;
- ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
- Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
- declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
- avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
- StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
- llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
- csos.add(cso);
+ csos.add(prepareCSObj(row, offset));
}
result.setStatsObj(csos);
return result;
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Sun Aug 10 01:33:50 2014
@@ -5901,6 +5901,29 @@ public class ObjectStore implements RawS
}.run(true);
}
+
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+
+ return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+ @Override
+ protected List<ColumnStatisticsObj> getSqlResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
+ return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+ }
+
+ @Override
+ protected List<ColumnStatisticsObj> getJdoResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
+ NoSuchObjectException {
+ // This is the fast path for query optimizations: if we can find this info quickly using
+ // directSql, do it. No point in falling back to the slow path here.
+ throw new MetaException("Jdo path is not implemented for stats aggr.");
+ }
+ }.run(true);
+ }
+
private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
Table table, List<String> partNames, List<String> colNames)
throws NoSuchObjectException, MetaException {
@@ -6747,5 +6770,4 @@ public class ObjectStore implements RawS
}
return funcs;
}
-
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Sun Aug 10 01:33:50 2014
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -547,4 +548,6 @@ public interface RawStore extends Config
*/
public List<String> getFunctions(String dbName, String pattern) throws MetaException;
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Sun Aug 10 01:33:50 2014
@@ -352,61 +352,61 @@ public class StatObjectConverter {
// SQL
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
- Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) {
+ Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
- boolStats.setNumFalses((Long)falses);
- boolStats.setNumTrues((Long)trues);
- boolStats.setNumNulls((Long)nulls);
+ boolStats.setNumFalses(extractSqlLong(falses));
+ boolStats.setNumTrues(extractSqlLong(trues));
+ boolStats.setNumNulls(extractSqlLong(nulls));
data.setBooleanStats(boolStats);
} else if (colType.equals("string") ||
colType.startsWith("varchar") || colType.startsWith("char")) {
StringColumnStatsData stringStats = new StringColumnStatsData();
- stringStats.setNumNulls((Long)nulls);
+ stringStats.setNumNulls(extractSqlLong(nulls));
stringStats.setAvgColLen((Double)avglen);
- stringStats.setMaxColLen((Long)maxlen);
- stringStats.setNumDVs((Long)dist);
+ stringStats.setMaxColLen(extractSqlLong(maxlen));
+ stringStats.setNumDVs(extractSqlLong(dist));
data.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
- binaryStats.setNumNulls((Long)nulls);
+ binaryStats.setNumNulls(extractSqlLong(nulls));
binaryStats.setAvgColLen((Double)avglen);
- binaryStats.setMaxColLen((Long)maxlen);
+ binaryStats.setMaxColLen(extractSqlLong(maxlen));
data.setBinaryStats(binaryStats);
} else if (colType.equals("bigint") || colType.equals("int") ||
colType.equals("smallint") || colType.equals("tinyint") ||
colType.equals("timestamp")) {
LongColumnStatsData longStats = new LongColumnStatsData();
- longStats.setNumNulls((Long)nulls);
+ longStats.setNumNulls(extractSqlLong(nulls));
if (lhigh != null) {
- longStats.setHighValue((Long)lhigh);
+ longStats.setHighValue(extractSqlLong(lhigh));
}
if (llow != null) {
- longStats.setLowValue((Long)llow);
+ longStats.setLowValue(extractSqlLong(llow));
}
- longStats.setNumDVs((Long)dist);
+ longStats.setNumDVs(extractSqlLong(dist));
data.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
- doubleStats.setNumNulls((Long)nulls);
+ doubleStats.setNumNulls(extractSqlLong(nulls));
if (dhigh != null) {
doubleStats.setHighValue((Double)dhigh);
}
if (dlow != null) {
doubleStats.setLowValue((Double)dlow);
}
- doubleStats.setNumDVs((Long)dist);
+ doubleStats.setNumDVs(extractSqlLong(dist));
data.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
- decimalStats.setNumNulls((Long)nulls);
+ decimalStats.setNumNulls(extractSqlLong(nulls));
if (dechigh != null) {
decimalStats.setHighValue(createThriftDecimal((String)dechigh));
}
if (declow != null) {
decimalStats.setLowValue(createThriftDecimal((String)declow));
}
- decimalStats.setNumDVs((Long)dist);
+ decimalStats.setNumDVs(extractSqlLong(dist));
data.setDecimalStats(decimalStats);
}
}
@@ -419,4 +419,12 @@ public class StatObjectConverter {
private static String createJdoDecimalString(Decimal d) {
return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString();
}
+
+ static Long extractSqlLong(Object obj) throws MetaException {
+ if (obj == null) return null;
+ if (!(obj instanceof Number)) {
+ throw new MetaException("Expected numeric type but got " + obj.getClass().getName());
+ }
+ return ((Number)obj).longValue();
+ }
}
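
A brief aside (not from the commit) on why the direct (Long) casts above were replaced with extractSqlLong: depending on the database and JDBC driver, an aggregated column may come back as any Number subclass (Long, Integer, BigDecimal, ...), so a hard cast can throw ClassCastException where a numeric conversion succeeds. A self-contained sketch with hypothetical values:

import java.math.BigDecimal;

public class ExtractSqlLongSketch {
  // Mirrors the converter added to StatObjectConverter: accept any Number.
  static Long extractSqlLong(Object obj) {
    if (obj == null) return null;
    if (!(obj instanceof Number)) {
      throw new IllegalArgumentException("Expected numeric type but got " + obj.getClass().getName());
    }
    return ((Number) obj).longValue();
  }

  public static void main(String[] args) {
    Object asLong = Long.valueOf(42L);        // one driver may return Long
    Object asDecimal = new BigDecimal("42");  // another may return BigDecimal for the same column
    System.out.println(extractSqlLong(asLong));    // 42
    System.out.println(extractSqlLong(asDecimal)); // 42
    // A direct cast would fail in the second case:
    // Long broken = (Long) asDecimal;  // ClassCastException at runtime
  }
}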
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java Sun Aug 10 01:33:50 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.metastore.txn;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.shims.ShimLoader;
import java.sql.Connection;
import java.sql.Driver;
@@ -201,7 +202,8 @@ public class TxnDbUtil {
Properties prop = new Properties();
String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
- String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+ String passwd = ShimLoader.getHadoopShims().getPassword(conf,
+ HiveConf.ConfVars.METASTOREPWD.varname);
prop.put("user", user);
prop.put("password", passwd);
return driver.connect(driverUrl, prop);
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Sun Aug 10 01:33:50 2014
@@ -32,9 +32,12 @@ import org.apache.hadoop.hive.common.Val
import org.apache.hadoop.hive.common.ValidTxnListImpl;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.*;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.util.StringUtils;
import javax.sql.DataSource;
+
+import java.io.IOException;
import java.sql.*;
import java.util.*;
@@ -1602,7 +1605,13 @@ public class TxnHandler {
String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
- String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+ String passwd;
+ try {
+ passwd = ShimLoader.getHadoopShims().getPassword(conf,
+ HiveConf.ConfVars.METASTOREPWD.varname);
+ } catch (IOException err) {
+ throw new SQLException("Error getting metastore password", err);
+ }
String connectionPooler = HiveConf.getVar(conf,
HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE).toLowerCase();
Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Sun Aug 10 01:33:50 2014
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -709,5 +711,12 @@ public class DummyRawStoreControlledComm
return objectStore.getFunctions(dbName, pattern);
}
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
+
}
Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Sun Aug 10 01:33:50 2014
@@ -26,6 +26,7 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -726,7 +728,12 @@ public class DummyRawStoreForJdoConnecti
return null;
}
-
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
}
Modified: hive/branches/spark/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/pom.xml?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/pom.xml (original)
+++ hive/branches/spark/pom.xml Sun Aug 10 01:33:50 2014
@@ -113,8 +113,8 @@
<hadoop-20S.version>1.2.1</hadoop-20S.version>
<hadoop-23.version>2.4.0</hadoop-23.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
- <hbase.hadoop1.version>0.96.0-hadoop1</hbase.hadoop1.version>
- <hbase.hadoop2.version>0.96.0-hadoop2</hbase.hadoop2.version>
+ <hbase.hadoop1.version>0.98.3-hadoop1</hbase.hadoop1.version>
+ <hbase.hadoop2.version>0.98.3-hadoop2</hbase.hadoop2.version>
<!-- httpcomponents are not always in version sync -->
<httpcomponents.client.version>4.2.5</httpcomponents.client.version>
<httpcomponents.core.version>4.2.5</httpcomponents.core.version>
@@ -777,7 +777,7 @@
<test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>
<java.net.preferIPv4Stack>true</java.net.preferIPv4Stack>
<!-- EnforceReadOnlyTables hook and QTestUtil -->
- <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc</test.src.tables>
+ <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc,src_hbase</test.src.tables>
<java.security.krb5.conf>${test.tmp.dir}/conf/krb5.conf</java.security.krb5.conf>
</systemPropertyVariables>
</configuration>
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Sun Aug 10 01:33:50 2014
@@ -608,7 +608,7 @@ public class Driver implements CommandPr
Partition partition = read.getPartition();
tbl = partition.getTable();
// use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
List<String> cols = part2Cols.get(partition);
if (cols != null && cols.size() > 0) {
authorizer.authorize(partition.getTable(),
@@ -626,7 +626,7 @@ public class Driver implements CommandPr
// check, and the table authorization may already happened because of other
// partitions
if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) &&
- !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) {
+ !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
List<String> cols = tab2Cols.get(tbl);
if (cols != null && cols.size() > 0) {
authorizer.authorize(tbl, null, cols,
@@ -671,7 +671,7 @@ public class Driver implements CommandPr
//or non-existent tho such sources may still be referenced by the TableScanOperator
//if it's null then the partition probably doesn't exist so let's use table permission
if (tbl.isPartitioned() &&
- tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Sun Aug 10 01:33:50 2014
@@ -705,13 +705,9 @@ public abstract class CommonJoinOperator
}
if (allOne) {
- LOG.info("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.info("called genAllOneUniqueJoinObject");
} else {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
}
} else {
// does any result need to be emitted
@@ -753,17 +749,11 @@ public abstract class CommonJoinOperator
}
if (!hasEmpty && !mayHasMoreThanOne) {
- LOG.trace("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.trace("called genAllOneUniqueJoinObject");
} else if (!hasEmpty && !hasLeftSemiJoin) {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
} else {
- LOG.trace("calling genObject");
genJoinObject();
- LOG.trace("called genObject");
}
}
Arrays.fill(aliasFilterTags, (byte)0xff);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Sun Aug 10 01:33:50 2014
@@ -3924,12 +3924,16 @@ public class DDLTask extends Task<DDLWor
tbl.setInputFormatClass(crtTbl.getInputFormat());
tbl.setOutputFormatClass(crtTbl.getOutputFormat());
- tbl.getTTable().getSd().setInputFormat(
- tbl.getInputFormatClass().getName());
- tbl.getTTable().getSd().setOutputFormat(
- tbl.getOutputFormatClass().getName());
+ // only persist input/output format to metadata when it is explicitly specified.
+ // Otherwise, load lazily via StorageHandler at query time.
+ if (crtTbl.getInputFormat() != null && !crtTbl.getInputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
+ }
+ if (crtTbl.getOutputFormat() != null && !crtTbl.getOutputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+ }
- if (!Utilities.isDefaultNameNode(conf)) {
+ if (!Utilities.isDefaultNameNode(conf) && tbl.getTTable().getSd().isSetLocation()) {
// If location is specified - ensure that it is a full qualified name
makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName());
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Sun Aug 10 01:33:50 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -64,23 +65,9 @@ import org.apache.hadoop.hive.ql.plan.Un
*
*/
public final class OperatorFactory {
+ private static final List<OpTuple> opvec;
+ private static final List<OpTuple> vectorOpvec;
- /**
- * OpTuple.
- *
- * @param <T>
- */
- public static final class OpTuple<T extends OperatorDesc> {
- public Class<T> descClass;
- public Class<? extends Operator<T>> opClass;
-
- public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
- this.descClass = descClass;
- this.opClass = opClass;
- }
- }
-
- public static ArrayList<OpTuple> opvec;
static {
opvec = new ArrayList<OpTuple>();
opvec.add(new OpTuple<FilterDesc>(FilterDesc.class, FilterOperator.class));
@@ -116,7 +103,6 @@ public final class OperatorFactory {
MuxOperator.class));
}
- public static ArrayList<OpTuple> vectorOpvec;
static {
vectorOpvec = new ArrayList<OpTuple>();
vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
@@ -128,8 +114,20 @@ public final class OperatorFactory {
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
+ vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
}
+ private static final class OpTuple<T extends OperatorDesc> {
+ private final Class<T> descClass;
+ private final Class<? extends Operator<T>> opClass;
+
+ public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
+ this.descClass = descClass;
+ this.opClass = opClass;
+ }
+ }
+
+
public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,
VectorizationContext vContext) throws HiveException {
Class<T> descClass = (Class<T>) conf.getClass();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Sun Aug 10 01:33:50 2014
@@ -25,6 +25,8 @@ import java.util.Arrays;
import java.util.List;
import java.util.Random;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -61,9 +63,20 @@ public class ReduceSinkOperator extends
PTFUtils.makeTransient(ReduceSinkOperator.class, "inputAliases", "valueIndex");
}
+ private static final Log LOG = LogFactory.getLog(ReduceSinkOperator.class.getName());
private static final long serialVersionUID = 1L;
- protected transient OutputCollector out;
+ private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance();
+
+ private transient ObjectInspector[] partitionObjectInspectors;
+ private transient ObjectInspector[] bucketObjectInspectors;
+ private transient int buckColIdxInKey;
+ private boolean firstRow;
+ private transient int tag;
+ private boolean skipTag = false;
+ private transient InspectableObject tempInspectableObject = new InspectableObject();
+ private transient int[] valueIndex; // index for value(+ from keys, - from values)
+ protected transient OutputCollector out;
/**
* The evaluators for the key columns. Key columns decide the sort order on
* the reducer side. Key columns are passed to the reducer in the "key".
@@ -84,38 +97,40 @@ public class ReduceSinkOperator extends
* Evaluators for bucketing columns. This is used to compute bucket number.
*/
protected transient ExprNodeEvaluator[] bucketEval = null;
-
- // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is
- // ready
+ // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
protected transient Serializer keySerializer;
protected transient boolean keyIsText;
protected transient Serializer valueSerializer;
- transient int tag;
protected transient byte[] tagByte = new byte[1];
- transient protected int numDistributionKeys;
- transient protected int numDistinctExprs;
- transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
- private boolean skipTag = false;
+ protected transient int numDistributionKeys;
+ protected transient int numDistinctExprs;
+ protected transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
protected transient boolean autoParallel = false;
-
- protected static final MurmurHash hash = (MurmurHash)MurmurHash.getInstance();
-
- private transient int[] valueIndex; // index for value(+ from keys, - from values)
-
- public void setInputAliases(String[] inputAliases) {
- this.inputAliases = inputAliases;
- }
-
- public String[] getInputAliases() {
- return inputAliases;
- }
-
- public void setOutputCollector(OutputCollector _out) {
- this.out = _out;
- }
-
// picks topN K:V pairs from input.
protected transient TopNHash reducerHash = new TopNHash();
+ protected transient HiveKey keyWritable = new HiveKey();
+ protected transient ObjectInspector keyObjectInspector;
+ protected transient ObjectInspector valueObjectInspector;
+ protected transient Object[] cachedValues;
+ protected transient List<List<Integer>> distinctColIndices;
+ protected transient Random random;
+ /**
+ * This two dimensional array holds key data and a corresponding Union object
+ * which contains the tag identifying the aggregate expression for distinct columns.
+ *
+ * If there is no distinct expression, cachedKeys is simply like this.
+ * cachedKeys[0] = [col0][col1]
+ *
+ * with two distinct expressions, union(tag:key) is attached for each distinct expression
+ * cachedKeys[0] = [col0][col1][0:dist1]
+ * cachedKeys[1] = [col0][col1][1:dist2]
+ *
+ * in this case, child GBY evaluates distinct values with expression like KEY.col2:0.dist1
+ * see {@link ExprNodeColumnEvaluator}
+ */
+ // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
+ protected transient Object[][] cachedKeys;
+
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
try {
@@ -184,40 +199,12 @@ public class ReduceSinkOperator extends
firstRow = true;
initializeChildren(hconf);
} catch (Exception e) {
- e.printStackTrace();
+ String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
+ LOG.error(msg, e);
throw new RuntimeException(e);
}
}
- transient InspectableObject tempInspectableObject = new InspectableObject();
- protected transient HiveKey keyWritable = new HiveKey();
-
- protected transient ObjectInspector keyObjectInspector;
- protected transient ObjectInspector valueObjectInspector;
- transient ObjectInspector[] partitionObjectInspectors;
- transient ObjectInspector[] bucketObjectInspectors = null;
- transient int buckColIdxInKey;
-
- protected transient Object[] cachedValues;
- protected transient List<List<Integer>> distinctColIndices;
- /**
- * This two dimensional array holds key data and a corresponding Union object
- * which contains the tag identifying the aggregate expression for distinct columns.
- *
- * If there is no distict expression, cachedKeys is simply like this.
- * cachedKeys[0] = [col0][col1]
- *
- * with two distict expression, union(tag:key) is attatched for each distinct expression
- * cachedKeys[0] = [col0][col1][0:dist1]
- * cachedKeys[1] = [col0][col1][1:dist2]
- *
- * in this case, child GBY evaluates distict values with expression like KEY.col2:0.dist1
- * see {@link ExprNodeColumnEvaluator}
- */
- // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
- protected transient Object[][] cachedKeys;
- boolean firstRow;
- protected transient Random random;
/**
* Initializes array of ExprNodeEvaluator. Adds Union field for distinct
@@ -509,4 +496,16 @@ public class ReduceSinkOperator extends
public int[] getValueIndex() {
return valueIndex;
}
+
+ public void setInputAliases(String[] inputAliases) {
+ this.inputAliases = inputAliases;
+ }
+
+ public String[] getInputAliases() {
+ return inputAliases;
+ }
+
+ public void setOutputCollector(OutputCollector _out) {
+ this.out = _out;
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java Sun Aug 10 01:33:50 2014
@@ -250,7 +250,7 @@ public class ExecMapper extends MapReduc
+ used_memory);
}
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
mo.preorderMap(rps);
return;
} catch (Exception e) {
@@ -285,10 +285,10 @@ public class ExecMapper extends MapReduc
* reportStats.
*
*/
- public static class reportStats implements Operator.OperatorFunc {
- Reporter rp;
+ public static class ReportStats implements Operator.OperatorFunc {
+ private final Reporter rp;
- public reportStats(Reporter rp) {
+ public ReportStats(Reporter rp) {
this.rp = rp;
}
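
The ReportStats rename above keeps the same visitor-style usage: the function object is handed to preorderMap so every operator in the tree can publish its counters. A minimal sketch of that traversal pattern, assuming simplified Op/preorderMap stand-ins rather than Hive's actual Operator and OperatorFunc classes:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

// Sketch of the pattern behind ReportStats: a small function object is applied to every
// operator in preorder so each one can publish its statistics. Op is a simplified
// stand-in, not Hive's Operator.
public class PreorderFuncSketch {
  static final class Op {
    final String name;
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }
    // Visit this operator first, then recurse into its children (preorder).
    void preorderMap(Consumer<Op> func) {
      func.accept(this);
      for (Op child : children) {
        child.preorderMap(func);
      }
    }
  }

  public static void main(String[] args) {
    Op map = new Op("MAP");
    Op filter = new Op("FIL");
    Op reduceSink = new Op("RS");
    map.children.add(filter);
    filter.children.add(reduceSink);

    // Analogous to mo.preorderMap(new ReportStats(rp)) at the end of a task.
    map.preorderMap(op -> System.out.println("report counters for " + op.name));
  }
}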
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Sun Aug 10 01:33:50 2014
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -306,7 +306,7 @@ public class ExecReducer extends MapRedu
}
reducer.close(abort);
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java Sun Aug 10 01:33:50 2014
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
@@ -225,7 +225,7 @@ public class MapRecordProcessor extends
if (isLogInfoEnabled) {
logCloseInfo();
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
mapOp.preorderMap(rps);
return;
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Sun Aug 10 01:33:50 2014
@@ -35,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.tez.tools.InputMerger;
@@ -136,7 +136,7 @@ public class ReduceRecordProcessor exte
reducer.setParentOperators(null); // clear out any parents as reducer is the
// root
isTagged = redWork.getNeedsTagging();
- vectorized = redWork.getVectorModeOn() != null;
+ vectorized = redWork.getVectorMode();
try {
keyTableDesc = redWork.getKeyDesc();
@@ -519,7 +519,7 @@ public class ReduceRecordProcessor exte
dummyOp.close(abort);
}
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Sun Aug 10 01:33:50 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
@@ -123,6 +124,7 @@ public class VectorizedBatchUtil {
case DOUBLE:
cvList.add(new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
break;
+ case BINARY:
case STRING:
cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
break;
@@ -237,7 +239,7 @@ public class VectorizedBatchUtil {
// float/double. String types have no default value for null.
switch (poi.getPrimitiveCategory()) {
case BOOLEAN: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
lcv.isNull[rowIndex] = false;
@@ -248,7 +250,7 @@ public class VectorizedBatchUtil {
}
break;
case BYTE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -259,7 +261,7 @@ public class VectorizedBatchUtil {
}
break;
case SHORT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -270,7 +272,7 @@ public class VectorizedBatchUtil {
}
break;
case INT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -281,7 +283,7 @@ public class VectorizedBatchUtil {
}
break;
case LONG: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -292,7 +294,7 @@ public class VectorizedBatchUtil {
}
break;
case DATE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
lcv.isNull[rowIndex] = false;
@@ -303,7 +305,7 @@ public class VectorizedBatchUtil {
}
break;
case FLOAT: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i];
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
@@ -314,7 +316,7 @@ public class VectorizedBatchUtil {
}
break;
case DOUBLE: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i];
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
@@ -325,7 +327,7 @@ public class VectorizedBatchUtil {
}
break;
case TIMESTAMP: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
@@ -336,8 +338,27 @@ public class VectorizedBatchUtil {
}
}
break;
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ BytesWritable bw = (BytesWritable) writableCol;
+ byte[] bytes = bw.getBytes();
+ int start = buffer.getLength();
+ int length = bytes.length;
+ try {
+ buffer.write(bytes, 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
case STRING: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off+i];
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
Text colText = (Text) writableCol;
@@ -355,7 +376,7 @@ public class VectorizedBatchUtil {
}
break;
case DECIMAL:
- DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off+i];
+ DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.isNull[rowIndex] = false;
HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
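
For the new BINARY branch in VectorizedBatchUtil above, the idea is the same as the existing STRING branch: append the row's bytes to a shared scratch buffer and record only an (array, start, length) reference in the column vector. A rough sketch of that by-reference pattern, using java.io.ByteArrayOutputStream and a hypothetical SimpleBytesColumn in place of Hadoop's DataOutputBuffer and Hive's BytesColumnVector:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

// Sketch of the "append to a shared buffer, store only a reference" pattern used by
// BytesColumnVector.setRef in the BINARY/STRING branches above. SimpleBytesColumn is
// a hypothetical stand-in, not a Hive class.
public class ByRefColumnSketch {
  static final class SimpleBytesColumn {
    byte[][] vector; int[] start; int[] length; boolean[] isNull;
    SimpleBytesColumn(int size) {
      vector = new byte[size][]; start = new int[size]; length = new int[size]; isNull = new boolean[size];
    }
    void setRef(int row, byte[] data, int off, int len) {
      vector[row] = data; start[row] = off; length[row] = len; isNull[row] = false;
    }
  }

  public static void main(String[] args) {
    byte[][] rows = { "alpha".getBytes(StandardCharsets.UTF_8),
                      "beta".getBytes(StandardCharsets.UTF_8) };
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(); // plays the role of DataOutputBuffer
    int[] starts = new int[rows.length];
    for (int i = 0; i < rows.length; i++) {
      starts[i] = buffer.size();               // offset of this row inside the shared buffer
      buffer.write(rows[i], 0, rows[i].length);
    }
    byte[] data = buffer.toByteArray();         // one backing array shared by every row
    SimpleBytesColumn col = new SimpleBytesColumn(rows.length);
    for (int i = 0; i < rows.length; i++) {
      col.setRef(i, data, starts[i], rows[i].length);  // no per-row copy of the bytes
    }
    System.out.println(new String(col.vector[1], col.start[1], col.length[1], StandardCharsets.UTF_8));
  }
}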
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Sun Aug 10 01:33:50 2014
@@ -152,13 +152,20 @@ public class VectorizedColumnarSerDe ext
ByteBuffer b = Text.encode(String.valueOf(dcv.vector[rowIndex]));
serializeVectorStream.write(b.array(), 0, b.limit());
break;
- case STRING:
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
+ byte[] bytes = bcv.vector[rowIndex];
+ serializeVectorStream.write(bytes, 0, bytes.length);
+ }
+ break;
+ case STRING: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex],
bcv.start[rowIndex],
bcv.length[rowIndex],
serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
.getNeedsEscape());
+ }
break;
case TIMESTAMP:
LongColumnVector tcv = (LongColumnVector) batch.cols[k];
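
The BINARY and STRING branches added above differ mainly in whether the bytes are escaped on the way out. A simplified illustration of that difference follows; escapeAndWrite is only a stand-in for LazyUtils.writeEscaped, not the real Hive routine, and the escape/separator bytes are chosen arbitrarily.

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

// Sketch contrasting the BINARY branch (raw bytes) with the STRING branch (escaped bytes)
// in the VectorizedColumnarSerDe hunk above.
public class EscapedWriteSketch {
  static void escapeAndWrite(ByteArrayOutputStream out, byte[] data, int start, int length,
                             byte escapeChar, byte separator) {
    for (int i = start; i < start + length; i++) {
      byte b = data[i];
      if (b == escapeChar || b == separator) {
        out.write(escapeChar);   // prefix special bytes so the row format stays parseable
      }
      out.write(b);
    }
  }

  public static void main(String[] args) {
    byte[] value = "a,b\\c".getBytes(StandardCharsets.UTF_8);

    ByteArrayOutputStream raw = new ByteArrayOutputStream();
    raw.write(value, 0, value.length);          // BINARY-style: bytes pass through untouched

    ByteArrayOutputStream escaped = new ByteArrayOutputStream();
    escapeAndWrite(escaped, value, 0, value.length, (byte) '\\', (byte) ',');

    System.out.println(raw.size() + " raw bytes vs " + escaped.size() + " escaped bytes");
  }
}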
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Sun Aug 10 01:33:50 2014
@@ -278,7 +278,7 @@ public class VectorizedRowBatchCtx {
case PRIMITIVE: {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
// Vectorization currently only supports the following data types:
- // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP,
+ // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP,
// DATE and DECIMAL
switch (poi.getPrimitiveCategory()) {
case BOOLEAN:
@@ -294,6 +294,7 @@ public class VectorizedRowBatchCtx {
case DOUBLE:
result.cols[j] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
+ case BINARY:
case STRING:
result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
@@ -404,7 +405,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Boolean)value == true ? 1 : 0);
+ lcv.fill((Boolean) value == true ? 1 : 0);
lcv.isNull[0] = false;
}
}
@@ -417,7 +418,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Byte)value);
+ lcv.fill((Byte) value);
lcv.isNull[0] = false;
}
}
@@ -430,7 +431,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Short)value);
+ lcv.fill((Short) value);
lcv.isNull[0] = false;
}
}
@@ -443,7 +444,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Integer)value);
+ lcv.fill((Integer) value);
lcv.isNull[0] = false;
}
}
@@ -456,7 +457,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Long)value);
+ lcv.fill((Long) value);
lcv.isNull[0] = false;
}
}
@@ -469,7 +470,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill(((Date)value).getTime());
+ lcv.fill(((Date) value).getTime());
lcv.isNull[0] = false;
}
}
@@ -521,17 +522,31 @@ public class VectorizedRowBatchCtx {
dv.isNull[0] = true;
dv.isRepeating = true;
} else {
- HiveDecimal hd = (HiveDecimal)(value);
- dv.vector[0] = new Decimal128(hd.toString(), (short)hd.scale());
+ HiveDecimal hd = (HiveDecimal) value;
+ dv.vector[0] = new Decimal128(hd.toString(), (short) hd.scale());
dv.isRepeating = true;
dv.isNull[0] = false;
}
}
break;
-
+
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
+ byte[] bytes = (byte[]) value;
+ if (bytes == null) {
+ bcv.noNulls = false;
+ bcv.isNull[0] = true;
+ bcv.isRepeating = true;
+ } else {
+ bcv.fill(bytes);
+ bcv.isNull[0] = false;
+ }
+ }
+ break;
+
case STRING: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
- String sVal = (String)value;
+ String sVal = (String) value;
if (sVal == null) {
bcv.noNulls = false;
bcv.isNull[0] = true;
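
The BINARY partition-column branch above relies on the repeating-column trick: the constant partition value is stored once and every row in the batch resolves to it. A small sketch of that idea, with a hypothetical RepeatingColumn standing in for BytesColumnVector and its fill()/isRepeating semantics:

// Sketch of the "repeating column" pattern used when a partition column has a single
// constant value for the whole batch, as in the BINARY branch above. RepeatingColumn is
// a stand-in, not a Hive class.
public class RepeatingColumnSketch {
  static final class RepeatingColumn {
    byte[] value; boolean isRepeating; boolean[] isNull = new boolean[1024]; boolean noNulls = true;
    void fill(byte[] v) { value = v; isRepeating = true; }     // store the constant once
    byte[] get(int row) { return isRepeating ? value : null; } // every row resolves to the same bytes
  }

  static void setPartitionValue(RepeatingColumn col, byte[] value) {
    if (value == null) {
      col.noNulls = false; col.isNull[0] = true; col.isRepeating = true; // whole batch reads as null
    } else {
      col.fill(value);
      col.isNull[0] = false;
    }
  }

  public static void main(String[] args) {
    RepeatingColumn col = new RepeatingColumn();
    setPartitionValue(col, new byte[] { 0x01, 0x02 });
    System.out.println("row 500 sees " + col.get(500).length + " bytes without per-row copies");
  }
}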
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java Sun Aug 10 01:33:50 2014
@@ -23,11 +23,15 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
import org.apache.hadoop.io.Text;
-import java.sql.Date;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.text.ParseException;
public class VectorUDFDateString extends StringUnaryUDF {
private static final long serialVersionUID = 1L;
+ private transient static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+
private static final Log LOG = LogFactory.getLog(
VectorUDFDateString.class.getName());
@@ -41,13 +45,10 @@ public class VectorUDFDateString extends
return null;
}
try {
- Date date = Date.valueOf(s.toString());
- t.set(date.toString());
+ Date date = formatter.parse(s.toString());
+ t.set(formatter.format(date));
return t;
- } catch (IllegalArgumentException e) {
- if (LOG.isDebugEnabled()) {
- LOG.info("VectorUDFDateString passed bad string for Date.valueOf '" + s.toString() + "'");
- }
+ } catch (ParseException e) {
return null;
}
}
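
The change above swaps Date.valueOf for a SimpleDateFormat parse/format round trip, mapping malformed input to null instead of raising an error. A standalone sketch of that behavior, assuming the same "yyyy-MM-dd" pattern; this is an illustration, not the actual VectorUDFDateString code path:

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

// Sketch of the date-normalization behavior the hunk above switches to: parse with a
// yyyy-MM-dd SimpleDateFormat and return null on malformed input instead of throwing.
public class DateStringSketch {
  private static final SimpleDateFormat FORMATTER = new SimpleDateFormat("yyyy-MM-dd");

  static String normalize(String s) {
    if (s == null) {
      return null;
    }
    try {
      Date date = FORMATTER.parse(s);   // accepts e.g. "2014-8-9" and reparses it leniently
      return FORMATTER.format(date);    // canonical "yyyy-MM-dd" form
    } catch (ParseException e) {
      return null;                      // bad input maps to SQL NULL rather than an error
    }
  }

  public static void main(String[] args) {
    System.out.println(normalize("2014-08-10"));   // prints 2014-08-10
    System.out.println(normalize("not-a-date"));   // prints null
  }
}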
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sun Aug 10 01:33:50 2014
@@ -910,8 +910,11 @@ class RecordReaderImpl implements Record
private InStream stream;
private IntegerReader lengths = null;
+ private final LongColumnVector scratchlcv;
+
BinaryTreeReader(Path path, int columnId, Configuration conf) {
super(path, columnId, conf);
+ scratchlcv = new LongColumnVector();
}
@Override
@@ -969,8 +972,18 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws IOException {
- throw new UnsupportedOperationException(
- "NextBatch is not supported operation for Binary type");
+ BytesColumnVector result = null;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
+ return result;
}
@Override
@@ -1361,6 +1374,66 @@ class RecordReaderImpl implements Record
}
}
+ private static class BytesColumnVectorUtil {
+ // This method contains the common code for reading bytes into a BytesColumnVector.
+ // It is used by the BINARY, STRING, CHAR, and VARCHAR types.
+ public static void setRefToOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv,
+ BytesColumnVector result, long batchSize) throws IOException {
+
+ // Read lengths
+ scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here...
+ lengths.nextVector(scratchlcv, batchSize);
+ int totalLength = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ totalLength += (int) scratchlcv.vector[i];
+ }
+ }
+ } else {
+ if (!scratchlcv.isNull[0]) {
+ totalLength = (int) (batchSize * scratchlcv.vector[0]);
+ }
+ }
+
+ // Read all the strings for this batch
+ byte[] allBytes = new byte[totalLength];
+ int offset = 0;
+ int len = totalLength;
+ while (len > 0) {
+ int bytesRead = stream.read(allBytes, offset, len);
+ if (bytesRead < 0) {
+ throw new EOFException("Can't finish byte read from " + stream);
+ }
+ len -= bytesRead;
+ offset += bytesRead;
+ }
+
+ // Too expensive to figure out 'repeating' by comparisons.
+ result.isRepeating = false;
+ offset = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+ offset += scratchlcv.vector[i];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+ offset += scratchlcv.vector[0];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ }
+ }
+ }
+
/**
* A reader for string columns that are direct encoded in the current
* stripe.
@@ -1443,57 +1516,7 @@ class RecordReaderImpl implements Record
// Read present/isNull stream
super.nextVector(result, batchSize);
- // Read lengths
- scratchlcv.isNull = result.isNull;
- lengths.nextVector(scratchlcv, batchSize);
- int totalLength = 0;
- if (!scratchlcv.isRepeating) {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- totalLength += (int) scratchlcv.vector[i];
- }
- }
- } else {
- if (!scratchlcv.isNull[0]) {
- totalLength = (int) (batchSize * scratchlcv.vector[0]);
- }
- }
-
- //Read all the strings for this batch
- byte[] allBytes = new byte[totalLength];
- int offset = 0;
- int len = totalLength;
- while (len > 0) {
- int bytesRead = stream.read(allBytes, offset, len);
- if (bytesRead < 0) {
- throw new EOFException("Can't finish byte read from " + stream);
- }
- len -= bytesRead;
- offset += bytesRead;
- }
-
- // Too expensive to figure out 'repeating' by comparisons.
- result.isRepeating = false;
- offset = 0;
- if (!scratchlcv.isRepeating) {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
- offset += scratchlcv.vector[i];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
- }
- } else {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
- offset += scratchlcv.vector[0];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
- }
- }
+ BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
return result;
}