Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/05 09:23:07 UTC

svn commit: r1615872 [10/12] - in /hive/branches/cbo: ./ bin/ common/ common/src/java/org/apache/hadoop/hive/conf/ conf/ contrib/src/java/org/apache/hadoop/hive/contrib/metastore/hooks/ contrib/src/test/queries/clientnegative/ contrib/src/test/queries/...

Modified: hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Tue Aug  5 07:23:02 2014
@@ -1008,6 +1008,26 @@ class ColumnStatistics
   ::Thrift::Struct.generate_accessors self
 end
 
+class AggrStats
+  include ::Thrift::Struct, ::Thrift::Struct_Union
+  COLSTATS = 1
+  PARTSFOUND = 2
+
+  FIELDS = {
+    COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatisticsObj}},
+    PARTSFOUND => {:type => ::Thrift::Types::I64, :name => 'partsFound'}
+  }
+
+  def struct_fields; FIELDS; end
+
+  def validate
+    raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field colStats is unset!') unless @colStats
+    raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field partsFound is unset!') unless @partsFound
+  end
+
+  ::Thrift::Struct.generate_accessors self
+end
+
 class Schema
   include ::Thrift::Struct, ::Thrift::Struct_Union
   FIELDSCHEMAS = 1

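The Ruby code above is generated from a new Thrift struct, AggrStats: colStats is a required list of ColumnStatisticsObj, and partsFound is a required i64 recording how many partitions actually contributed statistics; validate raises a ProtocolException if either field is unset. For orientation, a minimal sketch of how the equivalent generated Java class gets populated (the constructor is the one used by the HiveMetaStore handler further down; the "required field" behavior is enforced by the Thrift layer):

    import java.util.List;
    import org.apache.hadoop.hive.metastore.api.AggrStats;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

    // Sketch: wrap the per-column aggregates plus the partition count into the new struct.
    class AggrStatsSketch {
      static AggrStats toAggrStats(List<ColumnStatisticsObj> colStats, int partsFound) {
        // Both fields are required; Thrift validation rejects the struct if either is unset.
        return new AggrStats(colStats, partsFound);
      }
    }
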
Modified: hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb (original)
+++ hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb Tue Aug  5 07:23:02 2014
@@ -1231,6 +1231,23 @@ module ThriftHiveMetastore
       raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_partitions_statistics_req failed: unknown result')
     end
 
+    def get_aggr_stats_for(request)
+      send_get_aggr_stats_for(request)
+      return recv_get_aggr_stats_for()
+    end
+
+    def send_get_aggr_stats_for(request)
+      send_message('get_aggr_stats_for', Get_aggr_stats_for_args, :request => request)
+    end
+
+    def recv_get_aggr_stats_for()
+      result = receive_message(Get_aggr_stats_for_result)
+      return result.success unless result.success.nil?
+      raise result.o1 unless result.o1.nil?
+      raise result.o2 unless result.o2.nil?
+      raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_aggr_stats_for failed: unknown result')
+    end
+
     def delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
       send_delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
       return recv_delete_partition_column_statistics()
@@ -2824,6 +2841,19 @@ module ThriftHiveMetastore
       write_result(result, oprot, 'get_partitions_statistics_req', seqid)
     end
 
+    def process_get_aggr_stats_for(seqid, iprot, oprot)
+      args = read_args(iprot, Get_aggr_stats_for_args)
+      result = Get_aggr_stats_for_result.new()
+      begin
+        result.success = @handler.get_aggr_stats_for(args.request)
+      rescue ::NoSuchObjectException => o1
+        result.o1 = o1
+      rescue ::MetaException => o2
+        result.o2 = o2
+      end
+      write_result(result, oprot, 'get_aggr_stats_for', seqid)
+    end
+
     def process_delete_partition_column_statistics(seqid, iprot, oprot)
       args = read_args(iprot, Delete_partition_column_statistics_args)
       result = Delete_partition_column_statistics_result.new()
@@ -6077,6 +6107,42 @@ module ThriftHiveMetastore
     ::Thrift::Struct.generate_accessors self
   end
 
+  class Get_aggr_stats_for_args
+    include ::Thrift::Struct, ::Thrift::Struct_Union
+    REQUEST = 1
+
+    FIELDS = {
+      REQUEST => {:type => ::Thrift::Types::STRUCT, :name => 'request', :class => ::PartitionsStatsRequest}
+    }
+
+    def struct_fields; FIELDS; end
+
+    def validate
+    end
+
+    ::Thrift::Struct.generate_accessors self
+  end
+
+  class Get_aggr_stats_for_result
+    include ::Thrift::Struct, ::Thrift::Struct_Union
+    SUCCESS = 0
+    O1 = 1
+    O2 = 2
+
+    FIELDS = {
+      SUCCESS => {:type => ::Thrift::Types::STRUCT, :name => 'success', :class => ::AggrStats},
+      O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::NoSuchObjectException},
+      O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException}
+    }
+
+    def struct_fields; FIELDS; end
+
+    def validate
+    end
+
+    ::Thrift::Struct.generate_accessors self
+  end
+
   class Delete_partition_column_statistics_args
     include ::Thrift::Struct, ::Thrift::Struct_Union
     DB_NAME = 1

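The generated Ruby client and processor above follow the standard Thrift pattern for the new get_aggr_stats_for call: the client sends Get_aggr_stats_for_args carrying a PartitionsStatsRequest, and the result struct maps success to an AggrStats, o1 to NoSuchObjectException, and o2 to MetaException. The generated Java client exposes the same call with the same exception mapping; a hedged sketch of a direct, wire-level invocation (ThriftHiveMetastore.Iface is the generated Java interface that HiveMetaStoreClient below delegates to):

    import org.apache.hadoop.hive.metastore.api.AggrStats;
    import org.apache.hadoop.hive.metastore.api.MetaException;
    import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
    import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
    import org.apache.hadoop.hive.metastore.api.ThriftHiveMetastore;
    import org.apache.thrift.TException;

    // Sketch: call the generated client directly; o1/o2 from the result struct surface as
    // typed exceptions, and a missing result becomes an application-level Thrift exception.
    class RawThriftCallSketch {
      static AggrStats fetch(ThriftHiveMetastore.Iface client, PartitionsStatsRequest req)
          throws NoSuchObjectException, MetaException, TException {
        return client.get_aggr_stats_for(req);
      }
    }
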
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Aug  5 07:23:02 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -89,7 +90,6 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeResponse;
 import org.apache.hadoop.hive.metastore.api.GrantRevokeRoleRequest;
 import org.apache.hadoop.hive.metastore.api.GrantRevokeRoleResponse;
-import org.apache.hadoop.hive.metastore.api.GrantRevokeType;
 import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
 import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
 import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
@@ -241,13 +241,12 @@ public class HiveMetaStore extends Thrif
   public static class HMSHandler extends FacebookBase implements
       IHMSHandler {
     public static final Log LOG = HiveMetaStore.LOG;
-    private static boolean createDefaultDB = false;
-    private static boolean defaultRolesCreated = false;
-    private static boolean adminUsersAdded = false;
     private String rawStoreClassName;
     private final HiveConf hiveConf; // stores datastore (jpox) properties,
                                      // right now they come from jpox.properties
 
+    private static String currentUrl;
+
     private Warehouse wh; // hdfs warehouse
     private final ThreadLocal<RawStore> threadLocalMS =
         new ThreadLocal<RawStore>() {
@@ -316,8 +315,6 @@ public class HiveMetaStore extends Thrif
           address, cmd).toString());
     }
 
-    // The next serial number to be assigned
-    private boolean checkForDefaultDb;
     private static int nextSerialNum = 0;
     private static ThreadLocal<Integer> threadLocalId = new ThreadLocal<Integer>() {
       @Override
@@ -350,10 +347,6 @@ public class HiveMetaStore extends Thrif
       return threadLocalId.get();
     }
 
-    public static void resetDefaultDBFlag() {
-      createDefaultDB = false;
-    }
-
     public HMSHandler(String name) throws MetaException {
       super(name);
       hiveConf = new HiveConf(this.getClass());
@@ -387,8 +380,6 @@ public class HiveMetaStore extends Thrif
 
     private boolean init() throws MetaException {
       rawStoreClassName = hiveConf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL);
-      checkForDefaultDb = hiveConf.getBoolean(
-          "hive.metastore.checkForDefaultDb", true);
       initListeners = MetaStoreUtils.getMetaStoreListeners(
           MetaStoreInitListener.class, hiveConf,
           hiveConf.getVar(HiveConf.ConfVars.METASTORE_INIT_HOOKS));
@@ -404,9 +395,12 @@ public class HiveMetaStore extends Thrif
       wh = new Warehouse(hiveConf);
 
       synchronized (HMSHandler.class) {
-        createDefaultDB();
-        createDefaultRoles();
-        addAdminUsers();
+        if (currentUrl == null || !currentUrl.equals(MetaStoreInit.getConnectionURL(hiveConf))) {
+          createDefaultDB();
+          createDefaultRoles();
+          addAdminUsers();
+          currentUrl = MetaStoreInit.getConnectionURL(hiveConf);
+        }
       }
 
       if (hiveConf.getBoolean("hive.metastore.metrics.enabled", false)) {
@@ -517,7 +511,6 @@ public class HiveMetaStore extends Thrif
         db.setOwnerType(PrincipalType.ROLE);
         ms.createDatabase(db);
       }
-      HMSHandler.createDefaultDB = true;
     }
 
     /**
@@ -526,9 +519,6 @@ public class HiveMetaStore extends Thrif
      * @throws MetaException
      */
     private void createDefaultDB() throws MetaException {
-      if (HMSHandler.createDefaultDB || !checkForDefaultDb) {
-        return;
-      }
       try {
         createDefaultDB_core(getMS());
       } catch (InvalidObjectException e) {
@@ -541,11 +531,6 @@ public class HiveMetaStore extends Thrif
 
     private void createDefaultRoles() throws MetaException {
 
-      if(defaultRolesCreated) {
-        LOG.debug("Admin role already created previously.");
-        return;
-      }
-
       RawStore ms = getMS();
       try {
         ms.addRole(ADMIN, ADMIN);
@@ -579,16 +564,10 @@ public class HiveMetaStore extends Thrif
         // Unlikely to be thrown.
         LOG.warn("Failed while granting global privs to admin", e);
       }
-
-      defaultRolesCreated = true;
     }
 
     private void addAdminUsers() throws MetaException {
 
-      if(adminUsersAdded) {
-        LOG.debug("Admin users already added.");
-        return;
-      }
       // now add pre-configured users to admin role
       String userStr = HiveConf.getVar(hiveConf,ConfVars.USERS_IN_ADMIN_ROLE,"").trim();
       if (userStr.isEmpty()) {
@@ -623,7 +602,6 @@ public class HiveMetaStore extends Thrif
           LOG.debug(userName + " already in admin role", e);
         }
       }
-      adminUsersAdded = true;
     }
 
     private void logInfo(String m) {
@@ -4031,7 +4009,7 @@ public class HiveMetaStore extends Thrif
       incrementCounter("drop_role");
       firePreEvent(new PreAuthorizationCallEvent(this));
       if (ADMIN.equals(roleName) || PUBLIC.equals(roleName)) {
-        throw new MetaException(PUBLIC + "/" + ADMIN +" role can't be dropped.");
+        throw new MetaException(PUBLIC + "," + ADMIN + " roles can't be dropped.");
       }
       Boolean ret = null;
       try {
@@ -4101,6 +4079,7 @@ public class HiveMetaStore extends Thrif
       return ret;
     }
 
+    @Override
     public GrantRevokeRoleResponse grant_revoke_role(GrantRevokeRoleRequest request)
         throws MetaException, org.apache.thrift.TException {
       GrantRevokeRoleResponse response = new GrantRevokeRoleResponse();
@@ -4998,6 +4977,25 @@ public class HiveMetaStore extends Thrif
       return rolePrinGrantList;
     }
 
+    @Override
+    public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
+        throws NoSuchObjectException, MetaException, TException {
+      startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
+      AggrStats aggrStats = null;
+      try {
+        // TODO: We set partsFound (the number of partitions for which stats were retrieved) to
+        // the number of partitions in the request. This is not correct, but no current user of
+        // this API relies on it; the only caller today, stats annotation, ignores the value.
+        // StatsOptimizer will rely on it, so this must be fixed before it starts using this API.
+        aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
+          request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+        return aggrStats;
+      } finally {
+        endFunction("get_aggr_stats_for: ", aggrStats == null, null, request.getTblName());
+      }
+
+    }
+
   }
 
   public static IHMSHandler newHMSHandler(String name, HiveConf hiveConf) throws MetaException {

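Two things change in HiveMetaStore.java. First, the one-shot static flags (createDefaultDB, defaultRolesCreated, adminUsersAdded) are replaced by a single currentUrl field, so the default database, default roles, and admin users are re-created whenever the metastore connection URL changes rather than at most once per JVM (useful when tests repoint the handler at a fresh backing database). Second, the new get_aggr_stats_for handler delegates to RawStore.get_aggr_stats_for and, per the TODO, currently reports partsFound as the number of requested partitions. A distilled, simplified sketch of the init guard (the helper name and Runnable indirection are illustrative only):

    // Sketch: run the bootstrap steps only when the metastore connection URL differs from
    // the last one seen, instead of gating them on per-JVM booleans.
    class InitGuardSketch {
      private static String currentUrl;

      static synchronized void bootstrapIfNeeded(String connectionUrl, Runnable bootstrap) {
        if (currentUrl == null || !currentUrl.equals(connectionUrl)) {
          bootstrap.run();   // i.e. createDefaultDB(); createDefaultRoles(); addAdminUsers();
          currentUrl = connectionUrl;
        }
      }
    }
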
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Tue Aug  5 07:23:02 2014
@@ -34,6 +34,7 @@ import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
+import java.util.HashMap;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -51,6 +52,7 @@ import org.apache.hadoop.hive.conf.HiveC
 import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
 import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -144,6 +146,8 @@ public class HiveMetaStoreClient impleme
   private String tokenStrForm;
   private final boolean localMetaStore;
 
+  private Map<String, String> currentMetaVars;
+
   // for thrift connects
   private int retries = 5;
   private int retryDelaySeconds = 0;
@@ -171,6 +175,7 @@ public class HiveMetaStoreClient impleme
       // through the network
       client = HiveMetaStore.newHMSHandler("hive client", conf);
       isConnected = true;
+      snapshotActiveConf();
       return;
     }
 
@@ -231,6 +236,26 @@ public class HiveMetaStoreClient impleme
   }
 
   @Override
+  public boolean isCompatibleWith(HiveConf conf) {
+    if (currentMetaVars == null) {
+      return false; // recreate
+    }
+    boolean compatible = true;
+    for (ConfVars oneVar : HiveConf.metaVars) {
+      // Since metaVars are all of different types, use string for comparison
+      String oldVar = currentMetaVars.get(oneVar.varname);
+      String newVar = conf.get(oneVar.varname, "");
+      if (oldVar == null ||
+          (oneVar.isCaseSensitive() ? !oldVar.equals(newVar) : !oldVar.equalsIgnoreCase(newVar))) {
+        LOG.info("Metastore configuration " + oneVar.varname +
+            " changed from " + oldVar + " to " + newVar);
+        compatible = false;
+      }
+    }
+    return compatible;
+  }
+
+  @Override
   public void reconnect() throws MetaException {
     if (localMetaStore) {
       // For direct DB connections we don't yet support reestablishing connections.
@@ -383,9 +408,19 @@ public class HiveMetaStoreClient impleme
       throw new MetaException("Could not connect to meta store using any of the URIs provided." +
         " Most recent failure: " + StringUtils.stringifyException(tte));
     }
+
+    snapshotActiveConf();
+
     LOG.info("Connected to metastore.");
   }
 
+  private void snapshotActiveConf() {
+    currentMetaVars = new HashMap<String, String>(HiveConf.metaVars.length);
+    for (ConfVars oneVar : HiveConf.metaVars) {
+      currentMetaVars.put(oneVar.varname, conf.get(oneVar.varname, ""));
+    }
+  }
+
   public String getTokenStrForm() throws IOException {
     return tokenStrForm;
    }
@@ -393,6 +428,7 @@ public class HiveMetaStoreClient impleme
   @Override
   public void close() {
     isConnected = false;
+    currentMetaVars = null;
     try {
       if (null != client) {
         client.shutdown();
@@ -1785,4 +1821,11 @@ public class HiveMetaStoreClient impleme
       NoSuchObjectException, UnsupportedOperationException {
     client.drop_table_with_environment_context(dbname, name, deleteData, envContext);
   }
+
+  @Override
+  public AggrStats getAggrColStatsFor(String dbName, String tblName,
+    List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+    PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
+    return client.get_aggr_stats_for(req);
+  }
 }

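HiveMetaStoreClient now snapshots every HiveConf.metaVars entry at connect time (snapshotActiveConf) and compares that snapshot against a caller-supplied conf in isCompatibleWith(); close() clears the snapshot, so a closed client always reports incompatible. This lets callers that cache a client decide whether it can be reused after the configuration changed. A sketch of such a caller-side check (getOrCreate is a hypothetical helper, not part of this patch):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.MetaException;

    // Sketch: reuse a cached client only if none of the metastore-related conf vars changed.
    class ClientCacheSketch {
      static IMetaStoreClient getOrCreate(IMetaStoreClient cached, HiveConf conf) throws MetaException {
        if (cached != null && cached.isCompatibleWith(conf)) {
          return cached;
        }
        if (cached != null) {
          cached.close();
        }
        return new HiveMetaStoreClient(conf);
      }
    }
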
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java Tue Aug  5 07:23:02 2014
@@ -19,9 +19,9 @@
 package org.apache.hadoop.hive.metastore;
 
 import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.CompactionType;
 import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
 import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
 import org.apache.hadoop.hive.metastore.api.LockRequest;
 import org.apache.hadoop.hive.metastore.api.LockResponse;
@@ -38,22 +38,18 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
 import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
 import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
 import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Function;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
 import org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleRequest;
 import org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleResponse;
 import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
 import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
-import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeRequest;
-import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeResponse;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
 import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
 import org.apache.hadoop.hive.metastore.api.Index;
@@ -61,28 +57,18 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
 import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
 import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PartitionEventType;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
 import org.apache.hadoop.hive.metastore.api.Role;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
 import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.thrift.TException;
 
 /**
  * TODO Unnecessary when the server sides for both dbstore and filestore are
@@ -91,6 +77,12 @@ import org.apache.thrift.TException;
 public interface IMetaStoreClient {
 
   /**
+   * Returns whether the current client is compatible with the given configuration.
+   * @return true if this client can be reused with conf, false otherwise
+   */
+  public boolean isCompatibleWith(HiveConf conf);
+
+  /**
    *  Tries to reconnect this MetaStoreClient to the MetaStore.
    */
   public void reconnect() throws MetaException;
@@ -1299,4 +1291,7 @@ public interface IMetaStoreClient {
    */
   GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(
       GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException;
+
+  public AggrStats getAggrColStatsFor(String dbName, String tblName,
+      List<String> colNames, List<String> partName)  throws NoSuchObjectException, MetaException, TException;
 }

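IMetaStoreClient gains two methods: isCompatibleWith(HiveConf), shown above, and getAggrColStatsFor(dbName, tblName, colNames, partNames), which wraps its arguments into a PartitionsStatsRequest and issues the new Thrift call. A usage sketch (database, table, column, and partition names are made up; the AggrStats getters follow the usual Thrift-generated naming, and in this patch partsFound is simply the number of requested partitions, per the TODO in HiveMetaStore.java):

    import java.util.Arrays;
    import java.util.List;
    import org.apache.hadoop.hive.metastore.IMetaStoreClient;
    import org.apache.hadoop.hive.metastore.api.AggrStats;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

    // Sketch: fetch aggregated column stats for a set of partitions in a single round trip.
    class AggrStatsCallSketch {
      static void printAggrStats(IMetaStoreClient msc, List<String> partNames) throws Exception {
        AggrStats aggr = msc.getAggrColStatsFor("default", "web_logs",
            Arrays.asList("ip", "bytes_sent"), partNames);
        System.out.println("partitions with stats: " + aggr.getPartsFound());
        for (ColumnStatisticsObj cso : aggr.getColStats()) {
          System.out.println(cso.getColName() + " -> " + cso.getStatsData());
        }
      }
    }
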
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Tue Aug  5 07:23:02 2014
@@ -427,6 +427,7 @@ class MetaStoreDirectSql {
         + " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"PART_ID\" asc";
     loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+      @Override
       public void apply(Partition t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -435,6 +436,7 @@ class MetaStoreDirectSql {
         + " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+      @Override
       public void apply(Partition t, Object[] fields) {
         t.addToValues((String)fields[1]);
       }});
@@ -452,6 +454,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"SD_ID\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -460,6 +463,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         if (fields[2] == null) return;
         t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2])));
@@ -469,6 +473,7 @@ class MetaStoreDirectSql {
         + " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+      @Override
       public void apply(StorageDescriptor t, Object[] fields) {
         t.addToBucketCols((String)fields[1]);
       }});
@@ -479,6 +484,7 @@ class MetaStoreDirectSql {
         + " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
     boolean hasSkewedColumns =
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) {
           if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
           t.getSkewedInfo().addToSkewedColNames((String)fields[1]);
@@ -502,6 +508,7 @@ class MetaStoreDirectSql {
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
         private Long currentListId;
         private List<String> currentList;
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
           if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
           // Note that this is not a typical list accumulator - there's no call to finalize
@@ -539,6 +546,7 @@ class MetaStoreDirectSql {
       loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
         private Long currentListId;
         private List<String> currentList;
+        @Override
         public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
           if (!t.isSetSkewedInfo()) {
             SkewedInfo skewedInfo = new SkewedInfo();
@@ -572,6 +580,7 @@ class MetaStoreDirectSql {
           + " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0"
           + " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
       loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
+        @Override
         public void apply(List<FieldSchema> t, Object[] fields) {
           t.add(new FieldSchema((String)fields[2], (String)fields[3], (String)fields[1]));
         }});
@@ -582,6 +591,7 @@ class MetaStoreDirectSql {
         + " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null"
         + " order by \"SERDE_ID\" asc";
     loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
+      @Override
       public void apply(SerDeInfo t, Object[] fields) {
         t.putToParameters((String)fields[1], (String)fields[2]);
       }});
@@ -891,19 +901,49 @@ class MetaStoreDirectSql {
     return result;
   }
 
-  public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+  public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
       List<String> partNames, List<String> colNames) throws MetaException {
-    if (colNames.isEmpty() || partNames.isEmpty()) {
-      return Lists.newArrayList();
-    }
-    boolean doTrace = LOG.isDebugEnabled();
-    long start = doTrace ? System.nanoTime() : 0;
-    String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+    String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+      + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+      + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+      + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
       + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
       + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
-      + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+      + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+
+    boolean doTrace = LOG.isDebugEnabled();
+    long start = doTrace ? System.nanoTime() : 0;
+    Query query = pm.newQuery("javax.jdo.query.SQL", qText);
+    Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
+    if (qResult == null) {
+      query.closeAll();
+      return Lists.newArrayList();
+    }
+    List<Object[]> list = ensureList(qResult);
+    List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
+    for (Object[] row : list) {
+      colStats.add(prepareCSObj(row,0));
+    }
+    long end = doTrace ? System.nanoTime() : 0;
+    timingTrace(doTrace, qText, start, end);
+    query.closeAll();
+    return colStats;
+  }
+
+  private ColumnStatisticsObj prepareCSObj (Object[] row, int i) {
+    ColumnStatisticsData data = new ColumnStatisticsData();
+    ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
+    Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
+        declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+        avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
+    StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
+        llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+    return cso;
+  }
+
+  private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
+    List<String> colNames) throws MetaException {
 
-    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
     Object[] params = new Object[colNames.size() + partNames.size() + 2];
     int paramI = 0;
     params[paramI++] = dbName;
@@ -914,7 +954,24 @@ class MetaStoreDirectSql {
     for (String partName : partNames) {
       params[paramI++] = partName;
     }
-    Object qResult = query.executeWithArray(params);
+
+    return params;
+  }
+
+  public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+      List<String> partNames, List<String> colNames) throws MetaException {
+    if (colNames.isEmpty() || partNames.isEmpty()) {
+      return Lists.newArrayList();
+    }
+    boolean doTrace = LOG.isDebugEnabled();
+    long start = doTrace ? System.nanoTime() : 0;
+    String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+      + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+      + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
+      + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+
+    Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+    Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
     long queryTime = doTrace ? System.nanoTime() : 0;
     if (qResult == null) {
       query.closeAll();
@@ -963,16 +1020,7 @@ class MetaStoreDirectSql {
       if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
         csd.setLastAnalyzed(extractSqlLong(laObj));
       }
-      ColumnStatisticsData data = new ColumnStatisticsData();
-      // see STATS_COLLIST
-      int i = offset;
-      ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
-      Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
-          declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
-          avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
-      StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
-          llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
-      csos.add(cso);
+      csos.add(prepareCSObj(row, offset));
     }
     result.setStatsObj(csos);
     return result;

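MetaStoreDirectSql gains aggrColStatsForPartitions, which replaces per-partition retrieval with one SQL statement that groups PART_COL_STATS by column name and type and folds the per-partition rows: low values via min, high values via max, NUM_NULLS/NUM_TRUES/NUM_FALSES via sum, and AVG_COL_LEN/MAX_COL_LEN/NUM_DISTINCTS via max (the max of per-partition distinct counts is only a lower-bound estimate of the combined NDV). The row-to-ColumnStatisticsObj mapping is factored into prepareCSObj and shared with getPartitionStats, and prepareParams centralizes the bind order (db, table, column names, partition names). To make the aggregation semantics explicit, a sketch of the same fold in plain Java for one long-typed column (PartStats is a hypothetical holder, not a Hive class):

    import java.util.List;

    // Sketch: what the grouped SQL computes per column, expressed as an in-memory fold.
    class AggrFoldSketch {
      static class PartStats { long longLow, longHigh, numNulls, numDistincts; }

      static void aggregate(List<PartStats> perPartition) {
        long low = Long.MAX_VALUE, high = Long.MIN_VALUE, nulls = 0, ndv = 0;
        for (PartStats p : perPartition) {
          low = Math.min(low, p.longLow);        // min("LONG_LOW_VALUE")
          high = Math.max(high, p.longHigh);     // max("LONG_HIGH_VALUE")
          nulls += p.numNulls;                   // sum("NUM_NULLS")
          ndv = Math.max(ndv, p.numDistincts);   // max("NUM_DISTINCTS"): a lower-bound NDV estimate
        }
      }
    }
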
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Aug  5 07:23:02 2014
@@ -5901,6 +5901,29 @@ public class ObjectStore implements RawS
     }.run(true);
   }
 
+
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+      final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+
+    return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+      @Override
+      protected List<ColumnStatisticsObj> getSqlResult(
+          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
+        return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+      }
+
+      @Override
+      protected List<ColumnStatisticsObj> getJdoResult(
+          GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
+          NoSuchObjectException {
+        // This is a fast path for query optimization: if we can find this info quickly using
+        // directSql, do it. There is no point in falling back to the slow path here.
+        throw new MetaException("Jdo path is not implemented for stats aggr.");
+      }
+      }.run(true);
+  }
+
   private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
       Table table, List<String> partNames, List<String> colNames)
           throws NoSuchObjectException, MetaException {
@@ -6747,5 +6770,4 @@ public class ObjectStore implements RawS
     }
     return funcs;
   }
-
 }

Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Tue Aug  5 07:23:02 2014
@@ -27,6 +27,7 @@ import java.util.Map;
 
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -547,4 +548,6 @@ public interface RawStore extends Config
    */
   public List<String> getFunctions(String dbName, String pattern) throws MetaException;
 
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+    List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
 }

Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java Tue Aug  5 07:23:02 2014
@@ -26,8 +26,6 @@ import java.lang.reflect.UndeclaredThrow
 import java.util.List;
 
 import org.apache.commons.lang.ClassUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
 import org.apache.hadoop.hive.common.classification.InterfaceStability;
@@ -55,7 +53,7 @@ public class RawStoreProxy implements In
     // This has to be called before initializing the instance of RawStore
     init();
 
-    this.base = (RawStore) ReflectionUtils.newInstance(rawStoreClass, conf);
+    this.base = ReflectionUtils.newInstance(rawStoreClass, conf);
   }
 
   public static RawStore getProxy(HiveConf hiveConf, Configuration conf, String rawStoreClassName,
@@ -96,14 +94,6 @@ public class RawStoreProxy implements In
   public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
     Object ret = null;
 
-    boolean reloadConf = HiveConf.getBoolVar(hiveConf,
-        HiveConf.ConfVars.METASTOREFORCERELOADCONF);
-
-    if (reloadConf) {
-      MetaStoreInit.updateConnectionURL(hiveConf, getConf(), null, metaStoreInitData);
-      initMS();
-    }
-
     try {
       ret = method.invoke(base, args);
     } catch (UndeclaredThrowableException e) {

Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Tue Aug  5 07:23:02 2014
@@ -17,17 +17,24 @@
  */
 package org.apache.hadoop.hive.metastore.txn;
 
-import com.jolbox.bonecp.BoneCP;
 import com.jolbox.bonecp.BoneCPConfig;
+import com.jolbox.bonecp.BoneCPDataSource;
+import org.apache.commons.dbcp.ConnectionFactory;
+import org.apache.commons.dbcp.DriverManagerConnectionFactory;
+import org.apache.commons.dbcp.PoolableConnectionFactory;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.commons.dbcp.PoolingDataSource;
 
+import org.apache.commons.pool.ObjectPool;
+import org.apache.commons.pool.impl.GenericObjectPool;
 import org.apache.hadoop.hive.common.ValidTxnList;
 import org.apache.hadoop.hive.common.ValidTxnListImpl;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.*;
 import org.apache.hadoop.util.StringUtils;
 
+import javax.sql.DataSource;
 import java.sql.*;
 import java.util.*;
 
@@ -65,9 +72,9 @@ public class TxnHandler {
   static final private int ALLOWED_REPEATED_DEADLOCKS = 5;
   static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName());
 
-  static private BoneCP connPool;
-  private static final Boolean lockLock = new Boolean("true"); // Random object to lock on for the
-  // lock method
+  static private DataSource connPool;
+  private static Boolean lockLock = new Boolean("true"); // Random object to lock on for the lock
+  // method
 
   /**
    * Number of consecutive deadlocks we have seen
@@ -1596,14 +1603,28 @@ public class TxnHandler {
     String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
     String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
     String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+    String connectionPooler = HiveConf.getVar(conf,
+        HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE).toLowerCase();
 
-    BoneCPConfig config = new BoneCPConfig();
-    config.setJdbcUrl(driverUrl);
-    config.setMaxConnectionsPerPartition(10);
-    config.setPartitionCount(1);
-    config.setUser(user);
-    config.setPassword(passwd);
-    connPool = new BoneCP(config);
+    if ("bonecp".equals(connectionPooler)) {
+      BoneCPConfig config = new BoneCPConfig();
+      config.setJdbcUrl(driverUrl);
+      config.setMaxConnectionsPerPartition(10);
+      config.setPartitionCount(1);
+      config.setUser(user);
+      config.setPassword(passwd);
+      connPool = new BoneCPDataSource(config);
+    } else if ("dbcp".equals(connectionPooler)) {
+      ObjectPool objectPool = new GenericObjectPool();
+      ConnectionFactory connFactory = new DriverManagerConnectionFactory(driverUrl, user, passwd);
+      // This doesn't get used, but it's still necessary, see
+      // http://svn.apache.org/viewvc/commons/proper/dbcp/branches/DBCP_1_4_x_BRANCH/doc/ManualPoolingDataSourceExample.java?view=markup
+      PoolableConnectionFactory poolConnFactory =
+          new PoolableConnectionFactory(connFactory, objectPool, null, null, false, true);
+      connPool = new PoolingDataSource(objectPool);
+    } else {
+      throw new RuntimeException("Unknown JDBC connection pooling " + connectionPooler);
+    }
   }
 
  private static synchronized void buildJumpTable() {

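TxnHandler's static pool is now typed as javax.sql.DataSource instead of a concrete BoneCP instance, and setupJdbcConnectionPool chooses the implementation from METASTORE_CONNECTION_POOLING_TYPE: "bonecp" builds a BoneCPDataSource, "dbcp" builds a commons-dbcp PoolingDataSource over a GenericObjectPool, and anything else is rejected. Because both poolers sit behind the DataSource interface, the rest of the class can obtain connections the same way regardless of which pooler was configured; a sketch:

    import java.sql.Connection;
    import java.sql.SQLException;
    import javax.sql.DataSource;

    // Sketch: pooler-agnostic connection checkout; the same code path serves BoneCP and DBCP.
    class ConnPoolSketch {
      static Connection getDbConn(DataSource connPool) throws SQLException {
        Connection dbConn = connPool.getConnection();
        dbConn.setAutoCommit(false);   // transaction-table updates are committed explicitly
        return dbConn;
      }
    }
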
Modified: hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Tue Aug  5 07:23:02 2014
@@ -25,6 +25,7 @@ import java.util.Map;
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -709,5 +711,12 @@ public class DummyRawStoreControlledComm
     return objectStore.getFunctions(dbName, pattern);
   }
 
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+      String tblName, List<String> partNames, List<String> colNames)
+      throws MetaException {
+    return null;
+  }
+
 
 }

Modified: hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Tue Aug  5 07:23:02 2014
@@ -26,6 +26,7 @@ import junit.framework.Assert;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
 import org.apache.hadoop.hive.metastore.api.Database;
 import org.apache.hadoop.hive.metastore.api.Function;
 import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
 import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
 import org.apache.hadoop.hive.metastore.api.Partition;
 import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
 import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
 import org.apache.hadoop.hive.metastore.api.PrincipalType;
 import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -726,7 +728,12 @@ public class DummyRawStoreForJdoConnecti
     return null;
   }
 
-
+  @Override
+  public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+      String tblName, List<String> partNames, List<String> colNames)
+      throws MetaException {
+    return null;
+  }
 }
 
 

Modified: hive/branches/cbo/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/cbo/pom.xml?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/pom.xml (original)
+++ hive/branches/cbo/pom.xml Tue Aug  5 07:23:02 2014
@@ -104,6 +104,8 @@
     <commons-lang.version>2.4</commons-lang.version>
     <commons-lang3.version>3.1</commons-lang3.version>
     <commons-logging.version>1.1.3</commons-logging.version>
+    <commons-pool.version>1.5.4</commons-pool.version>
+    <commons-dbcp.version>1.4</commons-dbcp.version>
     <derby.version>10.10.1.1</derby.version>
     <guava.version>11.0.2</guava.version>
     <groovy.version>2.1.6</groovy.version>
@@ -111,8 +113,8 @@
     <hadoop-20S.version>1.2.1</hadoop-20S.version>
     <hadoop-23.version>2.4.0</hadoop-23.version>
     <hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
-    <hbase.hadoop1.version>0.96.0-hadoop1</hbase.hadoop1.version>
-    <hbase.hadoop2.version>0.96.0-hadoop2</hbase.hadoop2.version>
+    <hbase.hadoop1.version>0.98.3-hadoop1</hbase.hadoop1.version>
+    <hbase.hadoop2.version>0.98.3-hadoop2</hbase.hadoop2.version>
     <!-- httpcomponents are not always in version sync -->
     <httpcomponents.client.version>4.2.5</httpcomponents.client.version>
     <httpcomponents.core.version>4.2.5</httpcomponents.core.version>
@@ -772,7 +774,7 @@
             <test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>
             <java.net.preferIPv4Stack>true</java.net.preferIPv4Stack>
             <!-- EnforceReadOnlyTables hook and QTestUtil -->
-            <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc</test.src.tables>
+            <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc,src_hbase</test.src.tables>
             <java.security.krb5.conf>${test.tmp.dir}/conf/krb5.conf</java.security.krb5.conf>
           </systemPropertyVariables>
         </configuration>

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java Tue Aug  5 07:23:02 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.lockmgr
 import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.util.StringUtils;
 
 import java.io.DataInput;
@@ -54,8 +53,6 @@ import java.util.Map;
 import java.util.Random;
 import java.util.concurrent.ConcurrentHashMap;
 
-import javax.security.auth.login.LoginException;
-
 /**
  * Context for Semantic Analyzers. Usage: not reusable - construct a new one for
  * each query should call clear() at end of use to remove temporary folders
@@ -337,7 +334,14 @@ public class Context {
    *          external URI to which the tmp data has to be eventually moved
    * @return next available tmp path on the file system corresponding extURI
    */
-  public Path getExternalTmpPath(URI extURI) {
+  public Path getExternalTmpPath(Path path) {
+    URI extURI = path.toUri();
+    if (extURI.getScheme().equals("viewfs")) {
+      // On viewfs we don't want to use /tmp as the tmp dir, since a later rename from /tmp/..
+      // to the final /user/hive/warehouse/ location would cross mount points and fail; instead,
+      // pick a tmp dir in the same namespace as the table dir.
+      return getExtTmpPathRelTo(path.getParent());
+    }
     return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
       nextPathId());
   }
@@ -347,7 +351,8 @@ public class Context {
    * within passed in uri, whereas getExternalTmpPath() ignores passed in path and returns temp
    * path within /tmp
    */
-  public Path getExtTmpPathRelTo(URI uri) {
+  public Path getExtTmpPathRelTo(Path path) {
+    URI uri = path.toUri();
     return new Path (getScratchDir(uri.getScheme(), uri.getAuthority(), !explain, 
     uri.getPath() + Path.SEPARATOR + "_" + this.executionId), EXT_PREFIX + nextPathId());
   }

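Context.getExternalTmpPath and getExtTmpPathRelTo now take a Path rather than a URI, and getExternalTmpPath special-cases the viewfs scheme: ViewFileSystem cannot rename across mount points, so a scratch directory under /tmp might not be movable to the final warehouse location, and for viewfs destinations the temp path is therefore allocated relative to the destination's parent, staying in the same namespace. A caller-side sketch (the table path is made up):

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.Context;

    // Sketch: callers pass the destination Path itself; for a viewfs destination the returned
    // scratch path lives under the table's parent directory, so the final rename never crosses
    // viewfs mount points.
    class TmpPathSketch {
      static Path externalScratch(Context ctx) {
        Path target = new Path("viewfs://cluster/user/hive/warehouse/web_logs");
        return ctx.getExternalTmpPath(target);
      }
    }
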
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Tue Aug  5 07:23:02 2014
@@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
 import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
@@ -503,8 +504,13 @@ public class Driver implements CommandPr
     Hive db = sem.getDb();
 
     if (ss.isAuthorizationModeV2()) {
-      doAuthorizationV2(ss, op, inputs, outputs, command);
-      return;
+      // get mapping of tables to columns used
+      ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
+      // colAccessInfo is set only in case of SemanticAnalyzer
+      Map<String, Set<String>> tab2Cols = colAccessInfo != null ? colAccessInfo
+          .getTableToColumnAccessMap() : null;
+      doAuthorizationV2(ss, op, inputs, outputs, command, tab2Cols);
+     return;
     }
     if (op == null) {
       throw new HiveException("Operation should not be null");
@@ -583,56 +589,9 @@ public class Driver implements CommandPr
         }
       }
 
-      //for a select or create-as-select query, populate the partition to column (par2Cols) or
-      // table to columns mapping (tab2Cols)
-      if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
-          || op.equals(HiveOperation.QUERY)) {
-        SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
-        ParseContext parseCtx = querySem.getParseContext();
-        Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
-
-        for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
-            .getParseContext().getTopOps().entrySet()) {
-          Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
-          if (topOp instanceof TableScanOperator
-              && tsoTopMap.containsKey(topOp)) {
-            TableScanOperator tableScanOp = (TableScanOperator) topOp;
-            Table tbl = tsoTopMap.get(tableScanOp);
-            List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
-            List<FieldSchema> columns = tbl.getCols();
-            List<String> cols = new ArrayList<String>();
-            for (int i = 0; i < neededColumnIds.size(); i++) {
-              cols.add(columns.get(neededColumnIds.get(i)).getName());
-            }
-            //map may not contain all sources, since input list may have been optimized out
-            //or non-existent tho such sources may still be referenced by the TableScanOperator
-            //if it's null then the partition probably doesn't exist so let's use table permission
-            if (tbl.isPartitioned() &&
-                tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
-              String alias_id = topOpMap.getKey();
-
-              PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
-                  parseCtx, alias_id);
-              Set<Partition> parts = partsList.getPartitions();
-              for (Partition part : parts) {
-                List<String> existingCols = part2Cols.get(part);
-                if (existingCols == null) {
-                  existingCols = new ArrayList<String>();
-                }
-                existingCols.addAll(cols);
-                part2Cols.put(part, existingCols);
-              }
-            } else {
-              List<String> existingCols = tab2Cols.get(tbl);
-              if (existingCols == null) {
-                existingCols = new ArrayList<String>();
-              }
-              existingCols.addAll(cols);
-              tab2Cols.put(tbl, existingCols);
-            }
-          }
-        }
-      }
+      getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
+
+
 
       // cache the results for table authorization
       Set<String> tableAuthChecked = new HashSet<String>();
@@ -683,8 +642,65 @@ public class Driver implements CommandPr
     }
   }
 
+  private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem,
+      Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols,
+      Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
+    // for a select or create-as-select query, populate the partition to column
+    // (par2Cols) or
+    // table to columns mapping (tab2Cols)
+    if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
+        || op.equals(HiveOperation.QUERY)) {
+      SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+      ParseContext parseCtx = querySem.getParseContext();
+      Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
+
+      for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
+          .getParseContext().getTopOps().entrySet()) {
+        Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
+        if (topOp instanceof TableScanOperator
+            && tsoTopMap.containsKey(topOp)) {
+          TableScanOperator tableScanOp = (TableScanOperator) topOp;
+          Table tbl = tsoTopMap.get(tableScanOp);
+          List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
+          List<FieldSchema> columns = tbl.getCols();
+          List<String> cols = new ArrayList<String>();
+          for (int i = 0; i < neededColumnIds.size(); i++) {
+            cols.add(columns.get(neededColumnIds.get(i)).getName());
+          }
+          //map may not contain all sources, since input list may have been optimized out
+          //or non-existent tho such sources may still be referenced by the TableScanOperator
+          //if it's null then the partition probably doesn't exist so let's use table permission
+          if (tbl.isPartitioned() &&
+              tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+            String alias_id = topOpMap.getKey();
+
+            PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
+                parseCtx, alias_id);
+            Set<Partition> parts = partsList.getPartitions();
+            for (Partition part : parts) {
+              List<String> existingCols = part2Cols.get(part);
+              if (existingCols == null) {
+                existingCols = new ArrayList<String>();
+              }
+              existingCols.addAll(cols);
+              part2Cols.put(part, existingCols);
+            }
+          } else {
+            List<String> existingCols = tab2Cols.get(tbl);
+            if (existingCols == null) {
+              existingCols = new ArrayList<String>();
+            }
+            existingCols.addAll(cols);
+            tab2Cols.put(tbl, existingCols);
+          }
+        }
+      }
+    }
+
+  }
+
   private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet<ReadEntity> inputs,
-      HashSet<WriteEntity> outputs, String command) throws HiveException {
+      HashSet<WriteEntity> outputs, String command, Map<String, Set<String>> tab2cols) throws HiveException {
 
     HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder();
 
@@ -696,11 +712,34 @@ public class Driver implements CommandPr
 
     HiveOperationType hiveOpType = getHiveOperationType(op);
     List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputs);
+    updateInputColumnInfo(inputsHObjs, tab2cols);
+
     List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs);
     ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build());
     return;
   }
 
+  /**
+   * Add column information for input table objects
+   * @param inputsHObjs input HivePrivilegeObjects
+   * @param tableName2Cols mapping from table name to the input columns used
+   */
+  private static void updateInputColumnInfo(List<HivePrivilegeObject> inputsHObjs,
+      Map<String, Set<String>> tableName2Cols) {
+    if(tableName2Cols == null) {
+      return;
+    }
+    for(HivePrivilegeObject inputObj : inputsHObjs){
+      if(inputObj.getType() != HivePrivilegeObjectType.TABLE_OR_VIEW){
+        // input columns are relevant only for tables or views
+        continue;
+      }
+      Set<String> cols = tableName2Cols.get(Table.getCompleteName(inputObj.getDbname(),
+          inputObj.getObjectName()));
+      inputObj.setColumns(cols);
+    }
+  }
+
   private static List<HivePrivilegeObject> getHivePrivObjects(HashSet<? extends Entity> privObjects) {
     List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>();
     if(privObjects == null){
@@ -1213,7 +1252,8 @@ public class Driver implements CommandPr
       }
       resStream = null;
 
-      HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+      SessionState ss = SessionState.get();
+      HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS(), ss.getUserName(), ss.getUserIpAddress());
       hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
 
       for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {

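For readers following the refactor above: the new getTablePartitionUsedColumns() and updateInputColumnInfo() helpers both boil down to accumulating column names under a table (or partition) key and then handing that mapping to the authorizer. Below is a minimal, self-contained sketch of that accumulate-per-key idiom; the class and method names (ColumnUsageCollector, addUsedColumns) are illustrative only and not part of Hive.

    import java.util.ArrayList;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    // Illustrative only: collect the column names used per table,
    // mirroring the tab2Cols / part2Cols bookkeeping in the hunk above.
    public class ColumnUsageCollector {
      private final Map<String, List<String>> tableToCols = new HashMap<String, List<String>>();

      public void addUsedColumns(String tableName, List<String> cols) {
        List<String> existing = tableToCols.get(tableName);
        if (existing == null) {
          existing = new ArrayList<String>();
          tableToCols.put(tableName, existing);
        }
        existing.addAll(cols);
      }

      public List<String> getUsedColumns(String tableName) {
        return tableToCols.get(tableName);
      }
    }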
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Aug  5 07:23:02 2014
@@ -35,6 +35,7 @@ import java.util.Arrays;
 import java.util.Collections;
 import java.util.Comparator;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
@@ -88,9 +89,9 @@ import org.apache.hadoop.hive.ql.QueryPl
 import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo;
 import org.apache.hadoop.hive.ql.hooks.ReadEntity;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
 import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
 import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask;
 import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork;
 import org.apache.hadoop.hive.ql.lockmgr.DbLockManager;
@@ -550,12 +551,13 @@ public class DDLTask extends Task<DDLWor
       throws HiveException {
     // merge work only needs input and output.
     MergeWork mergeWork = new MergeWork(mergeFilesDesc.getInputDir(),
-        mergeFilesDesc.getOutputDir());
+        mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass());
     mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
     mergeWork.resolveConcatenateMerge(db.getConf());
     mergeWork.setMapperCannotSpanPartns(true);
+    mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass());
     DriverContext driverCxt = new DriverContext();
-    BlockMergeTask taskExec = new BlockMergeTask();
+    MergeTask taskExec = new MergeTask();
     taskExec.initialize(db.getConf(), null, driverCxt);
     taskExec.setWork(mergeWork);
     taskExec.setQueryPlan(this.getQueryPlan());
@@ -598,10 +600,13 @@ public class DDLTask extends Task<DDLWor
 
     HiveAuthorizer authorizer = getSessionAuthorizer();
     try {
+      Set<String> colSet = showGrantDesc.getColumns() != null ? new HashSet<String>(
+          showGrantDesc.getColumns()) : null;
       List<HivePrivilegeInfo> privInfos = authorizer.showPrivileges(
           AuthorizationUtils.getHivePrincipal(showGrantDesc.getPrincipalDesc()),
           AuthorizationUtils.getHivePrivilegeObject(showGrantDesc.getHiveObj(),
-              showGrantDesc.getColumns()));
+              colSet));
       boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
       writeToFile(writeGrantInfo(privInfos, testMode), showGrantDesc.getResFile());
     } catch (IOException e) {
@@ -1283,7 +1288,7 @@ public class DDLTask extends Task<DDLWor
       // First create the archive in a tmp dir so that if the job fails, the
       // bad files don't pollute the filesystem
       Path tmpPath = new Path(driverContext.getCtx()
-          .getExternalTmpPath(originalDir.toUri()), "partlevel");
+          .getExternalTmpPath(originalDir), "partlevel");
 
       console.printInfo("Creating " + archiveName +
           " for " + originalDir.toString());
@@ -1478,7 +1483,7 @@ public class DDLTask extends Task<DDLWor
       throw new HiveException("Haven't found any archive where it should be");
     }
 
-    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir.toUri());
+    Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
 
     try {
       fs = tmpPath.getFileSystem(conf);
@@ -3919,12 +3924,16 @@ public class DDLTask extends Task<DDLWor
     tbl.setInputFormatClass(crtTbl.getInputFormat());
     tbl.setOutputFormatClass(crtTbl.getOutputFormat());
 
-    tbl.getTTable().getSd().setInputFormat(
-        tbl.getInputFormatClass().getName());
-    tbl.getTTable().getSd().setOutputFormat(
-        tbl.getOutputFormatClass().getName());
+    // only persist input/output format to metadata when it is explicitly specified.
+    // Otherwise, load lazily via StorageHandler at query time.
+    if (crtTbl.getInputFormat() != null && !crtTbl.getInputFormat().isEmpty()) {
+      tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
+    }
+    if (crtTbl.getOutputFormat() != null && !crtTbl.getOutputFormat().isEmpty()) {
+      tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+    }
 
-    if (!Utilities.isDefaultNameNode(conf)) {
+    if (!Utilities.isDefaultNameNode(conf) && tbl.getTTable().getSd().isSetLocation()) {
       // If location is specified - ensure that it is a full qualified name
       makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName());
     }

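The showGrantDesc change above guards against a null column list before wrapping it in a HashSet. A small standalone sketch of that null-safe conversion, with a hypothetical helper name (toSetOrNull) that is not part of Hive:

    import java.util.HashSet;
    import java.util.List;
    import java.util.Set;

    public final class CollectionGuards {
      private CollectionGuards() {}

      // Returns null when the input list is null, otherwise a mutable copy as a Set.
      public static Set<String> toSetOrNull(List<String> cols) {
        return cols != null ? new HashSet<String>(cols) : null;
      }
    }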
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Tue Aug  5 07:23:02 2014
@@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.exec.mr
 import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
 import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
 import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
@@ -294,7 +294,7 @@ public class MoveTask extends Task<MoveW
           while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
             task = (Task)task.getParentTasks().get(0);
             // If it was a merge task or a local map reduce task, nothing can be inferred
-            if (task instanceof BlockMergeTask || task instanceof MapredLocalTask) {
+            if (task instanceof MergeTask || task instanceof MapredLocalTask) {
               break;
             }
 

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java Tue Aug  5 07:23:02 2014
@@ -165,7 +165,7 @@ public abstract class Task<T extends Ser
       }
       return retval;
     } catch (IOException e) {
-      throw new RuntimeException(e.getMessage());
+      throw new RuntimeException("Unexpected error: " + e.getMessage(), e);
     }
   }
 

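The Task.java change wraps the original IOException as the cause instead of discarding it, so the underlying stack trace survives in the rethrown RuntimeException. As a quick, generic illustration of the difference (not Hive code):

    public class WrapExample {
      static void withoutCause(java.io.IOException e) {
        // loses the original stack trace; only the message survives
        throw new RuntimeException(e.getMessage());
      }

      static void withCause(java.io.IOException e) {
        // keeps the original exception chained as the cause
        throw new RuntimeException("Unexpected error: " + e.getMessage(), e);
      }
    }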
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java Tue Aug  5 07:23:02 2014
@@ -28,8 +28,8 @@ import org.apache.hadoop.hive.ql.exec.mr
 import org.apache.hadoop.hive.ql.exec.tez.TezTask;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
 import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
 import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
@@ -93,7 +93,7 @@ public final class TaskFactory {
     taskvec.add(new TaskTuple<StatsNoJobWork>(StatsNoJobWork.class, StatsNoJobTask.class));
     taskvec.add(new TaskTuple<ColumnStatsWork>(ColumnStatsWork.class, ColumnStatsTask.class));
     taskvec.add(new TaskTuple<MergeWork>(MergeWork.class,
-        BlockMergeTask.class));
+        MergeTask.class));
     taskvec.add(new TaskTuple<DependencyCollectionWork>(DependencyCollectionWork.class,
         DependencyCollectionTask.class));
     taskvec.add(new TaskTuple<PartialScanWork>(PartialScanWork.class,

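TaskFactory keeps a list of (work class, task class) pairs and instantiates the task registered for a given work object; the hunk above only swaps BlockMergeTask for the new MergeTask in that registration. A generic, hedged sketch of such a registry follows; the names WorkTaskRegistry, register, and newTaskFor are hypothetical:

    import java.util.HashMap;
    import java.util.Map;

    public class WorkTaskRegistry {
      private final Map<Class<?>, Class<?>> workToTask = new HashMap<Class<?>, Class<?>>();

      public void register(Class<?> workClass, Class<?> taskClass) {
        workToTask.put(workClass, taskClass);
      }

      // Instantiate the task registered for the concrete class of the given work object.
      public Object newTaskFor(Object work) throws Exception {
        Class<?> taskClass = workToTask.get(work.getClass());
        if (taskClass == null) {
          throw new IllegalArgumentException("No task registered for " + work.getClass());
        }
        return taskClass.newInstance();
      }
    }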
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Aug  5 07:23:02 2014
@@ -120,7 +120,8 @@ import org.apache.hadoop.hive.ql.io.Hive
 import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
 import org.apache.hadoop.hive.ql.io.RCFile;
 import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.orc.OrcFileMergeMapper;
 import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileMergeMapper;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper;
 import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
@@ -346,7 +347,8 @@ public final class Utilities {
         if(MAP_PLAN_NAME.equals(name)){
           if (ExecMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))){
             gWork = deserializePlan(in, MapWork.class, conf);
-          } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
+          } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS)) ||
+              OrcFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
             gWork = deserializePlan(in, MergeWork.class, conf);
           } else if(ColumnTruncateMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
             gWork = deserializePlan(in, ColumnTruncateWork.class, conf);

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java Tue Aug  5 07:23:02 2014
@@ -499,7 +499,7 @@ public class ExecDriver extends Task<Map
       inputPaths.add(new Path(path));
     }
 
-    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
+    Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
     Path partitionFile = new Path(tmpPath, ".partitions");
     ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
     PartitionKeySampler sampler = new PartitionKeySampler();

Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Tue Aug  5 07:23:02 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.plan.ReduceWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.SerDeUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -56,7 +55,7 @@ import org.apache.hadoop.util.StringUtil
 /**
  * ExecReducer is the generic Reducer class for Hive. Together with ExecMapper it is
  * the bridge between the map-reduce framework and the Hive operator pipeline at
- * execution time. It's main responsabilities are:
+ * execution time. Its main responsibilities are:
  *
  * - Load and setup the operator pipeline from XML
  * - Run the pipeline by transforming key, value pairs to records and forwarding them to the operators
@@ -66,8 +65,20 @@ import org.apache.hadoop.util.StringUtil
  */
 public class ExecReducer extends MapReduceBase implements Reducer {
 
+  private static final Log LOG = LogFactory.getLog("ExecReducer");
   private static final String PLAN_KEY = "__REDUCE_PLAN__";
 
+  // used to log memory usage periodically
+  private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
+  // Input value serde needs to be an array to support different SerDe
+  // for different tags
+  private final Deserializer[] inputValueDeserializer = new Deserializer[Byte.MAX_VALUE];
+  private final Object[] valueObject = new Object[Byte.MAX_VALUE];
+  private final List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
+  private final boolean isLogInfoEnabled = LOG.isInfoEnabled();
+
+  // TODO: move to DynamicSerDe when it's ready
+  private Deserializer inputKeyDeserializer;
   private JobConf jc;
   private OutputCollector<?, ?> oc;
   private Operator<?> reducer;
@@ -76,23 +87,13 @@ public class ExecReducer extends MapRedu
   private boolean isTagged = false;
   private long cntr = 0;
   private long nextCntr = 1;
-
-  public static final Log l4j = LogFactory.getLog("ExecReducer");
-  private boolean isLogInfoEnabled = false;
-
-  // used to log memory usage periodically
-  private MemoryMXBean memoryMXBean;
-
-  // TODO: move to DynamicSerDe when it's ready
-  private Deserializer inputKeyDeserializer;
-  // Input value serde needs to be an array to support different SerDe
-  // for different tags
-  private final SerDe[] inputValueDeserializer = new SerDe[Byte.MAX_VALUE];
-
-  TableDesc keyTableDesc;
-  TableDesc[] valueTableDesc;
-
-  ObjectInspector[] rowObjectInspector;
+  private TableDesc keyTableDesc;
+  private TableDesc[] valueTableDesc;
+  private ObjectInspector[] rowObjectInspector;
+
+  // runtime objects
+  private transient Object keyObject;
+  private transient BytesWritable groupKey;
 
   @Override
   public void configure(JobConf job) {
@@ -100,20 +101,16 @@ public class ExecReducer extends MapRedu
     ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
     ObjectInspector keyObjectInspector;
 
-    // Allocate the bean at the beginning -
-    memoryMXBean = ManagementFactory.getMemoryMXBean();
-    l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
-
-    isLogInfoEnabled = l4j.isInfoEnabled();
+    LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
 
     try {
-      l4j.info("conf classpath = "
+      LOG.info("conf classpath = "
           + Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
-      l4j.info("thread classpath = "
+      LOG.info("thread classpath = "
           + Arrays.asList(((URLClassLoader) Thread.currentThread()
           .getContextClassLoader()).getURLs()));
     } catch (Exception e) {
-      l4j.info("cannot get classpath: " + e.getMessage());
+      LOG.info("cannot get classpath: " + e.getMessage());
     }
     jc = job;
 
@@ -132,7 +129,7 @@ public class ExecReducer extends MapRedu
     isTagged = gWork.getNeedsTagging();
     try {
       keyTableDesc = gWork.getKeyDesc();
-      inputKeyDeserializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc
+      inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc
           .getDeserializerClass(), null);
       SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
       keyObjectInspector = inputKeyDeserializer.getObjectInspector();
@@ -140,7 +137,7 @@ public class ExecReducer extends MapRedu
       for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
         // We should initialize the SerDe with the TypeInfo when available.
         valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
-        inputValueDeserializer[tag] = (SerDe) ReflectionUtils.newInstance(
+        inputValueDeserializer[tag] = ReflectionUtils.newInstance(
             valueTableDesc[tag].getDeserializerClass(), null);
         SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null,
                                    valueTableDesc[tag].getProperties(), null);
@@ -162,7 +159,7 @@ public class ExecReducer extends MapRedu
 
     // initialize reduce operator tree
     try {
-      l4j.info(reducer.dump(0));
+      LOG.info(reducer.dump(0));
       reducer.initialize(jc, rowObjectInspector);
     } catch (Throwable e) {
       abort = true;
@@ -175,13 +172,6 @@ public class ExecReducer extends MapRedu
     }
   }
 
-  private Object keyObject;
-  private final Object[] valueObject = new Object[Byte.MAX_VALUE];
-
-  private BytesWritable groupKey;
-
-  List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
-
   public void reduce(Object key, Iterator values, OutputCollector output,
       Reporter reporter) throws IOException {
     if (reducer.getDone()) {
@@ -212,7 +202,7 @@ public class ExecReducer extends MapRedu
           groupKey = new BytesWritable();
         } else {
           // If a operator wants to do some work at the end of a group
-          l4j.trace("End Group");
+          LOG.trace("End Group");
           reducer.endGroup();
         }
 
@@ -227,7 +217,7 @@ public class ExecReducer extends MapRedu
         }
 
         groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
-        l4j.trace("Start Group");
+        LOG.trace("Start Group");
         reducer.setGroupKeyObject(keyObject);
         reducer.startGroup();
       }
@@ -253,7 +243,7 @@ public class ExecReducer extends MapRedu
           cntr++;
           if (cntr == nextCntr) {
             long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed();
-            l4j.info("ExecReducer: processing " + cntr
+            LOG.info("ExecReducer: processing " + cntr
                 + " rows: used memory = " + used_memory);
             nextCntr = getNextCntr(cntr);
           }
@@ -279,7 +269,7 @@ public class ExecReducer extends MapRedu
         // Don't create a new object if we are already out of memory
         throw (OutOfMemoryError) e;
       } else {
-        l4j.fatal(StringUtils.stringifyException(e));
+        LOG.fatal(StringUtils.stringifyException(e));
         throw new RuntimeException(e);
       }
     }
@@ -301,17 +291,17 @@ public class ExecReducer extends MapRedu
 
     // No row was processed
     if (oc == null) {
-      l4j.trace("Close called no row");
+      LOG.trace("Close called without any rows processed");
     }
 
     try {
       if (groupKey != null) {
         // If a operator wants to do some work at the end of a group
-        l4j.trace("End Group");
+        LOG.trace("End Group");
         reducer.endGroup();
       }
       if (isLogInfoEnabled) {
-        l4j.info("ExecReducer: processed " + cntr + " rows: used memory = "
+        LOG.info("ExecReducer: processed " + cntr + " rows: used memory = "
             + memoryMXBean.getHeapMemoryUsage().getUsed());
       }
 
@@ -322,7 +312,7 @@ public class ExecReducer extends MapRedu
     } catch (Exception e) {
       if (!abort) {
         // signal new failure to map-reduce
-        l4j.error("Hit error while closing operators - failing tree");
+        LOG.error("Hit error while closing operators - failing tree");
         throw new RuntimeException("Hive Runtime Error while closing operators: "
             + e.getMessage(), e);
       }

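ExecReducer's row-count logging (the cntr / nextCntr fields in the hunks above) reports heap usage only at exponentially spaced row counts, so log volume stays bounded no matter how many rows are processed. A self-contained sketch of that idea, assuming a simple doubling schedule (Hive's getNextCntr uses its own spacing):

    import java.lang.management.ManagementFactory;
    import java.lang.management.MemoryMXBean;

    public class ProgressLogger {
      private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
      private long cntr = 0;
      private long nextCntr = 1;

      public void onRow() {
        cntr++;
        if (cntr == nextCntr) {
          long used = memoryMXBean.getHeapMemoryUsage().getUsed();
          System.out.println("processed " + cntr + " rows: used memory = " + used);
          nextCntr *= 2; // doubling schedule; real spacing may differ
        }
      }
    }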
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java Tue Aug  5 07:23:02 2014
@@ -23,7 +23,6 @@ import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
 
 import org.apache.hadoop.fs.ContentSummary;
 import org.apache.hadoop.hive.conf.HiveConf;
@@ -51,13 +50,11 @@ public class HookContext {
   private UserGroupInformation ugi;
   private HookType hookType;
   final private Map<String, ContentSummary> inputPathToContentSummary;
-
-  public HookContext(QueryPlan queryPlan, HiveConf conf) throws Exception{
-    this(queryPlan, conf, new ConcurrentHashMap<String, ContentSummary>());
-  }
+  private final String ipAddress;
+  private final String userName;
 
   public HookContext(QueryPlan queryPlan, HiveConf conf,
-      Map<String, ContentSummary> inputPathToContentSummary) throws Exception {
+      Map<String, ContentSummary> inputPathToContentSummary, String userName, String ipAddress) throws Exception {
     this.queryPlan = queryPlan;
     this.conf = conf;
     this.inputPathToContentSummary = inputPathToContentSummary;
@@ -69,6 +66,8 @@ public class HookContext {
     if(SessionState.get() != null){
       linfo = SessionState.get().getLineageState().getLineageInfo();
     }
+    this.ipAddress = ipAddress;
+    this.userName = userName;
   }
 
   public QueryPlan getQueryPlan() {
@@ -143,7 +142,15 @@ public class HookContext {
     this.hookType = hookType;
   }
 
+  public String getIpAddress() {
+    return this.ipAddress;
+  }
+
   public String getOperationName() {
     return SessionState.get().getHiveOperation().name();
   }
+
+  public String getUserName() {
+    return this.userName;
+  }
 }

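With userName and ipAddress now carried on HookContext, a pre- or post-execute hook can read them directly through the new getters. A minimal sketch, assuming the ExecuteWithHookContext interface's run(HookContext) signature; the AuditLogHook class name is illustrative:

    import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
    import org.apache.hadoop.hive.ql.hooks.HookContext;

    public class AuditLogHook implements ExecuteWithHookContext {
      @Override
      public void run(HookContext hookContext) throws Exception {
        // getUserName()/getIpAddress() are the accessors added in the hunk above
        System.out.println("operation " + hookContext.getOperationName()
            + " by " + hookContext.getUserName()
            + " from " + hookContext.getIpAddress());
      }
    }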
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Aug  5 07:23:02 2014
@@ -71,8 +71,20 @@ import org.apache.hadoop.util.Reflection
 public class HiveInputFormat<K extends WritableComparable, V extends Writable>
     implements InputFormat<K, V>, JobConfigurable {
 
-  public static final String CLASS_NAME = HiveInputFormat.class.getName();
-  public static final Log LOG = LogFactory.getLog(CLASS_NAME);
+  private static final String CLASS_NAME = HiveInputFormat.class.getName();
+  private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+  /**
+   * A cache of InputFormat instances.
+   */
+  private static Map<Class, InputFormat<WritableComparable, Writable>> inputFormats 
+    = new ConcurrentHashMap<Class, InputFormat<WritableComparable, Writable>>();
+
+  private JobConf job;
+
+  // both fields are accessed by subclasses
+  protected Map<String, PartitionDesc> pathToPartitionInfo;
+  protected MapWork mrwork;
 
   /**
    * HiveInputSplit encapsulates an InputSplit with its corresponding
@@ -178,18 +190,10 @@ public class HiveInputFormat<K extends W
     }
   }
 
-  JobConf job;
-
   public void configure(JobConf job) {
     this.job = job;
   }
 
-  /**
-   * A cache of InputFormat instances.
-   */
-  protected static Map<Class, InputFormat<WritableComparable, Writable>> inputFormats 
-    = new ConcurrentHashMap<Class, InputFormat<WritableComparable, Writable>>();
-
   public static InputFormat<WritableComparable, Writable> getInputFormatFromCache(
     Class inputFormatClass, JobConf job) throws IOException {
 
@@ -248,9 +252,6 @@ public class HiveInputFormat<K extends W
     return rr;
   }
 
-  protected Map<String, PartitionDesc> pathToPartitionInfo;
-  MapWork mrwork = null;
-
   protected void init(JobConf job) {
     mrwork = Utilities.getMapWork(job);
     pathToPartitionInfo = mrwork.getPathToPartitionInfo();
@@ -281,7 +282,6 @@ public class HiveInputFormat<K extends W
       headerCount = Utilities.getHeaderCount(table);
       footerCount = Utilities.getFooterCount(table, conf);
       if (headerCount != 0 || footerCount != 0) {
-        
         // Input file has header or footer, cannot be splitted.
         conf.setLong(
             ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),

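The inputFormats field above is a classic concurrent get-or-create cache: look up first, and only instantiate and publish on a miss. A generic sketch of that pattern with a hypothetical InstanceCache class (Hive's getInputFormatFromCache additionally goes through ReflectionUtils and the JobConf):

    import java.util.concurrent.ConcurrentHashMap;

    public class InstanceCache {
      private static final ConcurrentHashMap<Class<?>, Object> CACHE =
          new ConcurrentHashMap<Class<?>, Object>();

      public static Object getOrCreate(Class<?> clazz) throws Exception {
        Object instance = CACHE.get(clazz);
        if (instance == null) {
          // benign race: two threads may construct, but only one instance wins the publish
          instance = clazz.newInstance();
          Object existing = CACHE.putIfAbsent(clazz, instance);
          if (existing != null) {
            instance = existing;
          }
        }
        return instance;
      }
    }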
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Aug  5 07:23:02 2014
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FSDataInputS
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
 import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -60,6 +61,7 @@ final class ReaderImpl implements Reader
   private final ObjectInspector inspector;
   private long deserializedSize = -1;
   private final Configuration conf;
+  private final List<Integer> versionList;
 
   //serialized footer - Keeping this around for use by getFileMetaInfo()
   // will help avoid cpu cycles spend in deserializing at cost of increased
@@ -306,6 +308,7 @@ final class ReaderImpl implements Reader
     this.metadata = rInfo.metadata;
     this.footer = rInfo.footer;
     this.inspector = rInfo.inspector;
+    this.versionList = footerMetaData.versionList;
   }
 
 
@@ -387,7 +390,8 @@ final class ReaderImpl implements Reader
         ps.getCompression().toString(),
         (int) ps.getCompressionBlockSize(),
         (int) ps.getMetadataLength(),
-        buffer
+        buffer,
+        ps.getVersionList()
         );
   }
 
@@ -446,18 +450,26 @@ final class ReaderImpl implements Reader
     final int bufferSize;
     final int metadataSize;
     final ByteBuffer footerBuffer;
+    final List<Integer> versionList;
+
+    FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
+        ByteBuffer footerBuffer) {
+      this(compressionType, bufferSize, metadataSize, footerBuffer, null);
+    }
+
     FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
-                 ByteBuffer footerBuffer){
+                 ByteBuffer footerBuffer, List<Integer> versionList){
       this.compressionType = compressionType;
       this.bufferSize = bufferSize;
       this.metadataSize = metadataSize;
       this.footerBuffer = footerBuffer;
+      this.versionList = versionList;
     }
   }
 
   public FileMetaInfo getFileMetaInfo(){
     return new FileMetaInfo(compressionKind.toString(), bufferSize,
-        metadataSize, footerByteBuffer);
+        metadataSize, footerByteBuffer, versionList);
   }
 
 
@@ -629,4 +641,11 @@ final class ReaderImpl implements Reader
     return new Metadata(metadata);
   }
 
+  List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics() {
+    return metadata.getStripeStatsList();
+  }
+
+  public List<UserMetadataItem> getOrcProtoUserMetadata() {
+    return footer.getMetadataList();
+  }
 }
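The FileMetaInfo change above keeps the old four-argument constructor and delegates to the new one with a null versionList, so existing callers stay source-compatible. The same telescoping-constructor idiom in a generic, hedged form (MetaInfoExample is illustrative, not Hive code):

    import java.util.List;

    public class MetaInfoExample {
      final String compressionType;
      final List<Integer> versionList;

      // old signature, preserved for existing callers
      MetaInfoExample(String compressionType) {
        this(compressionType, null);
      }

      // new signature carrying the extra field
      MetaInfoExample(String compressionType, List<Integer> versionList) {
        this.compressionType = compressionType;
        this.versionList = versionList;
      }
    }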