Posted to commits@hive.apache.org by gu...@apache.org on 2014/08/05 09:23:07 UTC
svn commit: r1615872 [10/12] - in /hive/branches/cbo: ./ bin/ common/
common/src/java/org/apache/hadoop/hive/conf/ conf/
contrib/src/java/org/apache/hadoop/hive/contrib/metastore/hooks/
contrib/src/test/queries/clientnegative/ contrib/src/test/queries/...
Modified: hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/branches/cbo/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Tue Aug 5 07:23:02 2014
@@ -1008,6 +1008,26 @@ class ColumnStatistics
::Thrift::Struct.generate_accessors self
end
+class AggrStats
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ COLSTATS = 1
+ PARTSFOUND = 2
+
+ FIELDS = {
+ COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatisticsObj}},
+ PARTSFOUND => {:type => ::Thrift::Types::I64, :name => 'partsFound'}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field colStats is unset!') unless @colStats
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field partsFound is unset!') unless @partsFound
+ end
+
+ ::Thrift::Struct.generate_accessors self
+end
+
class Schema
include ::Thrift::Struct, ::Thrift::Struct_Union
FIELDSCHEMAS = 1
Modified: hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb (original)
+++ hive/branches/cbo/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb Tue Aug 5 07:23:02 2014
@@ -1231,6 +1231,23 @@ module ThriftHiveMetastore
raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_partitions_statistics_req failed: unknown result')
end
+ def get_aggr_stats_for(request)
+ send_get_aggr_stats_for(request)
+ return recv_get_aggr_stats_for()
+ end
+
+ def send_get_aggr_stats_for(request)
+ send_message('get_aggr_stats_for', Get_aggr_stats_for_args, :request => request)
+ end
+
+ def recv_get_aggr_stats_for()
+ result = receive_message(Get_aggr_stats_for_result)
+ return result.success unless result.success.nil?
+ raise result.o1 unless result.o1.nil?
+ raise result.o2 unless result.o2.nil?
+ raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_aggr_stats_for failed: unknown result')
+ end
+
def delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
send_delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
return recv_delete_partition_column_statistics()
@@ -2824,6 +2841,19 @@ module ThriftHiveMetastore
write_result(result, oprot, 'get_partitions_statistics_req', seqid)
end
+ def process_get_aggr_stats_for(seqid, iprot, oprot)
+ args = read_args(iprot, Get_aggr_stats_for_args)
+ result = Get_aggr_stats_for_result.new()
+ begin
+ result.success = @handler.get_aggr_stats_for(args.request)
+ rescue ::NoSuchObjectException => o1
+ result.o1 = o1
+ rescue ::MetaException => o2
+ result.o2 = o2
+ end
+ write_result(result, oprot, 'get_aggr_stats_for', seqid)
+ end
+
def process_delete_partition_column_statistics(seqid, iprot, oprot)
args = read_args(iprot, Delete_partition_column_statistics_args)
result = Delete_partition_column_statistics_result.new()
@@ -6077,6 +6107,42 @@ module ThriftHiveMetastore
::Thrift::Struct.generate_accessors self
end
+ class Get_aggr_stats_for_args
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ REQUEST = 1
+
+ FIELDS = {
+ REQUEST => {:type => ::Thrift::Types::STRUCT, :name => 'request', :class => ::PartitionsStatsRequest}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
+ class Get_aggr_stats_for_result
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ SUCCESS = 0
+ O1 = 1
+ O2 = 2
+
+ FIELDS = {
+ SUCCESS => {:type => ::Thrift::Types::STRUCT, :name => 'success', :class => ::AggrStats},
+ O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::NoSuchObjectException},
+ O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
class Delete_partition_column_statistics_args
include ::Thrift::Struct, ::Thrift::Struct_Union
DB_NAME = 1
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Tue Aug 5 07:23:02 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -89,7 +90,6 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeResponse;
import org.apache.hadoop.hive.metastore.api.GrantRevokeRoleRequest;
import org.apache.hadoop.hive.metastore.api.GrantRevokeRoleResponse;
-import org.apache.hadoop.hive.metastore.api.GrantRevokeType;
import org.apache.hadoop.hive.metastore.api.HeartbeatRequest;
import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeRequest;
import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
@@ -241,13 +241,12 @@ public class HiveMetaStore extends Thrif
public static class HMSHandler extends FacebookBase implements
IHMSHandler {
public static final Log LOG = HiveMetaStore.LOG;
- private static boolean createDefaultDB = false;
- private static boolean defaultRolesCreated = false;
- private static boolean adminUsersAdded = false;
private String rawStoreClassName;
private final HiveConf hiveConf; // stores datastore (jpox) properties,
// right now they come from jpox.properties
+ private static String currentUrl;
+
private Warehouse wh; // hdfs warehouse
private final ThreadLocal<RawStore> threadLocalMS =
new ThreadLocal<RawStore>() {
@@ -316,8 +315,6 @@ public class HiveMetaStore extends Thrif
address, cmd).toString());
}
- // The next serial number to be assigned
- private boolean checkForDefaultDb;
private static int nextSerialNum = 0;
private static ThreadLocal<Integer> threadLocalId = new ThreadLocal<Integer>() {
@Override
@@ -350,10 +347,6 @@ public class HiveMetaStore extends Thrif
return threadLocalId.get();
}
- public static void resetDefaultDBFlag() {
- createDefaultDB = false;
- }
-
public HMSHandler(String name) throws MetaException {
super(name);
hiveConf = new HiveConf(this.getClass());
@@ -387,8 +380,6 @@ public class HiveMetaStore extends Thrif
private boolean init() throws MetaException {
rawStoreClassName = hiveConf.getVar(HiveConf.ConfVars.METASTORE_RAW_STORE_IMPL);
- checkForDefaultDb = hiveConf.getBoolean(
- "hive.metastore.checkForDefaultDb", true);
initListeners = MetaStoreUtils.getMetaStoreListeners(
MetaStoreInitListener.class, hiveConf,
hiveConf.getVar(HiveConf.ConfVars.METASTORE_INIT_HOOKS));
@@ -404,9 +395,12 @@ public class HiveMetaStore extends Thrif
wh = new Warehouse(hiveConf);
synchronized (HMSHandler.class) {
- createDefaultDB();
- createDefaultRoles();
- addAdminUsers();
+ if (currentUrl == null || !currentUrl.equals(MetaStoreInit.getConnectionURL(hiveConf))) {
+ createDefaultDB();
+ createDefaultRoles();
+ addAdminUsers();
+ currentUrl = MetaStoreInit.getConnectionURL(hiveConf);
+ }
}
if (hiveConf.getBoolean("hive.metastore.metrics.enabled", false)) {
@@ -517,7 +511,6 @@ public class HiveMetaStore extends Thrif
db.setOwnerType(PrincipalType.ROLE);
ms.createDatabase(db);
}
- HMSHandler.createDefaultDB = true;
}
/**
@@ -526,9 +519,6 @@ public class HiveMetaStore extends Thrif
* @throws MetaException
*/
private void createDefaultDB() throws MetaException {
- if (HMSHandler.createDefaultDB || !checkForDefaultDb) {
- return;
- }
try {
createDefaultDB_core(getMS());
} catch (InvalidObjectException e) {
@@ -541,11 +531,6 @@ public class HiveMetaStore extends Thrif
private void createDefaultRoles() throws MetaException {
- if(defaultRolesCreated) {
- LOG.debug("Admin role already created previously.");
- return;
- }
-
RawStore ms = getMS();
try {
ms.addRole(ADMIN, ADMIN);
@@ -579,16 +564,10 @@ public class HiveMetaStore extends Thrif
// Unlikely to be thrown.
LOG.warn("Failed while granting global privs to admin", e);
}
-
- defaultRolesCreated = true;
}
private void addAdminUsers() throws MetaException {
- if(adminUsersAdded) {
- LOG.debug("Admin users already added.");
- return;
- }
// now add pre-configured users to admin role
String userStr = HiveConf.getVar(hiveConf,ConfVars.USERS_IN_ADMIN_ROLE,"").trim();
if (userStr.isEmpty()) {
@@ -623,7 +602,6 @@ public class HiveMetaStore extends Thrif
LOG.debug(userName + " already in admin role", e);
}
}
- adminUsersAdded = true;
}
private void logInfo(String m) {
@@ -4031,7 +4009,7 @@ public class HiveMetaStore extends Thrif
incrementCounter("drop_role");
firePreEvent(new PreAuthorizationCallEvent(this));
if (ADMIN.equals(roleName) || PUBLIC.equals(roleName)) {
- throw new MetaException(PUBLIC + "/" + ADMIN +" role can't be dropped.");
+ throw new MetaException(PUBLIC + "," + ADMIN + " roles can't be dropped.");
}
Boolean ret = null;
try {
@@ -4101,6 +4079,7 @@ public class HiveMetaStore extends Thrif
return ret;
}
+ @Override
public GrantRevokeRoleResponse grant_revoke_role(GrantRevokeRoleRequest request)
throws MetaException, org.apache.thrift.TException {
GrantRevokeRoleResponse response = new GrantRevokeRoleResponse();
@@ -4998,6 +4977,25 @@ public class HiveMetaStore extends Thrif
return rolePrinGrantList;
}
+ @Override
+ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
+ throws NoSuchObjectException, MetaException, TException {
+ startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
+ AggrStats aggrStats = null;
+ try {
+ //TODO: We are setting the partition count for which stats were retrieved (partsFound)
+ // to the number of partitions in the incoming request. This is not correct, but currently
+ // no users of this API rely on it: the only current user, StatsAnnotation, does not care
+ // about it. StatsOptimizer will care, so this needs to be fixed before StatsOptimizer starts using it.
+ aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
+ request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+ return aggrStats;
+ } finally {
+ endFunction("get_partitions_statistics_req: ", aggrStats == null, null, request.getTblName());
+ }
+
+ }
+
}
public static IHMSHandler newHMSHandler(String name, HiveConf hiveConf) throws MetaException {
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Tue Aug 5 07:23:02 2014
@@ -34,6 +34,7 @@ import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -51,6 +52,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -144,6 +146,8 @@ public class HiveMetaStoreClient impleme
private String tokenStrForm;
private final boolean localMetaStore;
+ private Map<String, String> currentMetaVars;
+
// for thrift connects
private int retries = 5;
private int retryDelaySeconds = 0;
@@ -171,6 +175,7 @@ public class HiveMetaStoreClient impleme
// through the network
client = HiveMetaStore.newHMSHandler("hive client", conf);
isConnected = true;
+ snapshotActiveConf();
return;
}
@@ -231,6 +236,26 @@ public class HiveMetaStoreClient impleme
}
@Override
+ public boolean isCompatibleWith(HiveConf conf) {
+ if (currentMetaVars == null) {
+ return false; // recreate
+ }
+ boolean compatible = true;
+ for (ConfVars oneVar : HiveConf.metaVars) {
+ // Since metaVars are all of different types, use string for comparison
+ String oldVar = currentMetaVars.get(oneVar.varname);
+ String newVar = conf.get(oneVar.varname, "");
+ if (oldVar == null ||
+ (oneVar.isCaseSensitive() ? !oldVar.equals(newVar) : !oldVar.equalsIgnoreCase(newVar))) {
+ LOG.info("Mestastore configuration " + oneVar.varname +
+ " changed from " + oldVar + " to " + newVar);
+ compatible = false;
+ }
+ }
+ return compatible;
+ }
+
+ @Override
public void reconnect() throws MetaException {
if (localMetaStore) {
// For direct DB connections we don't yet support reestablishing connections.
@@ -383,9 +408,19 @@ public class HiveMetaStoreClient impleme
throw new MetaException("Could not connect to meta store using any of the URIs provided." +
" Most recent failure: " + StringUtils.stringifyException(tte));
}
+
+ snapshotActiveConf();
+
LOG.info("Connected to metastore.");
}
+ private void snapshotActiveConf() {
+ currentMetaVars = new HashMap<String, String>(HiveConf.metaVars.length);
+ for (ConfVars oneVar : HiveConf.metaVars) {
+ currentMetaVars.put(oneVar.varname, conf.get(oneVar.varname, ""));
+ }
+ }
+
public String getTokenStrForm() throws IOException {
return tokenStrForm;
}
@@ -393,6 +428,7 @@ public class HiveMetaStoreClient impleme
@Override
public void close() {
isConnected = false;
+ currentMetaVars = null;
try {
if (null != client) {
client.shutdown();
@@ -1785,4 +1821,11 @@ public class HiveMetaStoreClient impleme
NoSuchObjectException, UnsupportedOperationException {
client.drop_table_with_environment_context(dbname, name, deleteData, envContext);
}
+
+ @Override
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+ PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
+ return client.get_aggr_stats_for(req);
+ }
}
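
For illustration only (not part of this commit): with the new getAggrColStatsFor() client method above, a caller holding a connected HiveMetaStoreClient (here named msc) could fetch aggregated column statistics across a set of partitions in a single round trip. The database, table, column and partition names below are hypothetical.

    // Hypothetical usage of the new aggregate-stats client call.
    List<String> cols = Arrays.asList("id", "amount");
    List<String> parts = Arrays.asList("ds=2014-08-01", "ds=2014-08-02");
    AggrStats stats = msc.getAggrColStatsFor("default", "sales", cols, parts);
    // One aggregated ColumnStatisticsObj per requested column...
    for (ColumnStatisticsObj cso : stats.getColStats()) {
      System.out.println(cso.getColName() + ": " + cso.getStatsData());
    }
    // ...plus the number of partitions for which stats were found
    // (see the TODO in HiveMetaStore.get_aggr_stats_for above).
    System.out.println("partsFound = " + stats.getPartsFound());
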
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java Tue Aug 5 07:23:02 2014
@@ -19,9 +19,9 @@
package org.apache.hadoop.hive.metastore;
import org.apache.hadoop.hive.common.ValidTxnList;
+import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
import org.apache.hadoop.hive.metastore.api.HeartbeatTxnRangeResponse;
import org.apache.hadoop.hive.metastore.api.LockRequest;
import org.apache.hadoop.hive.metastore.api.LockResponse;
@@ -38,22 +38,18 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
-import org.apache.hadoop.hive.metastore.api.CompactionType;
import org.apache.hadoop.hive.metastore.api.ConfigValSecurityException;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.Function;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsInfoResponse;
-import org.apache.hadoop.hive.metastore.api.GetOpenTxnsResponse;
import org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleRequest;
import org.apache.hadoop.hive.metastore.api.GetPrincipalsInRoleResponse;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalRequest;
import org.apache.hadoop.hive.metastore.api.GetRoleGrantsForPrincipalResponse;
-import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeRequest;
-import org.apache.hadoop.hive.metastore.api.GrantRevokePrivilegeResponse;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
import org.apache.hadoop.hive.metastore.api.HiveObjectRef;
import org.apache.hadoop.hive.metastore.api.Index;
@@ -61,28 +57,18 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.InvalidObjectException;
import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
import org.apache.hadoop.hive.metastore.api.InvalidPartitionException;
-import org.apache.hadoop.hive.metastore.api.LockRequest;
-import org.apache.hadoop.hive.metastore.api.LockResponse;
import org.apache.hadoop.hive.metastore.api.MetaException;
-import org.apache.hadoop.hive.metastore.api.NoSuchLockException;
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
-import org.apache.hadoop.hive.metastore.api.NoSuchTxnException;
-import org.apache.hadoop.hive.metastore.api.OpenTxnsResponse;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
import org.apache.hadoop.hive.metastore.api.Role;
-import org.apache.hadoop.hive.metastore.api.ShowCompactResponse;
-import org.apache.hadoop.hive.metastore.api.ShowLocksResponse;
import org.apache.hadoop.hive.metastore.api.Table;
-import org.apache.hadoop.hive.metastore.api.TxnAbortedException;
-import org.apache.hadoop.hive.metastore.api.TxnOpenException;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.thrift.TException;
/**
* TODO Unnecessary when the server sides for both dbstore and filestore are
@@ -91,6 +77,12 @@ import org.apache.thrift.TException;
public interface IMetaStoreClient {
/**
+ * Returns whether the current client is compatible with the given conf.
+ * @return true if compatible, false otherwise
+ */
+ public boolean isCompatibleWith(HiveConf conf);
+
+ /**
* Tries to reconnect this MetaStoreClient to the MetaStore.
*/
public void reconnect() throws MetaException;
@@ -1299,4 +1291,7 @@ public interface IMetaStoreClient {
*/
GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(
GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException;
+
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partName) throws NoSuchObjectException, MetaException, TException;
}
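
For illustration only (not from this commit): the new isCompatibleWith() method lets callers that cache a metastore client decide whether the cached connection can still be used after the HiveConf changes, instead of always reconnecting. A minimal sketch with hypothetical caller code:

    // Reuse a cached client only if none of the metastore-related conf vars changed.
    IMetaStoreClient getClient(HiveConf conf, IMetaStoreClient cached) throws MetaException {
      if (cached != null && cached.isCompatibleWith(conf)) {
        return cached; // snapshotted metaVars match, keep the existing connection
      }
      if (cached != null) {
        cached.close(); // conf changed (or no snapshot), drop the old client
      }
      return new HiveMetaStoreClient(conf); // reconnect with the new configuration
    }
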
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Tue Aug 5 07:23:02 2014
@@ -427,6 +427,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"PART_ID\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -435,6 +436,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.addToValues((String)fields[1]);
}});
@@ -452,6 +454,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SD_ID\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -460,6 +463,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (fields[2] == null) return;
t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2])));
@@ -469,6 +473,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.addToBucketCols((String)fields[1]);
}});
@@ -479,6 +484,7 @@ class MetaStoreDirectSql {
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
boolean hasSkewedColumns =
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
t.getSkewedInfo().addToSkewedColNames((String)fields[1]);
@@ -502,6 +508,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
// Note that this is not a typical list accumulator - there's no call to finalize
@@ -539,6 +546,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) {
SkewedInfo skewedInfo = new SkewedInfo();
@@ -572,6 +580,7 @@ class MetaStoreDirectSql {
+ " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
+ @Override
public void apply(List<FieldSchema> t, Object[] fields) {
t.add(new FieldSchema((String)fields[2], (String)fields[3], (String)fields[1]));
}});
@@ -582,6 +591,7 @@ class MetaStoreDirectSql {
+ " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SERDE_ID\" asc";
loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
+ @Override
public void apply(SerDeInfo t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -891,19 +901,49 @@ class MetaStoreDirectSql {
return result;
}
- public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
List<String> partNames, List<String> colNames) throws MetaException {
- if (colNames.isEmpty() || partNames.isEmpty()) {
- return Lists.newArrayList();
- }
- boolean doTrace = LOG.isDebugEnabled();
- long start = doTrace ? System.nanoTime() : 0;
- String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+ + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+ + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+ + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
- + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+ + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ Query query = pm.newQuery("javax.jdo.query.SQL", qText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
+ if (qResult == null) {
+ query.closeAll();
+ return Lists.newArrayList();
+ }
+ List<Object[]> list = ensureList(qResult);
+ List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
+ for (Object[] row : list) {
+ colStats.add(prepareCSObj(row,0));
+ }
+ long end = doTrace ? System.nanoTime() : 0;
+ timingTrace(doTrace, qText, start, end);
+ query.closeAll();
+ return colStats;
+ }
+
+ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) {
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
+ Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
+ declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+ avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
+ StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
+ llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+ return cso;
+ }
+
+ private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
+ List<String> colNames) throws MetaException {
- Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
Object[] params = new Object[colNames.size() + partNames.size() + 2];
int paramI = 0;
params[paramI++] = dbName;
@@ -914,7 +954,24 @@ class MetaStoreDirectSql {
for (String partName : partNames) {
params[paramI++] = partName;
}
- Object qResult = query.executeWithArray(params);
+
+ return params;
+ }
+
+ public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ List<String> partNames, List<String> colNames) throws MetaException {
+ if (colNames.isEmpty() || partNames.isEmpty()) {
+ return Lists.newArrayList();
+ }
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
+ + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+
+ Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
long queryTime = doTrace ? System.nanoTime() : 0;
if (qResult == null) {
query.closeAll();
@@ -963,16 +1020,7 @@ class MetaStoreDirectSql {
if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
csd.setLastAnalyzed(extractSqlLong(laObj));
}
- ColumnStatisticsData data = new ColumnStatisticsData();
- // see STATS_COLLIST
- int i = offset;
- ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
- Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
- declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
- avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
- StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
- llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
- csos.add(cso);
+ csos.add(prepareCSObj(row, offset));
}
result.setStatsObj(csos);
return result;
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Tue Aug 5 07:23:02 2014
@@ -5901,6 +5901,29 @@ public class ObjectStore implements RawS
}.run(true);
}
+
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+
+ return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+ @Override
+ protected List<ColumnStatisticsObj> getSqlResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
+ return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+ }
+
+ @Override
+ protected List<ColumnStatisticsObj> getJdoResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
+ NoSuchObjectException {
+ // This is a fast path for query optimizations: if we can find this info quickly using
+ // directSql, do it. There is no point in falling back to the slow path here.
+ throw new MetaException("Jdo path is not implemented for stats aggr.");
+ }
+ }.run(true);
+ }
+
private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
Table table, List<String> partNames, List<String> colNames)
throws NoSuchObjectException, MetaException {
@@ -6747,5 +6770,4 @@ public class ObjectStore implements RawS
}
return funcs;
}
-
}
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Tue Aug 5 07:23:02 2014
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -547,4 +548,6 @@ public interface RawStore extends Config
*/
public List<String> getFunctions(String dbName, String pattern) throws MetaException;
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
}
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/RawStoreProxy.java Tue Aug 5 07:23:02 2014
@@ -26,8 +26,6 @@ import java.lang.reflect.UndeclaredThrow
import java.util.List;
import org.apache.commons.lang.ClassUtils;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hadoop.hive.common.classification.InterfaceStability;
@@ -55,7 +53,7 @@ public class RawStoreProxy implements In
// This has to be called before initializing the instance of RawStore
init();
- this.base = (RawStore) ReflectionUtils.newInstance(rawStoreClass, conf);
+ this.base = ReflectionUtils.newInstance(rawStoreClass, conf);
}
public static RawStore getProxy(HiveConf hiveConf, Configuration conf, String rawStoreClassName,
@@ -96,14 +94,6 @@ public class RawStoreProxy implements In
public Object invoke(Object proxy, Method method, Object[] args) throws Throwable {
Object ret = null;
- boolean reloadConf = HiveConf.getBoolVar(hiveConf,
- HiveConf.ConfVars.METASTOREFORCERELOADCONF);
-
- if (reloadConf) {
- MetaStoreInit.updateConnectionURL(hiveConf, getConf(), null, metaStoreInitData);
- initMS();
- }
-
try {
ret = method.invoke(base, args);
} catch (UndeclaredThrowableException e) {
Modified: hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/cbo/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Tue Aug 5 07:23:02 2014
@@ -17,17 +17,24 @@
*/
package org.apache.hadoop.hive.metastore.txn;
-import com.jolbox.bonecp.BoneCP;
import com.jolbox.bonecp.BoneCPConfig;
+import com.jolbox.bonecp.BoneCPDataSource;
+import org.apache.commons.dbcp.ConnectionFactory;
+import org.apache.commons.dbcp.DriverManagerConnectionFactory;
+import org.apache.commons.dbcp.PoolableConnectionFactory;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.commons.dbcp.PoolingDataSource;
+import org.apache.commons.pool.ObjectPool;
+import org.apache.commons.pool.impl.GenericObjectPool;
import org.apache.hadoop.hive.common.ValidTxnList;
import org.apache.hadoop.hive.common.ValidTxnListImpl;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.*;
import org.apache.hadoop.util.StringUtils;
+import javax.sql.DataSource;
import java.sql.*;
import java.util.*;
@@ -65,9 +72,9 @@ public class TxnHandler {
static final private int ALLOWED_REPEATED_DEADLOCKS = 5;
static final private Log LOG = LogFactory.getLog(TxnHandler.class.getName());
- static private BoneCP connPool;
- private static final Boolean lockLock = new Boolean("true"); // Random object to lock on for the
- // lock method
+ static private DataSource connPool;
+ private static Boolean lockLock = new Boolean("true"); // Random object to lock on for the lock
+ // method
/**
* Number of consecutive deadlocks we have seen
@@ -1596,14 +1603,28 @@ public class TxnHandler {
String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+ String connectionPooler = HiveConf.getVar(conf,
+ HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE).toLowerCase();
- BoneCPConfig config = new BoneCPConfig();
- config.setJdbcUrl(driverUrl);
- config.setMaxConnectionsPerPartition(10);
- config.setPartitionCount(1);
- config.setUser(user);
- config.setPassword(passwd);
- connPool = new BoneCP(config);
+ if ("bonecp".equals(connectionPooler)) {
+ BoneCPConfig config = new BoneCPConfig();
+ config.setJdbcUrl(driverUrl);
+ config.setMaxConnectionsPerPartition(10);
+ config.setPartitionCount(1);
+ config.setUser(user);
+ config.setPassword(passwd);
+ connPool = new BoneCPDataSource(config);
+ } else if ("dbcp".equals(connectionPooler)) {
+ ObjectPool objectPool = new GenericObjectPool();
+ ConnectionFactory connFactory = new DriverManagerConnectionFactory(driverUrl, user, passwd);
+ // This doesn't get used, but it's still necessary, see
+ // http://svn.apache.org/viewvc/commons/proper/dbcp/branches/DBCP_1_4_x_BRANCH/doc/ManualPoolingDataSourceExample.java?view=markup
+ PoolableConnectionFactory poolConnFactory =
+ new PoolableConnectionFactory(connFactory, objectPool, null, null, false, true);
+ connPool = new PoolingDataSource(objectPool);
+ } else {
+ throw new RuntimeException("Unknown JDBC connection pooling " + connectionPooler);
+ }
}
private static synchronized void buildJumpTable() {
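
For illustration only (not from this commit): because connPool is now typed as javax.sql.DataSource, the rest of TxnHandler can obtain connections the same way regardless of which pooler METASTORE_CONNECTION_POOLING_TYPE selected. A minimal sketch of how a pooled connection is consumed:

    // dbConn.close() returns the connection to the pool rather than tearing it down.
    Connection dbConn = connPool.getConnection();
    try {
      // issue transaction/lock SQL against dbConn here
    } finally {
      dbConn.close();
    }
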
Modified: hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Tue Aug 5 07:23:02 2014
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -709,5 +711,12 @@ public class DummyRawStoreControlledComm
return objectStore.getFunctions(dbName, pattern);
}
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
+
}
Modified: hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/cbo/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Tue Aug 5 07:23:02 2014
@@ -26,6 +26,7 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -726,7 +728,12 @@ public class DummyRawStoreForJdoConnecti
return null;
}
-
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
}
Modified: hive/branches/cbo/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/cbo/pom.xml?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/pom.xml (original)
+++ hive/branches/cbo/pom.xml Tue Aug 5 07:23:02 2014
@@ -104,6 +104,8 @@
<commons-lang.version>2.4</commons-lang.version>
<commons-lang3.version>3.1</commons-lang3.version>
<commons-logging.version>1.1.3</commons-logging.version>
+ <commons-pool.version>1.5.4</commons-pool.version>
+ <commons-dbcp.version>1.4</commons-dbcp.version>
<derby.version>10.10.1.1</derby.version>
<guava.version>11.0.2</guava.version>
<groovy.version>2.1.6</groovy.version>
@@ -111,8 +113,8 @@
<hadoop-20S.version>1.2.1</hadoop-20S.version>
<hadoop-23.version>2.4.0</hadoop-23.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
- <hbase.hadoop1.version>0.96.0-hadoop1</hbase.hadoop1.version>
- <hbase.hadoop2.version>0.96.0-hadoop2</hbase.hadoop2.version>
+ <hbase.hadoop1.version>0.98.3-hadoop1</hbase.hadoop1.version>
+ <hbase.hadoop2.version>0.98.3-hadoop2</hbase.hadoop2.version>
<!-- httpcomponents are not always in version sync -->
<httpcomponents.client.version>4.2.5</httpcomponents.client.version>
<httpcomponents.core.version>4.2.5</httpcomponents.core.version>
@@ -772,7 +774,7 @@
<test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>
<java.net.preferIPv4Stack>true</java.net.preferIPv4Stack>
<!-- EnforceReadOnlyTables hook and QTestUtil -->
- <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc</test.src.tables>
+ <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc,src_hbase</test.src.tables>
<java.security.krb5.conf>${test.tmp.dir}/conf/krb5.conf</java.security.krb5.conf>
</systemPropertyVariables>
</configuration>
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Context.java Tue Aug 5 07:23:02 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.lockmgr
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.shims.ShimLoader;
-import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.StringUtils;
import java.io.DataInput;
@@ -54,8 +53,6 @@ import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
-import javax.security.auth.login.LoginException;
-
/**
* Context for Semantic Analyzers. Usage: not reusable - construct a new one for
* each query should call clear() at end of use to remove temporary folders
@@ -337,7 +334,14 @@ public class Context {
* external URI to which the tmp data has to be eventually moved
* @return next available tmp path on the file system corresponding extURI
*/
- public Path getExternalTmpPath(URI extURI) {
+ public Path getExternalTmpPath(Path path) {
+ URI extURI = path.toUri();
+ if (extURI.getScheme().equals("viewfs")) {
+ // if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
+ // to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
+ // on same namespace as tbl dir.
+ return getExtTmpPathRelTo(path.getParent());
+ }
return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
nextPathId());
}
@@ -347,7 +351,8 @@ public class Context {
* within passed in uri, whereas getExternalTmpPath() ignores passed in path and returns temp
* path within /tmp
*/
- public Path getExtTmpPathRelTo(URI uri) {
+ public Path getExtTmpPathRelTo(Path path) {
+ URI uri = path.toUri();
return new Path (getScratchDir(uri.getScheme(), uri.getAuthority(), !explain,
uri.getPath() + Path.SEPARATOR + "_" + this.executionId), EXT_PREFIX + nextPathId());
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Tue Aug 5 07:23:02 2014
@@ -82,6 +82,7 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
+import org.apache.hadoop.hive.ql.parse.ColumnAccessInfo;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHook;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContext;
import org.apache.hadoop.hive.ql.parse.HiveSemanticAnalyzerHookContextImpl;
@@ -503,8 +504,13 @@ public class Driver implements CommandPr
Hive db = sem.getDb();
if (ss.isAuthorizationModeV2()) {
- doAuthorizationV2(ss, op, inputs, outputs, command);
- return;
+ // get mapping of tables to columns used
+ ColumnAccessInfo colAccessInfo = sem.getColumnAccessInfo();
+ // colAccessInfo is set only in case of SemanticAnalyzer
+ Map<String, Set<String>> tab2Cols = colAccessInfo != null ? colAccessInfo
+ .getTableToColumnAccessMap() : null;
+ doAuthorizationV2(ss, op, inputs, outputs, command, tab2Cols);
+ return;
}
if (op == null) {
throw new HiveException("Operation should not be null");
@@ -583,56 +589,9 @@ public class Driver implements CommandPr
}
}
- //for a select or create-as-select query, populate the partition to column (par2Cols) or
- // table to columns mapping (tab2Cols)
- if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
- || op.equals(HiveOperation.QUERY)) {
- SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
- ParseContext parseCtx = querySem.getParseContext();
- Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
-
- for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
- .getParseContext().getTopOps().entrySet()) {
- Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
- if (topOp instanceof TableScanOperator
- && tsoTopMap.containsKey(topOp)) {
- TableScanOperator tableScanOp = (TableScanOperator) topOp;
- Table tbl = tsoTopMap.get(tableScanOp);
- List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
- List<FieldSchema> columns = tbl.getCols();
- List<String> cols = new ArrayList<String>();
- for (int i = 0; i < neededColumnIds.size(); i++) {
- cols.add(columns.get(neededColumnIds.get(i)).getName());
- }
- //map may not contain all sources, since input list may have been optimized out
- //or non-existent tho such sources may still be referenced by the TableScanOperator
- //if it's null then the partition probably doesn't exist so let's use table permission
- if (tbl.isPartitioned() &&
- tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
- String alias_id = topOpMap.getKey();
-
- PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
- parseCtx, alias_id);
- Set<Partition> parts = partsList.getPartitions();
- for (Partition part : parts) {
- List<String> existingCols = part2Cols.get(part);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- part2Cols.put(part, existingCols);
- }
- } else {
- List<String> existingCols = tab2Cols.get(tbl);
- if (existingCols == null) {
- existingCols = new ArrayList<String>();
- }
- existingCols.addAll(cols);
- tab2Cols.put(tbl, existingCols);
- }
- }
- }
- }
+ getTablePartitionUsedColumns(op, sem, tab2Cols, part2Cols, tableUsePartLevelAuth);
+
+
// cache the results for table authorization
Set<String> tableAuthChecked = new HashSet<String>();
@@ -683,8 +642,65 @@ public class Driver implements CommandPr
}
}
+ private static void getTablePartitionUsedColumns(HiveOperation op, BaseSemanticAnalyzer sem,
+ Map<Table, List<String>> tab2Cols, Map<Partition, List<String>> part2Cols,
+ Map<String, Boolean> tableUsePartLevelAuth) throws HiveException {
+ // for a select or create-as-select query, populate the partition to column
+ // (par2Cols) or
+ // table to columns mapping (tab2Cols)
+ if (op.equals(HiveOperation.CREATETABLE_AS_SELECT)
+ || op.equals(HiveOperation.QUERY)) {
+ SemanticAnalyzer querySem = (SemanticAnalyzer) sem;
+ ParseContext parseCtx = querySem.getParseContext();
+ Map<TableScanOperator, Table> tsoTopMap = parseCtx.getTopToTable();
+
+ for (Map.Entry<String, Operator<? extends OperatorDesc>> topOpMap : querySem
+ .getParseContext().getTopOps().entrySet()) {
+ Operator<? extends OperatorDesc> topOp = topOpMap.getValue();
+ if (topOp instanceof TableScanOperator
+ && tsoTopMap.containsKey(topOp)) {
+ TableScanOperator tableScanOp = (TableScanOperator) topOp;
+ Table tbl = tsoTopMap.get(tableScanOp);
+ List<Integer> neededColumnIds = tableScanOp.getNeededColumnIDs();
+ List<FieldSchema> columns = tbl.getCols();
+ List<String> cols = new ArrayList<String>();
+ for (int i = 0; i < neededColumnIds.size(); i++) {
+ cols.add(columns.get(neededColumnIds.get(i)).getName());
+ }
+ //map may not contain all sources, since input list may have been optimized out
+ //or non-existent tho such sources may still be referenced by the TableScanOperator
+ //if it's null then the partition probably doesn't exist so let's use table permission
+ if (tbl.isPartitioned() &&
+ tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ String alias_id = topOpMap.getKey();
+
+ PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
+ parseCtx, alias_id);
+ Set<Partition> parts = partsList.getPartitions();
+ for (Partition part : parts) {
+ List<String> existingCols = part2Cols.get(part);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ part2Cols.put(part, existingCols);
+ }
+ } else {
+ List<String> existingCols = tab2Cols.get(tbl);
+ if (existingCols == null) {
+ existingCols = new ArrayList<String>();
+ }
+ existingCols.addAll(cols);
+ tab2Cols.put(tbl, existingCols);
+ }
+ }
+ }
+ }
+
+ }
+
private static void doAuthorizationV2(SessionState ss, HiveOperation op, HashSet<ReadEntity> inputs,
- HashSet<WriteEntity> outputs, String command) throws HiveException {
+ HashSet<WriteEntity> outputs, String command, Map<String, Set<String>> tab2cols) throws HiveException {
HiveAuthzContext.Builder authzContextBuilder = new HiveAuthzContext.Builder();
@@ -696,11 +712,34 @@ public class Driver implements CommandPr
HiveOperationType hiveOpType = getHiveOperationType(op);
List<HivePrivilegeObject> inputsHObjs = getHivePrivObjects(inputs);
+ updateInputColumnInfo(inputsHObjs, tab2cols);
+
List<HivePrivilegeObject> outputHObjs = getHivePrivObjects(outputs);
ss.getAuthorizerV2().checkPrivileges(hiveOpType, inputsHObjs, outputHObjs, authzContextBuilder.build());
return;
}
+ /**
+ * Add column information for input table objects
+ * @param inputsHObjs input HivePrivilegeObject
+ * @param map table to used input columns mapping
+ */
+ private static void updateInputColumnInfo(List<HivePrivilegeObject> inputsHObjs,
+ Map<String, Set<String>> tableName2Cols) {
+ if(tableName2Cols == null) {
+ return;
+ }
+ for(HivePrivilegeObject inputObj : inputsHObjs){
+ if(inputObj.getType() != HivePrivilegeObjectType.TABLE_OR_VIEW){
+ // input columns are relevant only for tables or views
+ continue;
+ }
+ Set<String> cols = tableName2Cols.get(Table.getCompleteName(inputObj.getDbname(),
+ inputObj.getObjectName()));
+ inputObj.setColumns(cols);
+ }
+ }
+
private static List<HivePrivilegeObject> getHivePrivObjects(HashSet<? extends Entity> privObjects) {
List<HivePrivilegeObject> hivePrivobjs = new ArrayList<HivePrivilegeObject>();
if(privObjects == null){
@@ -1213,7 +1252,8 @@ public class Driver implements CommandPr
}
resStream = null;
- HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS());
+ SessionState ss = SessionState.get();
+ HookContext hookContext = new HookContext(plan, conf, ctx.getPathToCS(), ss.getUserName(), ss.getUserIpAddress());
hookContext.setHookType(HookContext.HookType.PRE_EXEC_HOOK);
for (Hook peh : getHooks(HiveConf.ConfVars.PREEXECHOOKS)) {
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Aug 5 07:23:02 2014
@@ -35,6 +35,7 @@ import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
+import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -88,9 +89,9 @@ import org.apache.hadoop.hive.ql.QueryPl
import org.apache.hadoop.hive.ql.exec.ArchiveUtils.PartSpecInfo;
import org.apache.hadoop.hive.ql.hooks.ReadEntity;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
import org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateTask;
import org.apache.hadoop.hive.ql.io.rcfile.truncate.ColumnTruncateWork;
import org.apache.hadoop.hive.ql.lockmgr.DbLockManager;
@@ -550,12 +551,13 @@ public class DDLTask extends Task<DDLWor
throws HiveException {
// merge work only needs input and output.
MergeWork mergeWork = new MergeWork(mergeFilesDesc.getInputDir(),
- mergeFilesDesc.getOutputDir());
+ mergeFilesDesc.getOutputDir(), mergeFilesDesc.getInputFormatClass());
mergeWork.setListBucketingCtx(mergeFilesDesc.getLbCtx());
mergeWork.resolveConcatenateMerge(db.getConf());
mergeWork.setMapperCannotSpanPartns(true);
+ mergeWork.setSourceTableInputFormat(mergeFilesDesc.getInputFormatClass());
DriverContext driverCxt = new DriverContext();
- BlockMergeTask taskExec = new BlockMergeTask();
+ MergeTask taskExec = new MergeTask();
taskExec.initialize(db.getConf(), null, driverCxt);
taskExec.setWork(mergeWork);
taskExec.setQueryPlan(this.getQueryPlan());
@@ -598,10 +600,13 @@ public class DDLTask extends Task<DDLWor
HiveAuthorizer authorizer = getSessionAuthorizer();
try {
+ Set<String> colSet = showGrantDesc.getColumns() != null ? new HashSet<String>(
+ showGrantDesc.getColumns()) : null;
List<HivePrivilegeInfo> privInfos = authorizer.showPrivileges(
AuthorizationUtils.getHivePrincipal(showGrantDesc.getPrincipalDesc()),
AuthorizationUtils.getHivePrivilegeObject(showGrantDesc.getHiveObj(),
- showGrantDesc.getColumns()));
+ colSet
+ ));
boolean testMode = conf.getBoolVar(HiveConf.ConfVars.HIVE_IN_TEST);
writeToFile(writeGrantInfo(privInfos, testMode), showGrantDesc.getResFile());
} catch (IOException e) {
@@ -1283,7 +1288,7 @@ public class DDLTask extends Task<DDLWor
// First create the archive in a tmp dir so that if the job fails, the
// bad files don't pollute the filesystem
Path tmpPath = new Path(driverContext.getCtx()
- .getExternalTmpPath(originalDir.toUri()), "partlevel");
+ .getExternalTmpPath(originalDir), "partlevel");
console.printInfo("Creating " + archiveName +
" for " + originalDir.toString());
@@ -1478,7 +1483,7 @@ public class DDLTask extends Task<DDLWor
throw new HiveException("Haven't found any archive where it should be");
}
- Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir.toUri());
+ Path tmpPath = driverContext.getCtx().getExternalTmpPath(originalDir);
try {
fs = tmpPath.getFileSystem(conf);
@@ -3919,12 +3924,16 @@ public class DDLTask extends Task<DDLWor
tbl.setInputFormatClass(crtTbl.getInputFormat());
tbl.setOutputFormatClass(crtTbl.getOutputFormat());
- tbl.getTTable().getSd().setInputFormat(
- tbl.getInputFormatClass().getName());
- tbl.getTTable().getSd().setOutputFormat(
- tbl.getOutputFormatClass().getName());
+ // only persist input/output format to metadata when it is explicitly specified.
+ // Otherwise, load lazily via StorageHandler at query time.
+ if (crtTbl.getInputFormat() != null && !crtTbl.getInputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
+ }
+ if (crtTbl.getOutputFormat() != null && !crtTbl.getOutputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+ }
- if (!Utilities.isDefaultNameNode(conf)) {
+ if (!Utilities.isDefaultNameNode(conf) && tbl.getTTable().getSd().isSetLocation()) {
// If location is specified - ensure that it is a full qualified name
makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName());
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/MoveTask.java Tue Aug 5 07:23:02 2014
@@ -36,7 +36,7 @@ import org.apache.hadoop.hive.ql.exec.mr
import org.apache.hadoop.hive.ql.hooks.LineageInfo.DataContainer;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockManager;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
@@ -294,7 +294,7 @@ public class MoveTask extends Task<MoveW
while (task.getParentTasks() != null && task.getParentTasks().size() == 1) {
task = (Task)task.getParentTasks().get(0);
// If it was a merge task or a local map reduce task, nothing can be inferred
- if (task instanceof BlockMergeTask || task instanceof MapredLocalTask) {
+ if (task instanceof MergeTask || task instanceof MapredLocalTask) {
break;
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Task.java Tue Aug 5 07:23:02 2014
@@ -165,7 +165,7 @@ public abstract class Task<T extends Ser
}
return retval;
} catch (IOException e) {
- throw new RuntimeException(e.getMessage());
+ throw new RuntimeException("Unexpected error: " + e.getMessage(), e);
}
}
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java Tue Aug 5 07:23:02 2014
@@ -28,8 +28,8 @@ import org.apache.hadoop.hive.ql.exec.mr
import org.apache.hadoop.hive.ql.exec.tez.TezTask;
import org.apache.hadoop.hive.ql.index.IndexMetadataChangeTask;
import org.apache.hadoop.hive.ql.index.IndexMetadataChangeWork;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.BlockMergeTask;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.merge.MergeTask;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanTask;
import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
import org.apache.hadoop.hive.ql.plan.ColumnStatsWork;
@@ -93,7 +93,7 @@ public final class TaskFactory {
taskvec.add(new TaskTuple<StatsNoJobWork>(StatsNoJobWork.class, StatsNoJobTask.class));
taskvec.add(new TaskTuple<ColumnStatsWork>(ColumnStatsWork.class, ColumnStatsTask.class));
taskvec.add(new TaskTuple<MergeWork>(MergeWork.class,
- BlockMergeTask.class));
+ MergeTask.class));
taskvec.add(new TaskTuple<DependencyCollectionWork>(DependencyCollectionWork.class,
DependencyCollectionTask.class));
taskvec.add(new TaskTuple<PartialScanWork>(PartialScanWork.class,
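Since MergeWork is now mapped to the relocated MergeTask, factory lookups elsewhere in the planner resolve to the new class; a hedged sketch of such a lookup (variable names illustrative, assuming an already-built MergeWork):

import java.io.Serializable;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.exec.TaskFactory;
import org.apache.hadoop.hive.ql.io.merge.MergeWork;

class MergeTaskLookupSketch {
  static Task<? extends Serializable> resolve(MergeWork work, HiveConf conf) {
    // With the taskvec entry above, this now returns an instance of
    // org.apache.hadoop.hive.ql.io.merge.MergeTask rather than the old
    // rcfile BlockMergeTask.
    return TaskFactory.get(work, conf);
  }
}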
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/Utilities.java Tue Aug 5 07:23:02 2014
@@ -120,7 +120,8 @@ import org.apache.hadoop.hive.ql.io.Hive
import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
import org.apache.hadoop.hive.ql.io.RCFile;
import org.apache.hadoop.hive.ql.io.ReworkMapredInputFormat;
-import org.apache.hadoop.hive.ql.io.rcfile.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.merge.MergeWork;
+import org.apache.hadoop.hive.ql.io.orc.OrcFileMergeMapper;
import org.apache.hadoop.hive.ql.io.rcfile.merge.RCFileMergeMapper;
import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanMapper;
import org.apache.hadoop.hive.ql.io.rcfile.stats.PartialScanWork;
@@ -346,7 +347,8 @@ public final class Utilities {
if(MAP_PLAN_NAME.equals(name)){
if (ExecMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))){
gWork = deserializePlan(in, MapWork.class, conf);
- } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
+ } else if(RCFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS)) ||
+ OrcFileMergeMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
gWork = deserializePlan(in, MergeWork.class, conf);
} else if(ColumnTruncateMapper.class.getName().equals(conf.get(MAPRED_MAPPER_CLASS))) {
gWork = deserializePlan(in, ColumnTruncateWork.class, conf);
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecDriver.java Tue Aug 5 07:23:02 2014
@@ -499,7 +499,7 @@ public class ExecDriver extends Task<Map
inputPaths.add(new Path(path));
}
- Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0).toUri());
+ Path tmpPath = context.getCtx().getExternalTmpPath(inputPaths.get(0));
Path partitionFile = new Path(tmpPath, ".partitions");
ShimLoader.getHadoopShims().setTotalOrderPartitionFile(job, partitionFile);
PartitionKeySampler sampler = new PartitionKeySampler();
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Tue Aug 5 07:23:02 2014
@@ -39,7 +39,6 @@ import org.apache.hadoop.hive.ql.metadat
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde2.Deserializer;
-import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -56,7 +55,7 @@ import org.apache.hadoop.util.StringUtil
/**
* ExecReducer is the generic Reducer class for Hive. Together with ExecMapper it is
* the bridge between the map-reduce framework and the Hive operator pipeline at
- * execution time. It's main responsabilities are:
+ * execution time. Its main responsibilities are:
*
* - Load and setup the operator pipeline from XML
* - Run the pipeline by transforming key, value pairs to records and forwarding them to the operators
@@ -66,8 +65,20 @@ import org.apache.hadoop.util.StringUtil
*/
public class ExecReducer extends MapReduceBase implements Reducer {
+ private static final Log LOG = LogFactory.getLog("ExecReducer");
private static final String PLAN_KEY = "__REDUCE_PLAN__";
+ // used to log memory usage periodically
+ private final MemoryMXBean memoryMXBean = ManagementFactory.getMemoryMXBean();
+ // Input value serde needs to be an array to support different SerDe
+ // for different tags
+ private final Deserializer[] inputValueDeserializer = new Deserializer[Byte.MAX_VALUE];
+ private final Object[] valueObject = new Object[Byte.MAX_VALUE];
+ private final List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
+ private final boolean isLogInfoEnabled = LOG.isInfoEnabled();
+
+ // TODO: move to DynamicSerDe when it's ready
+ private Deserializer inputKeyDeserializer;
private JobConf jc;
private OutputCollector<?, ?> oc;
private Operator<?> reducer;
@@ -76,23 +87,13 @@ public class ExecReducer extends MapRedu
private boolean isTagged = false;
private long cntr = 0;
private long nextCntr = 1;
-
- public static final Log l4j = LogFactory.getLog("ExecReducer");
- private boolean isLogInfoEnabled = false;
-
- // used to log memory usage periodically
- private MemoryMXBean memoryMXBean;
-
- // TODO: move to DynamicSerDe when it's ready
- private Deserializer inputKeyDeserializer;
- // Input value serde needs to be an array to support different SerDe
- // for different tags
- private final SerDe[] inputValueDeserializer = new SerDe[Byte.MAX_VALUE];
-
- TableDesc keyTableDesc;
- TableDesc[] valueTableDesc;
-
- ObjectInspector[] rowObjectInspector;
+ private TableDesc keyTableDesc;
+ private TableDesc[] valueTableDesc;
+ private ObjectInspector[] rowObjectInspector;
+
+ // runtime objects
+ private transient Object keyObject;
+ private transient BytesWritable groupKey;
@Override
public void configure(JobConf job) {
@@ -100,20 +101,16 @@ public class ExecReducer extends MapRedu
ObjectInspector[] valueObjectInspector = new ObjectInspector[Byte.MAX_VALUE];
ObjectInspector keyObjectInspector;
- // Allocate the bean at the beginning -
- memoryMXBean = ManagementFactory.getMemoryMXBean();
- l4j.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
-
- isLogInfoEnabled = l4j.isInfoEnabled();
+ LOG.info("maximum memory = " + memoryMXBean.getHeapMemoryUsage().getMax());
try {
- l4j.info("conf classpath = "
+ LOG.info("conf classpath = "
+ Arrays.asList(((URLClassLoader) job.getClassLoader()).getURLs()));
- l4j.info("thread classpath = "
+ LOG.info("thread classpath = "
+ Arrays.asList(((URLClassLoader) Thread.currentThread()
.getContextClassLoader()).getURLs()));
} catch (Exception e) {
- l4j.info("cannot get classpath: " + e.getMessage());
+ LOG.info("cannot get classpath: " + e.getMessage());
}
jc = job;
@@ -132,7 +129,7 @@ public class ExecReducer extends MapRedu
isTagged = gWork.getNeedsTagging();
try {
keyTableDesc = gWork.getKeyDesc();
- inputKeyDeserializer = (SerDe) ReflectionUtils.newInstance(keyTableDesc
+ inputKeyDeserializer = ReflectionUtils.newInstance(keyTableDesc
.getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputKeyDeserializer, null, keyTableDesc.getProperties(), null);
keyObjectInspector = inputKeyDeserializer.getObjectInspector();
@@ -140,7 +137,7 @@ public class ExecReducer extends MapRedu
for (int tag = 0; tag < gWork.getTagToValueDesc().size(); tag++) {
// We should initialize the SerDe with the TypeInfo when available.
valueTableDesc[tag] = gWork.getTagToValueDesc().get(tag);
- inputValueDeserializer[tag] = (SerDe) ReflectionUtils.newInstance(
+ inputValueDeserializer[tag] = ReflectionUtils.newInstance(
valueTableDesc[tag].getDeserializerClass(), null);
SerDeUtils.initializeSerDe(inputValueDeserializer[tag], null,
valueTableDesc[tag].getProperties(), null);
@@ -162,7 +159,7 @@ public class ExecReducer extends MapRedu
// initialize reduce operator tree
try {
- l4j.info(reducer.dump(0));
+ LOG.info(reducer.dump(0));
reducer.initialize(jc, rowObjectInspector);
} catch (Throwable e) {
abort = true;
@@ -175,13 +172,6 @@ public class ExecReducer extends MapRedu
}
}
- private Object keyObject;
- private final Object[] valueObject = new Object[Byte.MAX_VALUE];
-
- private BytesWritable groupKey;
-
- List<Object> row = new ArrayList<Object>(Utilities.reduceFieldNameList.size());
-
public void reduce(Object key, Iterator values, OutputCollector output,
Reporter reporter) throws IOException {
if (reducer.getDone()) {
@@ -212,7 +202,7 @@ public class ExecReducer extends MapRedu
groupKey = new BytesWritable();
} else {
// If a operator wants to do some work at the end of a group
- l4j.trace("End Group");
+ LOG.trace("End Group");
reducer.endGroup();
}
@@ -227,7 +217,7 @@ public class ExecReducer extends MapRedu
}
groupKey.set(keyWritable.get(), 0, keyWritable.getSize());
- l4j.trace("Start Group");
+ LOG.trace("Start Group");
reducer.setGroupKeyObject(keyObject);
reducer.startGroup();
}
@@ -253,7 +243,7 @@ public class ExecReducer extends MapRedu
cntr++;
if (cntr == nextCntr) {
long used_memory = memoryMXBean.getHeapMemoryUsage().getUsed();
- l4j.info("ExecReducer: processing " + cntr
+ LOG.info("ExecReducer: processing " + cntr
+ " rows: used memory = " + used_memory);
nextCntr = getNextCntr(cntr);
}
@@ -279,7 +269,7 @@ public class ExecReducer extends MapRedu
// Don't create a new object if we are already out of memory
throw (OutOfMemoryError) e;
} else {
- l4j.fatal(StringUtils.stringifyException(e));
+ LOG.fatal(StringUtils.stringifyException(e));
throw new RuntimeException(e);
}
}
@@ -301,17 +291,17 @@ public class ExecReducer extends MapRedu
// No row was processed
if (oc == null) {
- l4j.trace("Close called no row");
+ LOG.trace("Close called without any rows processed");
}
try {
if (groupKey != null) {
// If a operator wants to do some work at the end of a group
- l4j.trace("End Group");
+ LOG.trace("End Group");
reducer.endGroup();
}
if (isLogInfoEnabled) {
- l4j.info("ExecReducer: processed " + cntr + " rows: used memory = "
+ LOG.info("ExecReducer: processed " + cntr + " rows: used memory = "
+ memoryMXBean.getHeapMemoryUsage().getUsed());
}
@@ -322,7 +312,7 @@ public class ExecReducer extends MapRedu
} catch (Exception e) {
if (!abort) {
// signal new failure to map-reduce
- l4j.error("Hit error while closing operators - failing tree");
+ LOG.error("Hit error while closing operators - failing tree");
throw new RuntimeException("Hive Runtime Error while closing operators: "
+ e.getMessage(), e);
}
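The cntr/nextCntr pair above gives coarse, exponentially spaced progress logging so the info log is not hit once per row; purely as an illustration of that pattern (this is not the actual getNextCntr() body):

class ProgressLogSketch {
  // Log at 1, 10, 100, ... rows and then every million rows thereafter, so
  // logging cost stays negligible even for very large inputs.
  static long nextLogPoint(long cntr) {
    if (cntr >= 1000000) {
      return cntr + 1000000;
    }
    return cntr * 10;
  }
}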
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/hooks/HookContext.java Tue Aug 5 07:23:02 2014
@@ -23,7 +23,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
-import java.util.concurrent.ConcurrentHashMap;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.hive.conf.HiveConf;
@@ -51,13 +50,11 @@ public class HookContext {
private UserGroupInformation ugi;
private HookType hookType;
final private Map<String, ContentSummary> inputPathToContentSummary;
-
- public HookContext(QueryPlan queryPlan, HiveConf conf) throws Exception{
- this(queryPlan, conf, new ConcurrentHashMap<String, ContentSummary>());
- }
+ private final String ipAddress;
+ private final String userName;
public HookContext(QueryPlan queryPlan, HiveConf conf,
- Map<String, ContentSummary> inputPathToContentSummary) throws Exception {
+ Map<String, ContentSummary> inputPathToContentSummary, String userName, String ipAddress) throws Exception {
this.queryPlan = queryPlan;
this.conf = conf;
this.inputPathToContentSummary = inputPathToContentSummary;
@@ -69,6 +66,8 @@ public class HookContext {
if(SessionState.get() != null){
linfo = SessionState.get().getLineageState().getLineageInfo();
}
+ this.ipAddress = ipAddress;
+ this.userName = userName;
}
public QueryPlan getQueryPlan() {
@@ -143,7 +142,15 @@ public class HookContext {
this.hookType = hookType;
}
+ public String getIpAddress() {
+ return this.ipAddress;
+ }
+
public String getOperationName() {
return SessionState.get().getHiveOperation().name();
}
+
+ public String getUserName() {
+ return this.userName;
+ }
}
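A minimal sketch of a hook consuming the new fields; the class name is made up, and registration through hive.exec.pre.hooks or hive.exec.post.hooks is assumed:

import org.apache.hadoop.hive.ql.hooks.ExecuteWithHookContext;
import org.apache.hadoop.hive.ql.hooks.HookContext;

public class CallerAuditHook implements ExecuteWithHookContext {
  @Override
  public void run(HookContext hookContext) throws Exception {
    // Both values are captured from SessionState when Driver builds the
    // HookContext; the IP address may be null for local (non-remote) sessions.
    String user = hookContext.getUserName();
    String addr = hookContext.getIpAddress();
    System.out.println("operation " + hookContext.getOperationName()
        + " by " + user + (addr != null ? " from " + addr : ""));
  }
}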
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/HiveInputFormat.java Tue Aug 5 07:23:02 2014
@@ -71,8 +71,20 @@ import org.apache.hadoop.util.Reflection
public class HiveInputFormat<K extends WritableComparable, V extends Writable>
implements InputFormat<K, V>, JobConfigurable {
- public static final String CLASS_NAME = HiveInputFormat.class.getName();
- public static final Log LOG = LogFactory.getLog(CLASS_NAME);
+ private static final String CLASS_NAME = HiveInputFormat.class.getName();
+ private static final Log LOG = LogFactory.getLog(CLASS_NAME);
+
+ /**
+ * A cache of InputFormat instances.
+ */
+ private static Map<Class, InputFormat<WritableComparable, Writable>> inputFormats
+ = new ConcurrentHashMap<Class, InputFormat<WritableComparable, Writable>>();
+
+ private JobConf job;
+
+ // both fields are accessed by subclasses
+ protected Map<String, PartitionDesc> pathToPartitionInfo;
+ protected MapWork mrwork;
/**
* HiveInputSplit encapsulates an InputSplit with its corresponding
@@ -178,18 +190,10 @@ public class HiveInputFormat<K extends W
}
}
- JobConf job;
-
public void configure(JobConf job) {
this.job = job;
}
- /**
- * A cache of InputFormat instances.
- */
- protected static Map<Class, InputFormat<WritableComparable, Writable>> inputFormats
- = new ConcurrentHashMap<Class, InputFormat<WritableComparable, Writable>>();
-
public static InputFormat<WritableComparable, Writable> getInputFormatFromCache(
Class inputFormatClass, JobConf job) throws IOException {
@@ -248,9 +252,6 @@ public class HiveInputFormat<K extends W
return rr;
}
- protected Map<String, PartitionDesc> pathToPartitionInfo;
- MapWork mrwork = null;
-
protected void init(JobConf job) {
mrwork = Utilities.getMapWork(job);
pathToPartitionInfo = mrwork.getPathToPartitionInfo();
@@ -281,7 +282,6 @@ public class HiveInputFormat<K extends W
headerCount = Utilities.getHeaderCount(table);
footerCount = Utilities.getFooterCount(table, conf);
if (headerCount != 0 || footerCount != 0) {
-
// Input file has header or footer, cannot be splitted.
conf.setLong(
ShimLoader.getHadoopShims().getHadoopConfNames().get("MAPREDMINSPLITSIZE"),
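For reference, a hedged sketch of going through the static cache helper that the now-private map backs; TextInputFormat stands in for whatever format class a partition declares:

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.HiveInputFormat;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

class InputFormatCacheSketch {
  static InputFormat<WritableComparable, Writable> lookup(JobConf job)
      throws IOException {
    // Repeated lookups for the same class reuse the cached instance instead of
    // creating a new one reflectively for every split.
    return HiveInputFormat.getInputFormatFromCache(TextInputFormat.class, job);
  }
}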
Modified: hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java?rev=1615872&r1=1615871&r2=1615872&view=diff
==============================================================================
--- hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java (original)
+++ hive/branches/cbo/ql/src/java/org/apache/hadoop/hive/ql/io/orc/ReaderImpl.java Tue Aug 5 07:23:02 2014
@@ -34,6 +34,7 @@ import org.apache.hadoop.fs.FSDataInputS
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcProto.Type;
+import org.apache.hadoop.hive.ql.io.orc.OrcProto.UserMetadataItem;
import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
@@ -60,6 +61,7 @@ final class ReaderImpl implements Reader
private final ObjectInspector inspector;
private long deserializedSize = -1;
private final Configuration conf;
+ private final List<Integer> versionList;
//serialized footer - Keeping this around for use by getFileMetaInfo()
// will help avoid cpu cycles spend in deserializing at cost of increased
@@ -306,6 +308,7 @@ final class ReaderImpl implements Reader
this.metadata = rInfo.metadata;
this.footer = rInfo.footer;
this.inspector = rInfo.inspector;
+ this.versionList = footerMetaData.versionList;
}
@@ -387,7 +390,8 @@ final class ReaderImpl implements Reader
ps.getCompression().toString(),
(int) ps.getCompressionBlockSize(),
(int) ps.getMetadataLength(),
- buffer
+ buffer,
+ ps.getVersionList()
);
}
@@ -446,18 +450,26 @@ final class ReaderImpl implements Reader
final int bufferSize;
final int metadataSize;
final ByteBuffer footerBuffer;
+ final List<Integer> versionList;
+
+ FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
+ ByteBuffer footerBuffer) {
+ this(compressionType, bufferSize, metadataSize, footerBuffer, null);
+ }
+
FileMetaInfo(String compressionType, int bufferSize, int metadataSize,
- ByteBuffer footerBuffer){
+ ByteBuffer footerBuffer, List<Integer> versionList){
this.compressionType = compressionType;
this.bufferSize = bufferSize;
this.metadataSize = metadataSize;
this.footerBuffer = footerBuffer;
+ this.versionList = versionList;
}
}
public FileMetaInfo getFileMetaInfo(){
return new FileMetaInfo(compressionKind.toString(), bufferSize,
- metadataSize, footerByteBuffer);
+ metadataSize, footerByteBuffer, versionList);
}
@@ -629,4 +641,11 @@ final class ReaderImpl implements Reader
return new Metadata(metadata);
}
+ List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics() {
+ return metadata.getStripeStatsList();
+ }
+
+ public List<UserMetadataItem> getOrcProtoUserMetadata() {
+ return footer.getMetadataList();
+ }
}
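The paired FileMetaInfo constructors keep the older four-argument call sites source compatible while newer code threads the writer version list through; a generic illustration of that delegation pattern (not Hive code, names made up):

import java.util.List;

class Meta {
  final String compression;
  final List<Integer> versionList;

  Meta(String compression) {
    this(compression, null);            // legacy signature, default versionList
  }

  Meta(String compression, List<Integer> versionList) {
    this.compression = compression;
    this.versionList = versionList;
  }
}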