Posted to commits@hive.apache.org by br...@apache.org on 2014/08/10 03:33:55 UTC
svn commit: r1617040 [10/13] - in /hive/branches/spark: ./
ant/src/org/apache/hadoop/hive/ant/ beeline/
beeline/src/java/org/apache/hive/beeline/
common/src/java/org/apache/hadoop/hive/common/
common/src/java/org/apache/hadoop/hive/conf/ data/conf/ dat...
Modified: hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb (original)
+++ hive/branches/spark/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb Sun Aug 10 01:33:50 2014
@@ -1008,6 +1008,26 @@ class ColumnStatistics
::Thrift::Struct.generate_accessors self
end
+class AggrStats
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ COLSTATS = 1
+ PARTSFOUND = 2
+
+ FIELDS = {
+ COLSTATS => {:type => ::Thrift::Types::LIST, :name => 'colStats', :element => {:type => ::Thrift::Types::STRUCT, :class => ::ColumnStatisticsObj}},
+ PARTSFOUND => {:type => ::Thrift::Types::I64, :name => 'partsFound'}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field colStats is unset!') unless @colStats
+ raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field partsFound is unset!') unless @partsFound
+ end
+
+ ::Thrift::Struct.generate_accessors self
+end
+
class Schema
include ::Thrift::Struct, ::Thrift::Struct_Union
FIELDSCHEMAS = 1
Modified: hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb (original)
+++ hive/branches/spark/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb Sun Aug 10 01:33:50 2014
@@ -1231,6 +1231,23 @@ module ThriftHiveMetastore
raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_partitions_statistics_req failed: unknown result')
end
+ def get_aggr_stats_for(request)
+ send_get_aggr_stats_for(request)
+ return recv_get_aggr_stats_for()
+ end
+
+ def send_get_aggr_stats_for(request)
+ send_message('get_aggr_stats_for', Get_aggr_stats_for_args, :request => request)
+ end
+
+ def recv_get_aggr_stats_for()
+ result = receive_message(Get_aggr_stats_for_result)
+ return result.success unless result.success.nil?
+ raise result.o1 unless result.o1.nil?
+ raise result.o2 unless result.o2.nil?
+ raise ::Thrift::ApplicationException.new(::Thrift::ApplicationException::MISSING_RESULT, 'get_aggr_stats_for failed: unknown result')
+ end
+
def delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
send_delete_partition_column_statistics(db_name, tbl_name, part_name, col_name)
return recv_delete_partition_column_statistics()
@@ -2824,6 +2841,19 @@ module ThriftHiveMetastore
write_result(result, oprot, 'get_partitions_statistics_req', seqid)
end
+ def process_get_aggr_stats_for(seqid, iprot, oprot)
+ args = read_args(iprot, Get_aggr_stats_for_args)
+ result = Get_aggr_stats_for_result.new()
+ begin
+ result.success = @handler.get_aggr_stats_for(args.request)
+ rescue ::NoSuchObjectException => o1
+ result.o1 = o1
+ rescue ::MetaException => o2
+ result.o2 = o2
+ end
+ write_result(result, oprot, 'get_aggr_stats_for', seqid)
+ end
+
def process_delete_partition_column_statistics(seqid, iprot, oprot)
args = read_args(iprot, Delete_partition_column_statistics_args)
result = Delete_partition_column_statistics_result.new()
@@ -6077,6 +6107,42 @@ module ThriftHiveMetastore
::Thrift::Struct.generate_accessors self
end
+ class Get_aggr_stats_for_args
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ REQUEST = 1
+
+ FIELDS = {
+ REQUEST => {:type => ::Thrift::Types::STRUCT, :name => 'request', :class => ::PartitionsStatsRequest}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
+ class Get_aggr_stats_for_result
+ include ::Thrift::Struct, ::Thrift::Struct_Union
+ SUCCESS = 0
+ O1 = 1
+ O2 = 2
+
+ FIELDS = {
+ SUCCESS => {:type => ::Thrift::Types::STRUCT, :name => 'success', :class => ::AggrStats},
+ O1 => {:type => ::Thrift::Types::STRUCT, :name => 'o1', :class => ::NoSuchObjectException},
+ O2 => {:type => ::Thrift::Types::STRUCT, :name => 'o2', :class => ::MetaException}
+ }
+
+ def struct_fields; FIELDS; end
+
+ def validate
+ end
+
+ ::Thrift::Struct.generate_accessors self
+ end
+
class Delete_partition_column_statistics_args
include ::Thrift::Struct, ::Thrift::Struct_Union
DB_NAME = 1
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java Sun Aug 10 01:33:50 2014
@@ -29,6 +29,7 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.Database;
@@ -156,7 +157,7 @@ public class HiveAlterHandler implements
destPath = new Path(newTblLoc);
destFs = wh.getFs(destPath);
// check that src and dest are on the same file system
- if (! equalsFileSystem(srcFs, destFs)) {
+ if (!FileUtils.equalsFileSystem(srcFs, destFs)) {
throw new InvalidOperationException("table new location " + destPath
+ " is on a different file system than the old location "
+ srcPath + ". This operation is not supported");
@@ -251,21 +252,6 @@ public class HiveAlterHandler implements
}
}
- /**
- * @param fs1
- * @param fs2
- * @return return true if both file system arguments point to same file system
- */
- private boolean equalsFileSystem(FileSystem fs1, FileSystem fs2) {
- //When file system cache is disabled, you get different FileSystem objects
- // for same file system, so '==' can't be used in such cases
- //FileSystem api doesn't have a .equals() function implemented, so using
- //the uri for comparison. FileSystem already uses uri+Configuration for
- //equality in its CACHE .
- //Once equality has been added in HDFS-4321, we should make use of it
- return fs1.getUri().equals(fs2.getUri());
- }
-
public Partition alterPartition(final RawStore msdb, Warehouse wh, final String dbname,
final String name, final List<String> part_vals, final Partition new_part)
throws InvalidOperationException, InvalidObjectException, AlreadyExistsException,
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Sun Aug 10 01:33:50 2014
@@ -64,6 +64,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -423,7 +424,7 @@ public class HiveMetaStore extends Thrif
String partitionValidationRegex =
hiveConf.getVar(HiveConf.ConfVars.METASTORE_PARTITION_NAME_WHITELIST_PATTERN);
- if (partitionValidationRegex != null && partitionValidationRegex != "") {
+ if (partitionValidationRegex != null && !partitionValidationRegex.isEmpty()) {
partitionValidationPattern = Pattern.compile(partitionValidationRegex);
} else {
partitionValidationPattern = null;
@@ -4008,7 +4009,7 @@ public class HiveMetaStore extends Thrif
incrementCounter("drop_role");
firePreEvent(new PreAuthorizationCallEvent(this));
if (ADMIN.equals(roleName) || PUBLIC.equals(roleName)) {
- throw new MetaException(PUBLIC + "/" + ADMIN +" role can't be dropped.");
+ throw new MetaException(PUBLIC + "," + ADMIN + " roles can't be dropped.");
}
Boolean ret = null;
try {
@@ -4078,6 +4079,7 @@ public class HiveMetaStore extends Thrif
return ret;
}
+ @Override
public GrantRevokeRoleResponse grant_revoke_role(GrantRevokeRoleRequest request)
throws MetaException, org.apache.thrift.TException {
GrantRevokeRoleResponse response = new GrantRevokeRoleResponse();
@@ -4975,6 +4977,25 @@ public class HiveMetaStore extends Thrif
return rolePrinGrantList;
}
+ @Override
+ public AggrStats get_aggr_stats_for(PartitionsStatsRequest request)
+ throws NoSuchObjectException, MetaException, TException {
+ startFunction("get_aggr_stats_for: db=" + request.getDbName() + " table=" + request.getTblName());
+ AggrStats aggrStats = null;
+ try {
+ //TODO: We are setting the count of partitions for which stats were retrieved to the same
+ // value as the number of partitions in the request. This is not correct, but no current
+ // user of this API relies on it: StatsAnnotation, the only current user, does not care.
+ // StatsOptimizer will care, so this must be fixed before StatsOptimizer starts using it.
+ aggrStats = new AggrStats(getMS().get_aggr_stats_for(request.getDbName(),
+ request.getTblName(), request.getPartNames(), request.getColNames()), request.getPartNames().size());
+ return aggrStats;
+ } finally {
+ endFunction("get_aggr_stats_for: ", aggrStats == null, null, request.getTblName());
+ }
+
+ }
+
}
public static IHMSHandler newHMSHandler(String name, HiveConf hiveConf) throws MetaException {
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java Sun Aug 10 01:33:50 2014
@@ -52,6 +52,7 @@ import org.apache.hadoop.hive.conf.HiveC
import org.apache.hadoop.hive.metastore.api.AbortTxnRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsRequest;
import org.apache.hadoop.hive.metastore.api.AddPartitionsResult;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.CheckLockRequest;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
@@ -1820,4 +1821,11 @@ public class HiveMetaStoreClient impleme
NoSuchObjectException, UnsupportedOperationException {
client.drop_table_with_environment_context(dbname, name, deleteData, envContext);
}
+
+ @Override
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partNames) throws NoSuchObjectException, MetaException, TException {
+ PartitionsStatsRequest req = new PartitionsStatsRequest(dbName, tblName, colNames, partNames);
+ return client.get_aggr_stats_for(req);
+ }
}
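
For illustration only (not part of this commit): a minimal sketch of how the new aggregate-stats client call might be exercised, assuming a reachable metastore and a hypothetical partitioned table "web_logs" in database "default" with a "page_views" column and "dt=..." partitions.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

public class AggrStatsExample {
  public static void main(String[] args) throws Exception {
    HiveMetaStoreClient client = new HiveMetaStoreClient(new HiveConf());
    // Hypothetical table, column, and partition names.
    List<String> colNames = Arrays.asList("page_views");
    List<String> partNames = Arrays.asList("dt=2014-08-01", "dt=2014-08-02");
    AggrStats stats = client.getAggrColStatsFor("default", "web_logs", colNames, partNames);
    System.out.println("partsFound: " + stats.getPartsFound());
    for (ColumnStatisticsObj cso : stats.getColStats()) {
      System.out.println(cso.getColName() + " (" + cso.getColType() + "): " + cso.getStatsData());
    }
    client.close();
  }
}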
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java Sun Aug 10 01:33:50 2014
@@ -38,6 +38,7 @@ import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.ObjectPair;
+import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.AlreadyExistsException;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
@@ -1290,4 +1291,7 @@ public interface IMetaStoreClient {
*/
GetRoleGrantsForPrincipalResponse get_role_grants_for_principal(
GetRoleGrantsForPrincipalRequest getRolePrincReq) throws MetaException, TException;
+
+ public AggrStats getAggrColStatsFor(String dbName, String tblName,
+ List<String> colNames, List<String> partName) throws NoSuchObjectException, MetaException, TException;
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java Sun Aug 10 01:33:50 2014
@@ -309,7 +309,7 @@ class MetaStoreDirectSql {
StringBuilder partSb = new StringBuilder(sbCapacity);
// Assume db and table names are the same for all partition, that's what we're selecting for.
for (Object partitionId : sqlResult) {
- partSb.append(extractSqlLong(partitionId)).append(",");
+ partSb.append(StatObjectConverter.extractSqlLong(partitionId)).append(",");
}
String partIds = trimCommaList(partSb);
timingTrace(doTrace, queryText, start, queryTime);
@@ -346,10 +346,10 @@ class MetaStoreDirectSql {
dbName = dbName.toLowerCase();
for (Object[] fields : sqlResult2) {
// Here comes the ugly part...
- long partitionId = extractSqlLong(fields[0]);
- Long sdId = extractSqlLong(fields[1]);
- Long colId = extractSqlLong(fields[2]);
- Long serdeId = extractSqlLong(fields[3]);
+ long partitionId = StatObjectConverter.extractSqlLong(fields[0]);
+ Long sdId = StatObjectConverter.extractSqlLong(fields[1]);
+ Long colId = StatObjectConverter.extractSqlLong(fields[2]);
+ Long serdeId = StatObjectConverter.extractSqlLong(fields[3]);
// A partition must have either everything set, or nothing set if it's a view.
if (sdId == null || colId == null || serdeId == null) {
if (isView == null) {
@@ -427,6 +427,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"PART_ID\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -435,6 +436,7 @@ class MetaStoreDirectSql {
+ " where \"PART_ID\" in (" + partIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"PART_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(partitions, queryText, 0, new ApplyFunc<Partition>() {
+ @Override
public void apply(Partition t, Object[] fields) {
t.addToValues((String)fields[1]);
}});
@@ -452,6 +454,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SD_ID\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -460,6 +463,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (fields[2] == null) return;
t.addToSortCols(new Order((String)fields[1], extractSqlInt(fields[2])));
@@ -469,6 +473,7 @@ class MetaStoreDirectSql {
+ " where \"SD_ID\" in (" + sdIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
t.addToBucketCols((String)fields[1]);
}});
@@ -479,6 +484,7 @@ class MetaStoreDirectSql {
+ " order by \"SD_ID\" asc, \"INTEGER_IDX\" asc";
boolean hasSkewedColumns =
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
+ @Override
public void apply(StorageDescriptor t, Object[] fields) {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
t.getSkewedInfo().addToSkewedColNames((String)fields[1]);
@@ -502,6 +508,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) t.setSkewedInfo(new SkewedInfo());
// Note that this is not a typical list accumulator - there's no call to finalize
@@ -511,7 +518,7 @@ class MetaStoreDirectSql {
currentListId = null;
t.getSkewedInfo().addToSkewedColValues(new ArrayList<String>());
} else {
- long fieldsListId = extractSqlLong(fields[1]);
+ long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
@@ -539,6 +546,7 @@ class MetaStoreDirectSql {
loopJoinOrderedResult(sds, queryText, 0, new ApplyFunc<StorageDescriptor>() {
private Long currentListId;
private List<String> currentList;
+ @Override
public void apply(StorageDescriptor t, Object[] fields) throws MetaException {
if (!t.isSetSkewedInfo()) {
SkewedInfo skewedInfo = new SkewedInfo();
@@ -552,7 +560,7 @@ class MetaStoreDirectSql {
currentList = new ArrayList<String>(); // left outer join produced a list with no values
currentListId = null;
} else {
- long fieldsListId = extractSqlLong(fields[1]);
+ long fieldsListId = StatObjectConverter.extractSqlLong(fields[1]);
if (currentListId == null || fieldsListId != currentListId) {
currentList = new ArrayList<String>();
currentListId = fieldsListId;
@@ -572,6 +580,7 @@ class MetaStoreDirectSql {
+ " from \"COLUMNS_V2\" where \"CD_ID\" in (" + colIds + ") and \"INTEGER_IDX\" >= 0"
+ " order by \"CD_ID\" asc, \"INTEGER_IDX\" asc";
loopJoinOrderedResult(colss, queryText, 0, new ApplyFunc<List<FieldSchema>>() {
+ @Override
public void apply(List<FieldSchema> t, Object[] fields) {
t.add(new FieldSchema((String)fields[2], (String)fields[3], (String)fields[1]));
}});
@@ -582,6 +591,7 @@ class MetaStoreDirectSql {
+ " where \"SERDE_ID\" in (" + serdeIds + ") and \"PARAM_KEY\" is not null"
+ " order by \"SERDE_ID\" asc";
loopJoinOrderedResult(serdes, queryText, 0, new ApplyFunc<SerDeInfo>() {
+ @Override
public void apply(SerDeInfo t, Object[] fields) {
t.putToParameters((String)fields[1], (String)fields[2]);
}});
@@ -589,14 +599,6 @@ class MetaStoreDirectSql {
return orderedResult;
}
- private Long extractSqlLong(Object obj) throws MetaException {
- if (obj == null) return null;
- if (!(obj instanceof Number)) {
- throw new MetaException("Expected numeric type but got " + obj.getClass().getName());
- }
- return ((Number)obj).longValue();
- }
-
private void timingTrace(boolean doTrace, String queryText, long start, long queryTime) {
if (!doTrace) return;
LOG.debug("Direct SQL query in " + (queryTime - start) / 1000000.0 + "ms + " +
@@ -664,7 +666,7 @@ class MetaStoreDirectSql {
if (fields == null) {
fields = iter.next();
}
- long nestedId = extractSqlLong(fields[keyIndex]);
+ long nestedId = StatObjectConverter.extractSqlLong(fields[keyIndex]);
if (nestedId < id) throw new MetaException("Found entries for unknown ID " + nestedId);
if (nestedId > id) break; // fields belong to one of the next entries
func.apply(entry.getValue(), fields);
@@ -891,19 +893,49 @@ class MetaStoreDirectSql {
return result;
}
- public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ public List<ColumnStatisticsObj> aggrColStatsForPartitions(String dbName, String tableName,
List<String> partNames, List<String> colNames) throws MetaException {
- if (colNames.isEmpty() || partNames.isEmpty()) {
- return Lists.newArrayList();
- }
- boolean doTrace = LOG.isDebugEnabled();
- long start = doTrace ? System.nanoTime() : 0;
- String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ String qText = "select \"COLUMN_NAME\", \"COLUMN_TYPE\", "
+ + "min(\"LONG_LOW_VALUE\"), max(\"LONG_HIGH_VALUE\"), min(\"DOUBLE_LOW_VALUE\"), max(\"DOUBLE_HIGH_VALUE\"), "
+ + "min(\"BIG_DECIMAL_LOW_VALUE\"), max(\"BIG_DECIMAL_HIGH_VALUE\"), sum(\"NUM_NULLS\"), max(\"NUM_DISTINCTS\"), "
+ + "max(\"AVG_COL_LEN\"), max(\"MAX_COL_LEN\"), sum(\"NUM_TRUES\"), sum(\"NUM_FALSES\") from \"PART_COL_STATS\""
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
- + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+ + makeParams(partNames.size()) + ") group by \"COLUMN_NAME\", \"COLUMN_TYPE\"";
+
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ Query query = pm.newQuery("javax.jdo.query.SQL", qText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
+ if (qResult == null) {
+ query.closeAll();
+ return Lists.newArrayList();
+ }
+ List<Object[]> list = ensureList(qResult);
+ List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(list.size());
+ for (Object[] row : list) {
+ colStats.add(prepareCSObj(row,0));
+ }
+ long end = doTrace ? System.nanoTime() : 0;
+ timingTrace(doTrace, qText, start, end);
+ query.closeAll();
+ return colStats;
+ }
+
+ private ColumnStatisticsObj prepareCSObj (Object[] row, int i) throws MetaException {
+ ColumnStatisticsData data = new ColumnStatisticsData();
+ ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
+ Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
+ declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+ avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
+ StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
+ llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+ return cso;
+ }
+
+ private Object[] prepareParams(String dbName, String tableName, List<String> partNames,
+ List<String> colNames) throws MetaException {
- Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
Object[] params = new Object[colNames.size() + partNames.size() + 2];
int paramI = 0;
params[paramI++] = dbName;
@@ -914,7 +946,24 @@ class MetaStoreDirectSql {
for (String partName : partNames) {
params[paramI++] = partName;
}
- Object qResult = query.executeWithArray(params);
+
+ return params;
+ }
+
+ public List<ColumnStatistics> getPartitionStats(String dbName, String tableName,
+ List<String> partNames, List<String> colNames) throws MetaException {
+ if (colNames.isEmpty() || partNames.isEmpty()) {
+ return Lists.newArrayList();
+ }
+ boolean doTrace = LOG.isDebugEnabled();
+ long start = doTrace ? System.nanoTime() : 0;
+ String queryText = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from \"PART_COL_STATS\""
+ + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in ("
+ + makeParams(colNames.size()) + ") AND \"PARTITION_NAME\" in ("
+ + makeParams(partNames.size()) + ") order by \"PARTITION_NAME\"";
+
+ Query query = pm.newQuery("javax.jdo.query.SQL", queryText);
+ Object qResult = query.executeWithArray(prepareParams(dbName, tableName, partNames, colNames));
long queryTime = doTrace ? System.nanoTime() : 0;
if (qResult == null) {
query.closeAll();
@@ -960,19 +1009,10 @@ class MetaStoreDirectSql {
// LastAnalyzed is stored per column but thrift has it per several;
// get the lowest for now as nobody actually uses this field.
Object laObj = row[offset + 14];
- if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
- csd.setLastAnalyzed(extractSqlLong(laObj));
+ if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > StatObjectConverter.extractSqlLong(laObj))) {
+ csd.setLastAnalyzed(StatObjectConverter.extractSqlLong(laObj));
}
- ColumnStatisticsData data = new ColumnStatisticsData();
- // see STATS_COLLIST
- int i = offset;
- ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
- Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
- declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
- avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
- StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
- llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
- csos.add(cso);
+ csos.add(prepareCSObj(row, offset));
}
result.setStatsObj(csos);
return result;
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java Sun Aug 10 01:33:50 2014
@@ -5901,6 +5901,29 @@ public class ObjectStore implements RawS
}.run(true);
}
+
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ final List<String> partNames, final List<String> colNames) throws MetaException, NoSuchObjectException {
+
+ return new GetListHelper<ColumnStatisticsObj>(dbName, tblName, true, false) {
+ @Override
+ protected List<ColumnStatisticsObj> getSqlResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException {
+ return directSql.aggrColStatsForPartitions(dbName, tblName, partNames, colNames);
+ }
+
+ @Override
+ protected List<ColumnStatisticsObj> getJdoResult(
+ GetHelper<List<ColumnStatisticsObj>> ctx) throws MetaException,
+ NoSuchObjectException {
+ // This is the fast path for query optimizations: if we can find this info quickly using
+ // directSql, do it. No point in falling back to the slow path here.
+ throw new MetaException("Jdo path is not implemented for stats aggr.");
+ }
+ }.run(true);
+ }
+
private List<MPartitionColumnStatistics> getMPartitionColumnStatistics(
Table table, List<String> partNames, List<String> colNames)
throws NoSuchObjectException, MetaException {
@@ -6747,5 +6770,4 @@ public class ObjectStore implements RawS
}
return funcs;
}
-
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java Sun Aug 10 01:33:50 2014
@@ -27,6 +27,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -547,4 +548,6 @@ public interface RawStore extends Config
*/
public List<String> getFunctions(String dbName, String pattern) throws MetaException;
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName, String tblName,
+ List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException;
}
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java Sun Aug 10 01:33:50 2014
@@ -352,61 +352,61 @@ public class StatObjectConverter {
// SQL
public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
- Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) {
+ Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
if (colType.equals("boolean")) {
BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
- boolStats.setNumFalses((Long)falses);
- boolStats.setNumTrues((Long)trues);
- boolStats.setNumNulls((Long)nulls);
+ boolStats.setNumFalses(extractSqlLong(falses));
+ boolStats.setNumTrues(extractSqlLong(trues));
+ boolStats.setNumNulls(extractSqlLong(nulls));
data.setBooleanStats(boolStats);
} else if (colType.equals("string") ||
colType.startsWith("varchar") || colType.startsWith("char")) {
StringColumnStatsData stringStats = new StringColumnStatsData();
- stringStats.setNumNulls((Long)nulls);
+ stringStats.setNumNulls(extractSqlLong(nulls));
stringStats.setAvgColLen((Double)avglen);
- stringStats.setMaxColLen((Long)maxlen);
- stringStats.setNumDVs((Long)dist);
+ stringStats.setMaxColLen(extractSqlLong(maxlen));
+ stringStats.setNumDVs(extractSqlLong(dist));
data.setStringStats(stringStats);
} else if (colType.equals("binary")) {
BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
- binaryStats.setNumNulls((Long)nulls);
+ binaryStats.setNumNulls(extractSqlLong(nulls));
binaryStats.setAvgColLen((Double)avglen);
- binaryStats.setMaxColLen((Long)maxlen);
+ binaryStats.setMaxColLen(extractSqlLong(maxlen));
data.setBinaryStats(binaryStats);
} else if (colType.equals("bigint") || colType.equals("int") ||
colType.equals("smallint") || colType.equals("tinyint") ||
colType.equals("timestamp")) {
LongColumnStatsData longStats = new LongColumnStatsData();
- longStats.setNumNulls((Long)nulls);
+ longStats.setNumNulls(extractSqlLong(nulls));
if (lhigh != null) {
- longStats.setHighValue((Long)lhigh);
+ longStats.setHighValue(extractSqlLong(lhigh));
}
if (llow != null) {
- longStats.setLowValue((Long)llow);
+ longStats.setLowValue(extractSqlLong(llow));
}
- longStats.setNumDVs((Long)dist);
+ longStats.setNumDVs(extractSqlLong(dist));
data.setLongStats(longStats);
} else if (colType.equals("double") || colType.equals("float")) {
DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
- doubleStats.setNumNulls((Long)nulls);
+ doubleStats.setNumNulls(extractSqlLong(nulls));
if (dhigh != null) {
doubleStats.setHighValue((Double)dhigh);
}
if (dlow != null) {
doubleStats.setLowValue((Double)dlow);
}
- doubleStats.setNumDVs((Long)dist);
+ doubleStats.setNumDVs(extractSqlLong(dist));
data.setDoubleStats(doubleStats);
} else if (colType.startsWith("decimal")) {
DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
- decimalStats.setNumNulls((Long)nulls);
+ decimalStats.setNumNulls(extractSqlLong(nulls));
if (dechigh != null) {
decimalStats.setHighValue(createThriftDecimal((String)dechigh));
}
if (declow != null) {
decimalStats.setLowValue(createThriftDecimal((String)declow));
}
- decimalStats.setNumDVs((Long)dist);
+ decimalStats.setNumDVs(extractSqlLong(dist));
data.setDecimalStats(decimalStats);
}
}
@@ -419,4 +419,12 @@ public class StatObjectConverter {
private static String createJdoDecimalString(Decimal d) {
return new BigDecimal(new BigInteger(d.getUnscaled()), d.getScale()).toString();
}
+
+ static Long extractSqlLong(Object obj) throws MetaException {
+ if (obj == null) return null;
+ if (!(obj instanceof Number)) {
+ throw new MetaException("Expected numeric type but got " + obj.getClass().getName());
+ }
+ return ((Number)obj).longValue();
+ }
}
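
A brief aside (not from the commit) on why the direct (Long) casts above were replaced with extractSqlLong: depending on the database and JDBC driver, an aggregated column may come back as any Number subclass (Long, Integer, BigDecimal, ...), so a hard cast can throw ClassCastException where a numeric conversion succeeds. A self-contained sketch with hypothetical values:

import java.math.BigDecimal;

public class ExtractSqlLongSketch {
  // Mirrors the converter added to StatObjectConverter: accept any Number.
  static Long extractSqlLong(Object obj) {
    if (obj == null) return null;
    if (!(obj instanceof Number)) {
      throw new IllegalArgumentException("Expected numeric type but got " + obj.getClass().getName());
    }
    return ((Number) obj).longValue();
  }

  public static void main(String[] args) {
    Object asLong = Long.valueOf(42L);        // one driver may return Long
    Object asDecimal = new BigDecimal("42");  // another may return BigDecimal for the same column
    System.out.println(extractSqlLong(asLong));    // 42
    System.out.println(extractSqlLong(asDecimal)); // 42
    // A direct cast would fail in the second case:
    // Long broken = (Long) asDecimal;  // ClassCastException at runtime
  }
}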
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnDbUtil.java Sun Aug 10 01:33:50 2014
@@ -18,6 +18,7 @@
package org.apache.hadoop.hive.metastore.txn;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.shims.ShimLoader;
import java.sql.Connection;
import java.sql.Driver;
@@ -201,7 +202,8 @@ public class TxnDbUtil {
Properties prop = new Properties();
String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
- String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+ String passwd = ShimLoader.getHadoopShims().getPassword(conf,
+ HiveConf.ConfVars.METASTOREPWD.varname);
prop.put("user", user);
prop.put("password", passwd);
return driver.connect(driverUrl, prop);
Modified: hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java (original)
+++ hive/branches/spark/metastore/src/java/org/apache/hadoop/hive/metastore/txn/TxnHandler.java Sun Aug 10 01:33:50 2014
@@ -32,9 +32,12 @@ import org.apache.hadoop.hive.common.Val
import org.apache.hadoop.hive.common.ValidTxnListImpl;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.*;
+import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.util.StringUtils;
import javax.sql.DataSource;
+
+import java.io.IOException;
import java.sql.*;
import java.util.*;
@@ -1602,7 +1605,13 @@ public class TxnHandler {
String driverUrl = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORECONNECTURLKEY);
String user = HiveConf.getVar(conf, HiveConf.ConfVars.METASTORE_CONNECTION_USER_NAME);
- String passwd = HiveConf.getVar(conf, HiveConf.ConfVars.METASTOREPWD);
+ String passwd;
+ try {
+ passwd = ShimLoader.getHadoopShims().getPassword(conf,
+ HiveConf.ConfVars.METASTOREPWD.varname);
+ } catch (IOException err) {
+ throw new SQLException("Error getting metastore password", err);
+ }
String connectionPooler = HiveConf.getVar(conf,
HiveConf.ConfVars.METASTORE_CONNECTION_POOLING_TYPE).toLowerCase();
Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java (original)
+++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java Sun Aug 10 01:33:50 2014
@@ -25,6 +25,7 @@ import java.util.Map;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -36,6 +37,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -709,5 +711,12 @@ public class DummyRawStoreControlledComm
return objectStore.getFunctions(dbName, pattern);
}
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
+
}
Modified: hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java (original)
+++ hive/branches/spark/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java Sun Aug 10 01:33:50 2014
@@ -26,6 +26,7 @@ import junit.framework.Assert;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hadoop.hive.metastore.api.Database;
import org.apache.hadoop.hive.metastore.api.Function;
import org.apache.hadoop.hive.metastore.api.HiveObjectPrivilege;
@@ -37,6 +38,7 @@ import org.apache.hadoop.hive.metastore.
import org.apache.hadoop.hive.metastore.api.NoSuchObjectException;
import org.apache.hadoop.hive.metastore.api.Partition;
import org.apache.hadoop.hive.metastore.api.PartitionEventType;
+import org.apache.hadoop.hive.metastore.api.PartitionsStatsRequest;
import org.apache.hadoop.hive.metastore.api.PrincipalPrivilegeSet;
import org.apache.hadoop.hive.metastore.api.PrincipalType;
import org.apache.hadoop.hive.metastore.api.PrivilegeBag;
@@ -726,7 +728,12 @@ public class DummyRawStoreForJdoConnecti
return null;
}
-
+ @Override
+ public List<ColumnStatisticsObj> get_aggr_stats_for(String dbName,
+ String tblName, List<String> partNames, List<String> colNames)
+ throws MetaException {
+ return null;
+ }
}
Modified: hive/branches/spark/pom.xml
URL: http://svn.apache.org/viewvc/hive/branches/spark/pom.xml?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/pom.xml (original)
+++ hive/branches/spark/pom.xml Sun Aug 10 01:33:50 2014
@@ -113,8 +113,8 @@
<hadoop-20S.version>1.2.1</hadoop-20S.version>
<hadoop-23.version>2.4.0</hadoop-23.version>
<hadoop.bin.path>${basedir}/${hive.path.to.root}/testutils/hadoop</hadoop.bin.path>
- <hbase.hadoop1.version>0.96.0-hadoop1</hbase.hadoop1.version>
- <hbase.hadoop2.version>0.96.0-hadoop2</hbase.hadoop2.version>
+ <hbase.hadoop1.version>0.98.3-hadoop1</hbase.hadoop1.version>
+ <hbase.hadoop2.version>0.98.3-hadoop2</hbase.hadoop2.version>
<!-- httpcomponents are not always in version sync -->
<httpcomponents.client.version>4.2.5</httpcomponents.client.version>
<httpcomponents.core.version>4.2.5</httpcomponents.core.version>
@@ -777,7 +777,7 @@
<test.warehouse.dir>${test.warehouse.scheme}${test.warehouse.dir}</test.warehouse.dir>
<java.net.preferIPv4Stack>true</java.net.preferIPv4Stack>
<!-- EnforceReadOnlyTables hook and QTestUtil -->
- <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc</test.src.tables>
+ <test.src.tables>src,src1,srcbucket,srcbucket2,src_json,src_thrift,src_sequencefile,srcpart,alltypesorc,src_hbase</test.src.tables>
<java.security.krb5.conf>${test.tmp.dir}/conf/krb5.conf</java.security.krb5.conf>
</systemPropertyVariables>
</configuration>
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/Driver.java Sun Aug 10 01:33:50 2014
@@ -608,7 +608,7 @@ public class Driver implements CommandPr
Partition partition = read.getPartition();
tbl = partition.getTable();
// use partition level authorization
- if (tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ if (Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
List<String> cols = part2Cols.get(partition);
if (cols != null && cols.size() > 0) {
authorizer.authorize(partition.getTable(),
@@ -626,7 +626,7 @@ public class Driver implements CommandPr
// check, and the table authorization may already happened because of other
// partitions
if (tbl != null && !tableAuthChecked.contains(tbl.getTableName()) &&
- !(tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE)) {
+ !(Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName())))) {
List<String> cols = tab2Cols.get(tbl);
if (cols != null && cols.size() > 0) {
authorizer.authorize(tbl, null, cols,
@@ -671,7 +671,7 @@ public class Driver implements CommandPr
//or non-existent tho such sources may still be referenced by the TableScanOperator
//if it's null then the partition probably doesn't exist so let's use table permission
if (tbl.isPartitioned() &&
- tableUsePartLevelAuth.get(tbl.getTableName()) == Boolean.TRUE) {
+ Boolean.TRUE.equals(tableUsePartLevelAuth.get(tbl.getTableName()))) {
String alias_id = topOpMap.getKey();
PrunedPartitionList partsList = PartitionPruner.prune(tableScanOp,
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java Sun Aug 10 01:33:50 2014
@@ -705,13 +705,9 @@ public abstract class CommonJoinOperator
}
if (allOne) {
- LOG.info("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.info("called genAllOneUniqueJoinObject");
} else {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
}
} else {
// does any result need to be emitted
@@ -753,17 +749,11 @@ public abstract class CommonJoinOperator
}
if (!hasEmpty && !mayHasMoreThanOne) {
- LOG.trace("calling genAllOneUniqueJoinObject");
genAllOneUniqueJoinObject();
- LOG.trace("called genAllOneUniqueJoinObject");
} else if (!hasEmpty && !hasLeftSemiJoin) {
- LOG.trace("calling genUniqueJoinObject");
genUniqueJoinObject(0, 0);
- LOG.trace("called genUniqueJoinObject");
} else {
- LOG.trace("calling genObject");
genJoinObject();
- LOG.trace("called genObject");
}
}
Arrays.fill(aliasFilterTags, (byte)0xff);
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Sun Aug 10 01:33:50 2014
@@ -3924,12 +3924,16 @@ public class DDLTask extends Task<DDLWor
tbl.setInputFormatClass(crtTbl.getInputFormat());
tbl.setOutputFormatClass(crtTbl.getOutputFormat());
- tbl.getTTable().getSd().setInputFormat(
- tbl.getInputFormatClass().getName());
- tbl.getTTable().getSd().setOutputFormat(
- tbl.getOutputFormatClass().getName());
+ // only persist input/output format to metadata when it is explicitly specified.
+ // Otherwise, load lazily via StorageHandler at query time.
+ if (crtTbl.getInputFormat() != null && !crtTbl.getInputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setInputFormat(tbl.getInputFormatClass().getName());
+ }
+ if (crtTbl.getOutputFormat() != null && !crtTbl.getOutputFormat().isEmpty()) {
+ tbl.getTTable().getSd().setOutputFormat(tbl.getOutputFormatClass().getName());
+ }
- if (!Utilities.isDefaultNameNode(conf)) {
+ if (!Utilities.isDefaultNameNode(conf) && tbl.getTTable().getSd().isSetLocation()) {
// If location is specified - ensure that it is a full qualified name
makeLocationQualified(tbl.getDbName(), tbl.getTTable().getSd(), tbl.getTableName());
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java Sun Aug 10 01:33:50 2014
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Map;
+import org.apache.hadoop.hive.ql.exec.vector.VectorExtractOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFileSinkOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorFilterOperator;
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
@@ -64,23 +65,9 @@ import org.apache.hadoop.hive.ql.plan.Un
*
*/
public final class OperatorFactory {
+ private static final List<OpTuple> opvec;
+ private static final List<OpTuple> vectorOpvec;
- /**
- * OpTuple.
- *
- * @param <T>
- */
- public static final class OpTuple<T extends OperatorDesc> {
- public Class<T> descClass;
- public Class<? extends Operator<T>> opClass;
-
- public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
- this.descClass = descClass;
- this.opClass = opClass;
- }
- }
-
- public static ArrayList<OpTuple> opvec;
static {
opvec = new ArrayList<OpTuple>();
opvec.add(new OpTuple<FilterDesc>(FilterDesc.class, FilterOperator.class));
@@ -116,7 +103,6 @@ public final class OperatorFactory {
MuxOperator.class));
}
- public static ArrayList<OpTuple> vectorOpvec;
static {
vectorOpvec = new ArrayList<OpTuple>();
vectorOpvec.add(new OpTuple<SelectDesc>(SelectDesc.class, VectorSelectOperator.class));
@@ -128,8 +114,20 @@ public final class OperatorFactory {
vectorOpvec.add(new OpTuple<FileSinkDesc>(FileSinkDesc.class, VectorFileSinkOperator.class));
vectorOpvec.add(new OpTuple<FilterDesc>(FilterDesc.class, VectorFilterOperator.class));
vectorOpvec.add(new OpTuple<LimitDesc>(LimitDesc.class, VectorLimitOperator.class));
+ vectorOpvec.add(new OpTuple<ExtractDesc>(ExtractDesc.class, VectorExtractOperator.class));
}
+ private static final class OpTuple<T extends OperatorDesc> {
+ private final Class<T> descClass;
+ private final Class<? extends Operator<T>> opClass;
+
+ public OpTuple(Class<T> descClass, Class<? extends Operator<T>> opClass) {
+ this.descClass = descClass;
+ this.opClass = opClass;
+ }
+ }
+
+
public static <T extends OperatorDesc> Operator<T> getVectorOperator(T conf,
VectorizationContext vContext) throws HiveException {
Class<T> descClass = (Class<T>) conf.getClass();
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/ReduceSinkOperator.java Sun Aug 10 01:33:50 2014
@@ -25,6 +25,8 @@ import java.util.Arrays;
import java.util.List;
import java.util.Random;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.io.HiveKey;
@@ -61,9 +63,20 @@ public class ReduceSinkOperator extends
PTFUtils.makeTransient(ReduceSinkOperator.class, "inputAliases", "valueIndex");
}
+ private static final Log LOG = LogFactory.getLog(ReduceSinkOperator.class.getName());
private static final long serialVersionUID = 1L;
- protected transient OutputCollector out;
+ private static final MurmurHash hash = (MurmurHash) MurmurHash.getInstance();
+
+ private transient ObjectInspector[] partitionObjectInspectors;
+ private transient ObjectInspector[] bucketObjectInspectors;
+ private transient int buckColIdxInKey;
+ private boolean firstRow;
+ private transient int tag;
+ private boolean skipTag = false;
+ private transient InspectableObject tempInspectableObject = new InspectableObject();
+ private transient int[] valueIndex; // index for value(+ from keys, - from values)
+ protected transient OutputCollector out;
/**
* The evaluators for the key columns. Key columns decide the sort order on
* the reducer side. Key columns are passed to the reducer in the "key".
@@ -84,38 +97,40 @@ public class ReduceSinkOperator extends
* Evaluators for bucketing columns. This is used to compute bucket number.
*/
protected transient ExprNodeEvaluator[] bucketEval = null;
-
- // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is
- // ready
+ // TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
protected transient Serializer keySerializer;
protected transient boolean keyIsText;
protected transient Serializer valueSerializer;
- transient int tag;
protected transient byte[] tagByte = new byte[1];
- transient protected int numDistributionKeys;
- transient protected int numDistinctExprs;
- transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
- private boolean skipTag = false;
+ protected transient int numDistributionKeys;
+ protected transient int numDistinctExprs;
+ protected transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
protected transient boolean autoParallel = false;
-
- protected static final MurmurHash hash = (MurmurHash)MurmurHash.getInstance();
-
- private transient int[] valueIndex; // index for value(+ from keys, - from values)
-
- public void setInputAliases(String[] inputAliases) {
- this.inputAliases = inputAliases;
- }
-
- public String[] getInputAliases() {
- return inputAliases;
- }
-
- public void setOutputCollector(OutputCollector _out) {
- this.out = _out;
- }
-
// picks topN K:V pairs from input.
protected transient TopNHash reducerHash = new TopNHash();
+ protected transient HiveKey keyWritable = new HiveKey();
+ protected transient ObjectInspector keyObjectInspector;
+ protected transient ObjectInspector valueObjectInspector;
+ protected transient Object[] cachedValues;
+ protected transient List<List<Integer>> distinctColIndices;
+ protected transient Random random;
+ /**
+ * This two dimensional array holds key data and a corresponding Union object
+ * which contains the tag identifying the aggregate expression for distinct columns.
+ *
+ * If there is no distinct expression, cachedKeys is simply like this.
+ * cachedKeys[0] = [col0][col1]
+ *
+ * with two distinct expressions, union(tag:key) is attached for each distinct expression
+ * cachedKeys[0] = [col0][col1][0:dist1]
+ * cachedKeys[1] = [col0][col1][1:dist2]
+ *
+ * in this case, child GBY evaluates distinct values with expression like KEY.col2:0.dist1
+ * see {@link ExprNodeColumnEvaluator}
+ */
+ // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
+ protected transient Object[][] cachedKeys;
+
@Override
protected void initializeOp(Configuration hconf) throws HiveException {
try {
@@ -184,40 +199,12 @@ public class ReduceSinkOperator extends
firstRow = true;
initializeChildren(hconf);
} catch (Exception e) {
- e.printStackTrace();
+ String msg = "Error initializing ReduceSinkOperator: " + e.getMessage();
+ LOG.error(msg, e);
throw new RuntimeException(e);
}
}
- transient InspectableObject tempInspectableObject = new InspectableObject();
- protected transient HiveKey keyWritable = new HiveKey();
-
- protected transient ObjectInspector keyObjectInspector;
- protected transient ObjectInspector valueObjectInspector;
- transient ObjectInspector[] partitionObjectInspectors;
- transient ObjectInspector[] bucketObjectInspectors = null;
- transient int buckColIdxInKey;
-
- protected transient Object[] cachedValues;
- protected transient List<List<Integer>> distinctColIndices;
- /**
- * This two dimensional array holds key data and a corresponding Union object
- * which contains the tag identifying the aggregate expression for distinct columns.
- *
- * If there is no distict expression, cachedKeys is simply like this.
- * cachedKeys[0] = [col0][col1]
- *
- * with two distict expression, union(tag:key) is attatched for each distinct expression
- * cachedKeys[0] = [col0][col1][0:dist1]
- * cachedKeys[1] = [col0][col1][1:dist2]
- *
- * in this case, child GBY evaluates distict values with expression like KEY.col2:0.dist1
- * see {@link ExprNodeColumnEvaluator}
- */
- // TODO: we only ever use one row of these at a time. Why do we need to cache multiple?
- protected transient Object[][] cachedKeys;
- boolean firstRow;
- protected transient Random random;
/**
* Initializes array of ExprNodeEvaluator. Adds Union field for distinct
@@ -509,4 +496,16 @@ public class ReduceSinkOperator extends
public int[] getValueIndex() {
return valueIndex;
}
+
+ public void setInputAliases(String[] inputAliases) {
+ this.inputAliases = inputAliases;
+ }
+
+ public String[] getInputAliases() {
+ return inputAliases;
+ }
+
+ public void setOutputCollector(OutputCollector _out) {
+ this.out = _out;
+ }
}
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapper.java Sun Aug 10 01:33:50 2014
@@ -250,7 +250,7 @@ public class ExecMapper extends MapReduc
+ used_memory);
}
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
mo.preorderMap(rps);
return;
} catch (Exception e) {
@@ -285,10 +285,10 @@ public class ExecMapper extends MapReduc
* reportStats.
*
*/
- public static class reportStats implements Operator.OperatorFunc {
- Reporter rp;
+ public static class ReportStats implements Operator.OperatorFunc {
+ private final Reporter rp;
- public reportStats(Reporter rp) {
+ public ReportStats(Reporter rp) {
this.rp = rp;
}
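
The ReportStats rename above keeps the same visitor-style usage: the function object is handed to preorderMap so every operator in the tree can publish its counters. A minimal sketch of that traversal pattern, assuming simplified Op/preorderMap stand-ins rather than Hive's actual Operator and OperatorFunc classes:

import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;

// Sketch of the pattern behind ReportStats: a small function object is applied to every
// operator in preorder so each one can publish its statistics. Op is a simplified
// stand-in, not Hive's Operator.
public class PreorderFuncSketch {
  static final class Op {
    final String name;
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }
    // Visit this operator first, then recurse into its children (preorder).
    void preorderMap(Consumer<Op> func) {
      func.accept(this);
      for (Op child : children) {
        child.preorderMap(func);
      }
    }
  }

  public static void main(String[] args) {
    Op map = new Op("MAP");
    Op filter = new Op("FIL");
    Op reduceSink = new Op("RS");
    map.children.add(filter);
    filter.children.add(reduceSink);

    // Analogous to mo.preorderMap(new ReportStats(rp)) at the end of a task.
    map.preorderMap(op -> System.out.println("report counters for " + op.name));
  }
}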
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecReducer.java Sun Aug 10 01:33:50 2014
@@ -34,7 +34,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.ObjectCacheFactory;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ReduceWork;
import org.apache.hadoop.hive.ql.plan.TableDesc;
@@ -306,7 +306,7 @@ public class ExecReducer extends MapRedu
}
reducer.close(abort);
- reportStats rps = new reportStats(rp);
+ ReportStats rps = new ReportStats(rp);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/MapRecordProcessor.java Sun Aug 10 01:33:50 2014
@@ -33,7 +33,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.vector.VectorMapOperator;
@@ -225,7 +225,7 @@ public class MapRecordProcessor extends
if (isLogInfoEnabled) {
logCloseInfo();
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
mapOp.preorderMap(rps);
return;
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java Sun Aug 10 01:33:50 2014
@@ -35,7 +35,7 @@ import org.apache.hadoop.hive.ql.exec.Ob
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.OperatorUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.reportStats;
+import org.apache.hadoop.hive.ql.exec.mr.ExecMapper.ReportStats;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.tez.TezProcessor.TezKVOutputCollector;
import org.apache.hadoop.hive.ql.exec.tez.tools.InputMerger;
@@ -136,7 +136,7 @@ public class ReduceRecordProcessor exte
reducer.setParentOperators(null); // clear out any parents as reducer is the
// root
isTagged = redWork.getNeedsTagging();
- vectorized = redWork.getVectorModeOn() != null;
+ vectorized = redWork.getVectorMode();
try {
keyTableDesc = redWork.getKeyDesc();
@@ -519,7 +519,7 @@ public class ReduceRecordProcessor exte
dummyOp.close(abort);
}
}
- reportStats rps = new reportStats(reporter);
+ ReportStats rps = new ReportStats(reporter);
reducer.preorderMap(rps);
} catch (Exception e) {
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedBatchUtil.java Sun Aug 10 01:33:50 2014
@@ -37,6 +37,7 @@ import org.apache.hadoop.hive.serde2.obj
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo;
import org.apache.hadoop.io.BooleanWritable;
+import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
@@ -123,6 +124,7 @@ public class VectorizedBatchUtil {
case DOUBLE:
cvList.add(new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
break;
+ case BINARY:
case STRING:
cvList.add(new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE));
break;
@@ -237,7 +239,7 @@ public class VectorizedBatchUtil {
// float/double. String types have no default value for null.
switch (poi.getPrimitiveCategory()) {
case BOOLEAN: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((BooleanWritable) writableCol).get() ? 1 : 0;
lcv.isNull[rowIndex] = false;
@@ -248,7 +250,7 @@ public class VectorizedBatchUtil {
}
break;
case BYTE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ByteWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -259,7 +261,7 @@ public class VectorizedBatchUtil {
}
break;
case SHORT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((ShortWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -270,7 +272,7 @@ public class VectorizedBatchUtil {
}
break;
case INT: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((IntWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -281,7 +283,7 @@ public class VectorizedBatchUtil {
}
break;
case LONG: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((LongWritable) writableCol).get();
lcv.isNull[rowIndex] = false;
@@ -292,7 +294,7 @@ public class VectorizedBatchUtil {
}
break;
case DATE: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
lcv.vector[rowIndex] = ((DateWritable) writableCol).getDays();
lcv.isNull[rowIndex] = false;
@@ -303,7 +305,7 @@ public class VectorizedBatchUtil {
}
break;
case FLOAT: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i];
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.vector[rowIndex] = ((FloatWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
@@ -314,7 +316,7 @@ public class VectorizedBatchUtil {
}
break;
case DOUBLE: {
- DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off+i];
+ DoubleColumnVector dcv = (DoubleColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.vector[rowIndex] = ((DoubleWritable) writableCol).get();
dcv.isNull[rowIndex] = false;
@@ -325,7 +327,7 @@ public class VectorizedBatchUtil {
}
break;
case TIMESTAMP: {
- LongColumnVector lcv = (LongColumnVector) batch.cols[off+i];
+ LongColumnVector lcv = (LongColumnVector) batch.cols[off + i];
if (writableCol != null) {
Timestamp t = ((TimestampWritable) writableCol).getTimestamp();
lcv.vector[rowIndex] = TimestampUtils.getTimeNanoSec(t);
@@ -336,8 +338,27 @@ public class VectorizedBatchUtil {
}
}
break;
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
+ if (writableCol != null) {
+ bcv.isNull[rowIndex] = false;
+ BytesWritable bw = (BytesWritable) writableCol;
+ byte[] bytes = bw.getBytes();
+ int start = buffer.getLength();
+ int length = bytes.length;
+ try {
+ buffer.write(bytes, 0, length);
+ } catch (IOException ioe) {
+ throw new IllegalStateException("bad write", ioe);
+ }
+ bcv.setRef(rowIndex, buffer.getData(), start, length);
+ } else {
+ setNullColIsNullValue(bcv, rowIndex);
+ }
+ }
+ break;
case STRING: {
- BytesColumnVector bcv = (BytesColumnVector) batch.cols[off+i];
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[off + i];
if (writableCol != null) {
bcv.isNull[rowIndex] = false;
Text colText = (Text) writableCol;
@@ -355,7 +376,7 @@ public class VectorizedBatchUtil {
}
break;
case DECIMAL:
- DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off+i];
+ DecimalColumnVector dcv = (DecimalColumnVector) batch.cols[off + i];
if (writableCol != null) {
dcv.isNull[rowIndex] = false;
HiveDecimalWritable wobj = (HiveDecimalWritable) writableCol;
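
For the new BINARY branch in VectorizedBatchUtil above, the idea is the same as the existing STRING branch: append the row's bytes to a shared scratch buffer and record only an (array, start, length) reference in the column vector. A rough sketch of that by-reference pattern, using java.io.ByteArrayOutputStream and a hypothetical SimpleBytesColumn in place of Hadoop's DataOutputBuffer and Hive's BytesColumnVector:

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

// Sketch of the "append to a shared buffer, store only a reference" pattern used by
// BytesColumnVector.setRef in the BINARY/STRING branches above. SimpleBytesColumn is
// a hypothetical stand-in, not a Hive class.
public class ByRefColumnSketch {
  static final class SimpleBytesColumn {
    byte[][] vector; int[] start; int[] length; boolean[] isNull;
    SimpleBytesColumn(int size) {
      vector = new byte[size][]; start = new int[size]; length = new int[size]; isNull = new boolean[size];
    }
    void setRef(int row, byte[] data, int off, int len) {
      vector[row] = data; start[row] = off; length[row] = len; isNull[row] = false;
    }
  }

  public static void main(String[] args) {
    byte[][] rows = { "alpha".getBytes(StandardCharsets.UTF_8),
                      "beta".getBytes(StandardCharsets.UTF_8) };
    ByteArrayOutputStream buffer = new ByteArrayOutputStream(); // plays the role of DataOutputBuffer
    int[] starts = new int[rows.length];
    for (int i = 0; i < rows.length; i++) {
      starts[i] = buffer.size();               // offset of this row inside the shared buffer
      buffer.write(rows[i], 0, rows[i].length);
    }
    byte[] data = buffer.toByteArray();         // one backing array shared by every row
    SimpleBytesColumn col = new SimpleBytesColumn(rows.length);
    for (int i = 0; i < rows.length; i++) {
      col.setRef(i, data, starts[i], rows[i].length);  // no per-row copy of the bytes
    }
    System.out.println(new String(col.vector[1], col.start[1], col.length[1], StandardCharsets.UTF_8));
  }
}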
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedColumnarSerDe.java Sun Aug 10 01:33:50 2014
@@ -152,13 +152,20 @@ public class VectorizedColumnarSerDe ext
ByteBuffer b = Text.encode(String.valueOf(dcv.vector[rowIndex]));
serializeVectorStream.write(b.array(), 0, b.limit());
break;
- case STRING:
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
+ byte[] bytes = bcv.vector[rowIndex];
+ serializeVectorStream.write(bytes, 0, bytes.length);
+ }
+ break;
+ case STRING: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[k];
LazyUtils.writeEscaped(serializeVectorStream, bcv.vector[rowIndex],
bcv.start[rowIndex],
bcv.length[rowIndex],
serdeParams.isEscaped(), serdeParams.getEscapeChar(), serdeParams
.getNeedsEscape());
+ }
break;
case TIMESTAMP:
LongColumnVector tcv = (LongColumnVector) batch.cols[k];
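
The BINARY and STRING branches added above differ mainly in whether the bytes are escaped on the way out. A simplified illustration of that difference follows; escapeAndWrite is only a stand-in for LazyUtils.writeEscaped, not the real Hive routine, and the escape/separator bytes are chosen arbitrarily.

import java.io.ByteArrayOutputStream;
import java.nio.charset.StandardCharsets;

// Sketch contrasting the BINARY branch (raw bytes) with the STRING branch (escaped bytes)
// in the VectorizedColumnarSerDe hunk above.
public class EscapedWriteSketch {
  static void escapeAndWrite(ByteArrayOutputStream out, byte[] data, int start, int length,
                             byte escapeChar, byte separator) {
    for (int i = start; i < start + length; i++) {
      byte b = data[i];
      if (b == escapeChar || b == separator) {
        out.write(escapeChar);   // prefix special bytes so the row format stays parseable
      }
      out.write(b);
    }
  }

  public static void main(String[] args) {
    byte[] value = "a,b\\c".getBytes(StandardCharsets.UTF_8);

    ByteArrayOutputStream raw = new ByteArrayOutputStream();
    raw.write(value, 0, value.length);          // BINARY-style: bytes pass through untouched

    ByteArrayOutputStream escaped = new ByteArrayOutputStream();
    escapeAndWrite(escaped, value, 0, value.length, (byte) '\\', (byte) ',');

    System.out.println(raw.size() + " raw bytes vs " + escaped.size() + " escaped bytes");
  }
}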
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java Sun Aug 10 01:33:50 2014
@@ -278,7 +278,7 @@ public class VectorizedRowBatchCtx {
case PRIMITIVE: {
PrimitiveObjectInspector poi = (PrimitiveObjectInspector) foi;
// Vectorization currently only supports the following data types:
- // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, STRING, TIMESTAMP,
+ // BOOLEAN, BYTE, SHORT, INT, LONG, FLOAT, DOUBLE, BINARY, STRING, TIMESTAMP,
// DATE and DECIMAL
switch (poi.getPrimitiveCategory()) {
case BOOLEAN:
@@ -294,6 +294,7 @@ public class VectorizedRowBatchCtx {
case DOUBLE:
result.cols[j] = new DoubleColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
+ case BINARY:
case STRING:
result.cols[j] = new BytesColumnVector(VectorizedRowBatch.DEFAULT_SIZE);
break;
@@ -404,7 +405,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Boolean)value == true ? 1 : 0);
+ lcv.fill((Boolean) value == true ? 1 : 0);
lcv.isNull[0] = false;
}
}
@@ -417,7 +418,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Byte)value);
+ lcv.fill((Byte) value);
lcv.isNull[0] = false;
}
}
@@ -430,7 +431,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Short)value);
+ lcv.fill((Short) value);
lcv.isNull[0] = false;
}
}
@@ -443,7 +444,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Integer)value);
+ lcv.fill((Integer) value);
lcv.isNull[0] = false;
}
}
@@ -456,7 +457,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill((Long)value);
+ lcv.fill((Long) value);
lcv.isNull[0] = false;
}
}
@@ -469,7 +470,7 @@ public class VectorizedRowBatchCtx {
lcv.isNull[0] = true;
lcv.isRepeating = true;
} else {
- lcv.fill(((Date)value).getTime());
+ lcv.fill(((Date) value).getTime());
lcv.isNull[0] = false;
}
}
@@ -521,17 +522,31 @@ public class VectorizedRowBatchCtx {
dv.isNull[0] = true;
dv.isRepeating = true;
} else {
- HiveDecimal hd = (HiveDecimal)(value);
- dv.vector[0] = new Decimal128(hd.toString(), (short)hd.scale());
+ HiveDecimal hd = (HiveDecimal) value;
+ dv.vector[0] = new Decimal128(hd.toString(), (short) hd.scale());
dv.isRepeating = true;
dv.isNull[0] = false;
}
}
break;
-
+
+ case BINARY: {
+ BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
+ byte[] bytes = (byte[]) value;
+ if (bytes == null) {
+ bcv.noNulls = false;
+ bcv.isNull[0] = true;
+ bcv.isRepeating = true;
+ } else {
+ bcv.fill(bytes);
+ bcv.isNull[0] = false;
+ }
+ }
+ break;
+
case STRING: {
BytesColumnVector bcv = (BytesColumnVector) batch.cols[colIndex];
- String sVal = (String)value;
+ String sVal = (String) value;
if (sVal == null) {
bcv.noNulls = false;
bcv.isNull[0] = true;
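
The BINARY partition-column branch above relies on the repeating-column trick: the constant partition value is stored once and every row in the batch resolves to it. A small sketch of that idea, with a hypothetical RepeatingColumn standing in for BytesColumnVector and its fill()/isRepeating semantics:

// Sketch of the "repeating column" pattern used when a partition column has a single
// constant value for the whole batch, as in the BINARY branch above. RepeatingColumn is
// a stand-in, not a Hive class.
public class RepeatingColumnSketch {
  static final class RepeatingColumn {
    byte[] value; boolean isRepeating; boolean[] isNull = new boolean[1024]; boolean noNulls = true;
    void fill(byte[] v) { value = v; isRepeating = true; }     // store the constant once
    byte[] get(int row) { return isRepeating ? value : null; } // every row resolves to the same bytes
  }

  static void setPartitionValue(RepeatingColumn col, byte[] value) {
    if (value == null) {
      col.noNulls = false; col.isNull[0] = true; col.isRepeating = true; // whole batch reads as null
    } else {
      col.fill(value);
      col.isNull[0] = false;
    }
  }

  public static void main(String[] args) {
    RepeatingColumn col = new RepeatingColumn();
    setPartitionValue(col, new byte[] { 0x01, 0x02 });
    System.out.println("row 500 sees " + col.get(500).length + " bytes without per-row copies");
  }
}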
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorUDFDateString.java Sun Aug 10 01:33:50 2014
@@ -23,11 +23,15 @@ import org.apache.commons.logging.LogFac
import org.apache.hadoop.hive.ql.exec.vector.VectorGroupByOperator;
import org.apache.hadoop.io.Text;
-import java.sql.Date;
+import java.text.SimpleDateFormat;
+import java.util.Date;
+import java.text.ParseException;
public class VectorUDFDateString extends StringUnaryUDF {
private static final long serialVersionUID = 1L;
+ private transient static SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd");
+
private static final Log LOG = LogFactory.getLog(
VectorUDFDateString.class.getName());
@@ -41,13 +45,10 @@ public class VectorUDFDateString extends
return null;
}
try {
- Date date = Date.valueOf(s.toString());
- t.set(date.toString());
+ Date date = formatter.parse(s.toString());
+ t.set(formatter.format(date));
return t;
- } catch (IllegalArgumentException e) {
- if (LOG.isDebugEnabled()) {
- LOG.info("VectorUDFDateString passed bad string for Date.valueOf '" + s.toString() + "'");
- }
+ } catch (ParseException e) {
return null;
}
}
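
The change above swaps Date.valueOf for a SimpleDateFormat parse/format round trip, mapping malformed input to null instead of raising an error. A standalone sketch of that behavior, assuming the same "yyyy-MM-dd" pattern; this is an illustration, not the actual VectorUDFDateString code path:

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;

// Sketch of the date-normalization behavior the hunk above switches to: parse with a
// yyyy-MM-dd SimpleDateFormat and return null on malformed input instead of throwing.
public class DateStringSketch {
  private static final SimpleDateFormat FORMATTER = new SimpleDateFormat("yyyy-MM-dd");

  static String normalize(String s) {
    if (s == null) {
      return null;
    }
    try {
      Date date = FORMATTER.parse(s);   // accepts e.g. "2014-8-9" and reparses it leniently
      return FORMATTER.format(date);    // canonical "yyyy-MM-dd" form
    } catch (ParseException e) {
      return null;                      // bad input maps to SQL NULL rather than an error
    }
  }

  public static void main(String[] args) {
    System.out.println(normalize("2014-08-10"));   // prints 2014-08-10
    System.out.println(normalize("not-a-date"));   // prints null
  }
}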
Modified: hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java
URL: http://svn.apache.org/viewvc/hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java?rev=1617040&r1=1617039&r2=1617040&view=diff
==============================================================================
--- hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java (original)
+++ hive/branches/spark/ql/src/java/org/apache/hadoop/hive/ql/io/orc/RecordReaderImpl.java Sun Aug 10 01:33:50 2014
@@ -910,8 +910,11 @@ class RecordReaderImpl implements Record
private InStream stream;
private IntegerReader lengths = null;
+ private final LongColumnVector scratchlcv;
+
BinaryTreeReader(Path path, int columnId, Configuration conf) {
super(path, columnId, conf);
+ scratchlcv = new LongColumnVector();
}
@Override
@@ -969,8 +972,18 @@ class RecordReaderImpl implements Record
@Override
Object nextVector(Object previousVector, long batchSize) throws IOException {
- throw new UnsupportedOperationException(
- "NextBatch is not supported operation for Binary type");
+ BytesColumnVector result = null;
+ if (previousVector == null) {
+ result = new BytesColumnVector();
+ } else {
+ result = (BytesColumnVector) previousVector;
+ }
+
+ // Read present/isNull stream
+ super.nextVector(result, batchSize);
+
+ BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
+ return result;
}
@Override
@@ -1361,6 +1374,66 @@ class RecordReaderImpl implements Record
}
}
+ private static class BytesColumnVectorUtil {
+ // This method contains the common code for reading bytes into a BytesColumnVector.
+ // It is used by the BINARY, STRING, CHAR, and VARCHAR types.
+ public static void setRefToOrcByteArrays(InStream stream, IntegerReader lengths, LongColumnVector scratchlcv,
+ BytesColumnVector result, long batchSize) throws IOException {
+
+ // Read lengths
+ scratchlcv.isNull = result.isNull; // Notice we are replacing the isNull vector here...
+ lengths.nextVector(scratchlcv, batchSize);
+ int totalLength = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ totalLength += (int) scratchlcv.vector[i];
+ }
+ }
+ } else {
+ if (!scratchlcv.isNull[0]) {
+ totalLength = (int) (batchSize * scratchlcv.vector[0]);
+ }
+ }
+
+ // Read all the strings for this batch
+ byte[] allBytes = new byte[totalLength];
+ int offset = 0;
+ int len = totalLength;
+ while (len > 0) {
+ int bytesRead = stream.read(allBytes, offset, len);
+ if (bytesRead < 0) {
+ throw new EOFException("Can't finish byte read from " + stream);
+ }
+ len -= bytesRead;
+ offset += bytesRead;
+ }
+
+ // Too expensive to figure out 'repeating' by comparisons.
+ result.isRepeating = false;
+ offset = 0;
+ if (!scratchlcv.isRepeating) {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
+ offset += scratchlcv.vector[i];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ } else {
+ for (int i = 0; i < batchSize; i++) {
+ if (!scratchlcv.isNull[i]) {
+ result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
+ offset += scratchlcv.vector[0];
+ } else {
+ result.setRef(i, allBytes, 0, 0);
+ }
+ }
+ }
+ }
+ }
+
/**
* A reader for string columns that are direct encoded in the current
* stripe.
@@ -1443,57 +1516,7 @@ class RecordReaderImpl implements Record
// Read present/isNull stream
super.nextVector(result, batchSize);
- // Read lengths
- scratchlcv.isNull = result.isNull;
- lengths.nextVector(scratchlcv, batchSize);
- int totalLength = 0;
- if (!scratchlcv.isRepeating) {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- totalLength += (int) scratchlcv.vector[i];
- }
- }
- } else {
- if (!scratchlcv.isNull[0]) {
- totalLength = (int) (batchSize * scratchlcv.vector[0]);
- }
- }
-
- //Read all the strings for this batch
- byte[] allBytes = new byte[totalLength];
- int offset = 0;
- int len = totalLength;
- while (len > 0) {
- int bytesRead = stream.read(allBytes, offset, len);
- if (bytesRead < 0) {
- throw new EOFException("Can't finish byte read from " + stream);
- }
- len -= bytesRead;
- offset += bytesRead;
- }
-
- // Too expensive to figure out 'repeating' by comparisons.
- result.isRepeating = false;
- offset = 0;
- if (!scratchlcv.isRepeating) {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[i]);
- offset += scratchlcv.vector[i];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
- }
- } else {
- for (int i = 0; i < batchSize; i++) {
- if (!scratchlcv.isNull[i]) {
- result.setRef(i, allBytes, offset, (int) scratchlcv.vector[0]);
- offset += scratchlcv.vector[0];
- } else {
- result.setRef(i, allBytes, 0, 0);
- }
- }
- }
+ BytesColumnVectorUtil.setRefToOrcByteArrays(stream, lengths, scratchlcv, result, batchSize);
return result;
}