You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/08/10 23:10:29 UTC
hive git commit: HIVE-17285 : Fixes for bit vector retrievals and merging (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Repository: hive
Updated Branches:
refs/heads/master 637123a77 -> 9e5dca9f6
HIVE-17285 : Fixes for bit vector retrievals and merging (Ashutosh Chauhan via Jesus Camacho Rodriguez)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e5dca9f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e5dca9f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e5dca9f
Branch: refs/heads/master
Commit: 9e5dca9f6e98fc291f9e54bc767deeebd6221fe9
Parents: 637123a
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Aug 9 18:42:45 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Aug 10 16:09:28 2017 -0700
----------------------------------------------------------------------
.../hive/metastore/MetaStoreDirectSql.java | 35 ++++++---
.../hadoop/hive/metastore/MetaStoreUtils.java | 62 +++++++++++----
.../hive/metastore/cache/CachedStore.java | 81 ++++++++------------
.../aggr/DateColumnStatsAggregator.java | 15 ++--
.../aggr/DecimalColumnStatsAggregator.java | 11 ++-
.../aggr/DoubleColumnStatsAggregator.java | 15 ++--
.../aggr/LongColumnStatsAggregator.java | 13 ++--
.../aggr/StringColumnStatsAggregator.java | 14 ++--
8 files changed, 147 insertions(+), 99 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 73754ff..b3274ca 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -127,7 +127,7 @@ class MetaStoreDirectSql {
@java.lang.annotation.Target(java.lang.annotation.ElementType.FIELD)
@java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
private @interface TableName {}
-
+
// Table names with schema name, if necessary
@TableName
private String DBS, TBLS, PARTITIONS, DATABASE_PARAMS, PARTITION_PARAMS, SORT_COLS, SD_PARAMS,
@@ -151,7 +151,7 @@ class MetaStoreDirectSql {
batchSize = DatabaseProduct.needsInBatching(dbType) ? 1000 : NO_BATCHING;
}
this.batchSize = batchSize;
-
+
for (java.lang.reflect.Field f : this.getClass().getDeclaredFields()) {
if (f.getAnnotation(TableName.class) == null) continue;
try {
@@ -281,7 +281,7 @@ class MetaStoreDirectSql {
public String getSchema() {
return schema;
}
-
+
public boolean isCompatibleDatastore() {
return isCompatibleDatastore;
}
@@ -393,6 +393,7 @@ class MetaStoreDirectSql {
return Collections.emptyList();
}
return runBatched(partNames, new Batchable<String, Partition>() {
+ @Override
public List<Partition> run(List<String> input) throws MetaException {
String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
return getPartitionsViaSqlFilterInternal(dbName, tblName, null, filter, input,
@@ -415,8 +416,8 @@ class MetaStoreDirectSql {
}
public static class SqlFilterForPushdown {
- private List<Object> params = new ArrayList<Object>();
- private List<String> joins = new ArrayList<String>();
+ private final List<Object> params = new ArrayList<Object>();
+ private final List<String> joins = new ArrayList<String>();
private String filter;
private Table table;
}
@@ -526,6 +527,7 @@ class MetaStoreDirectSql {
// Get full objects. For Oracle/etc. do it in batches.
List<Partition> result = runBatched(sqlResult, new Batchable<Object, Partition>() {
+ @Override
public List<Partition> run(List<Object> input) throws MetaException {
return getPartitionsFromPartitionIds(dbNameLcase, tblNameLcase, isView, input);
}
@@ -949,6 +951,7 @@ class MetaStoreDirectSql {
if (value == null)
return null;
if (value instanceof Blob) {
+ //derby, oracle
try {
// getBytes function says: pos the ordinal position of the first byte in
// the BLOB value to be extracted; the first byte is at position 1
@@ -956,7 +959,12 @@ class MetaStoreDirectSql {
} catch (SQLException e) {
throw new MetaException("Encounter error while processing blob.");
}
- } else {
+ }
+ else if (value instanceof byte[]) {
+ // mysql, postgres, sql server
+ return new String((byte[])value);
+ }
+ else {
// this may happen when enablebitvector is false
LOG.debug("Expected blob type but got " + value.getClass().getName());
return null;
@@ -1251,6 +1259,7 @@ class MetaStoreDirectSql {
final String queryText0 = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " "
+ " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
+ @Override
public List<Object[]> run(List<String> input) throws MetaException {
String queryText = queryText0 + makeParams(input.size()) + ")";
Object[] params = new Object[input.size() + 2];
@@ -1356,8 +1365,10 @@ class MetaStoreDirectSql {
+ " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
+ " group by \"PARTITION_NAME\"";
List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+ @Override
public List<Long> run(final List<String> inputColName) throws MetaException {
return runBatched(partNames, new Batchable<String, Long>() {
+ @Override
public List<Long> run(List<String> inputPartNames) throws MetaException {
long partsFound = 0;
String queryText = String.format(queryText0,
@@ -1396,8 +1407,10 @@ class MetaStoreDirectSql {
final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
final boolean areAllPartsFound = (partsFound == partNames.size());
return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+ @Override
public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+ @Override
public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
return columnStatisticsObjForPartitionsBatch(dbName, tableName, inputPartNames,
inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
@@ -1466,7 +1479,7 @@ class MetaStoreDirectSql {
String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
boolean useDensityFunctionForNDVEstimation, double ndvTuner, boolean enableBitVector) throws MetaException {
if(enableBitVector) {
- return aggrStatsUseJava(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+ return aggrStatsUseJava(dbName, tableName, partNames, colNames, useDensityFunctionForNDVEstimation, ndvTuner);
}
else {
return aggrStatsUseDB(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
@@ -1474,14 +1487,14 @@ class MetaStoreDirectSql {
}
private List<ColumnStatisticsObj> aggrStatsUseJava(String dbName, String tableName,
- List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+ List<String> partNames, List<String> colNames,
boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
// 1. get all the stats for colNames in partNames;
List<ColumnStatistics> partStats = getPartitionStats(dbName, tableName, partNames, colNames,
true);
// 2. use util function to aggr stats
return MetaStoreUtils.aggrPartitionStats(partStats, dbName, tableName, partNames, colNames,
- areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+ useDensityFunctionForNDVEstimation, ndvTuner);
}
private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName,
@@ -1679,7 +1692,7 @@ class MetaStoreDirectSql {
row[2 + colStatIndex] = null;
} else {
Long val = extractSqlLong(o);
- row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size()));
+ row[2 + colStatIndex] = val / sumVal * (partNames.size());
}
} else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min
|| IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
@@ -1802,8 +1815,10 @@ class MetaStoreDirectSql {
+ " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
+ " in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
+ @Override
public List<Object[]> run(final List<String> inputColNames) throws MetaException {
Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
+ @Override
public List<Object[]> run(List<String> inputPartNames) throws MetaException {
String queryText = String.format(queryText0,
makeParams(inputColNames.size()), makeParams(inputPartNames.size()));
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index c95749c..6a54306 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -46,11 +46,18 @@ import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.google.common.base.Predicates;
+import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.ListUtils;
@@ -1906,7 +1913,7 @@ public class MetaStoreUtils {
// present in both, overwrite stats for columns absent in metastore and
// leave alone columns stats missing from stats task. This last case may
// leave stats in stale state. This will be addressed later.
- LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}",
+ LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}",
csNew.getStatsObj().size(), csOld.getStatsObjSize());
}
// In this case, we have to find out which columns can be merged.
@@ -1963,7 +1970,7 @@ public class MetaStoreUtils {
// given a list of partStats, this function will give you an aggr stats
public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats,
String dbName, String tableName, List<String> partNames, List<String> colNames,
- boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
+ boolean useDensityFunctionForNDVEstimation, double ndvTuner)
throws MetaException {
// 1. group by the stats by colNames
// map the colName to List<ColumnStatistics>
@@ -1980,27 +1987,54 @@ public class MetaStoreUtils {
map.get(obj.getColName()).add(singleCS);
}
}
- return aggrPartitionStats(map,dbName,tableName,partNames,colNames,areAllPartsFound,useDensityFunctionForNDVEstimation, ndvTuner);
+ return aggrPartitionStats(map,dbName,tableName,partNames,colNames,useDensityFunctionForNDVEstimation, ndvTuner);
}
public static List<ColumnStatisticsObj> aggrPartitionStats(
Map<String, List<ColumnStatistics>> map, String dbName, String tableName,
- List<String> partNames, List<String> colNames, boolean areAllPartsFound,
- boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+ final List<String> partNames, List<String> colNames,
+ final boolean useDensityFunctionForNDVEstimation,final double ndvTuner) throws MetaException {
List<ColumnStatisticsObj> colStats = new ArrayList<>();
- // 2. aggr stats for each colName
- // TODO: thread pool can be used to speed up the process
- for (Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
- List<ColumnStatistics> css = entry.getValue();
- ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
- .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
- useDensityFunctionForNDVEstimation, ndvTuner);
- ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
- colStats.add(statsObj);
+ // 2. Aggregate stats for each column in a separate thread
+ if (map.size()< 1) {
+ //stats are absent in RDBMS
+ LOG.debug("No stats data found for: dbName=" +dbName +" tblName=" + tableName +
+ " partNames= " + partNames + " colNames=" + colNames );
+ return colStats;
+ }
+ final ExecutorService pool = Executors.newFixedThreadPool(Math.min(map.size(), 16),
+ new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build());
+ final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList();
+
+ long start = System.currentTimeMillis();
+ for (final Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
+ futures.add(pool.submit(new Callable<ColumnStatisticsObj>() {
+ @Override
+ public ColumnStatisticsObj call() throws Exception {
+ List<ColumnStatistics> css = entry.getValue();
+ ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
+ .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
+ useDensityFunctionForNDVEstimation, ndvTuner);
+ ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
+ return statsObj;
+ }}));
+ }
+ pool.shutdown();
+ for (Future<ColumnStatisticsObj> future : futures) {
+ try {
+ colStats.add(future.get());
+ } catch (InterruptedException | ExecutionException e) {
+ pool.shutdownNow();
+ LOG.debug(e.toString());
+ throw new MetaException(e.toString());
+ }
}
+ LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}",
+ ((System.currentTimeMillis() - (double)start))/1000, Math.min(map.size(), 16));
return colStats;
}
+
/**
* Produce a hash for the storage descriptor
* @param sd storage descriptor to hash
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index ce98a6e..697cc2e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -81,19 +81,15 @@ import org.apache.hadoop.hive.metastore.api.Type;
import org.apache.hadoop.hive.metastore.api.UnknownDBException;
import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.util.StringUtils;
import org.apache.hive.common.util.HiveStringUtils;
import org.apache.thrift.TException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
// TODO filter->expr
// TODO functionCache
@@ -277,6 +273,7 @@ public class CachedStore implements RawStore, Configurable {
synchronized void startCacheUpdateService() {
if (cacheUpdateMaster == null) {
cacheUpdateMaster = Executors.newScheduledThreadPool(1, new ThreadFactory() {
+ @Override
public Thread newThread(Runnable r) {
Thread t = Executors.defaultThreadFactory().newThread(r);
t.setName("CachedStore-CacheUpdateService: Thread-" + t.getId());
@@ -321,7 +318,7 @@ public class CachedStore implements RawStore, Configurable {
static class CacheUpdateMasterWork implements Runnable {
- private CachedStore cachedStore;
+ private final CachedStore cachedStore;
public CacheUpdateMasterWork(CachedStore cachedStore) {
this.cachedStore = cachedStore;
@@ -1540,63 +1537,51 @@ public class CachedStore implements RawStore, Configurable {
}
@Override
- public AggrStats get_aggr_stats_for(String dbName, String tblName, List<String> partNames,
- List<String> colNames) throws MetaException, NoSuchObjectException {
- List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
- for (String colName : colNames) {
- ColumnStatisticsObj colStat =
- mergeColStatsForPartitions(HiveStringUtils.normalizeIdentifier(dbName),
- HiveStringUtils.normalizeIdentifier(tblName), partNames, colName);
- if (colStat == null) {
- // Stop and fall back to underlying RawStore
- colStats = null;
- break;
- } else {
- colStats.add(colStat);
- }
- }
- if (colStats == null) {
- return rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
- } else {
+ public AggrStats get_aggr_stats_for(String dbName, String tblName, List<String> partNames,
+ List<String> colNames) throws MetaException, NoSuchObjectException {
+ List<ColumnStatisticsObj> colStats = mergeColStatsForPartitions(
+ HiveStringUtils.normalizeIdentifier(dbName), HiveStringUtils.normalizeIdentifier(tblName),
+ partNames, colNames);
return new AggrStats(colStats, partNames.size());
- }
- }
- private ColumnStatisticsObj mergeColStatsForPartitions(String dbName, String tblName,
- List<String> partNames, String colName) throws MetaException {
+ }
+
+ private List<ColumnStatisticsObj> mergeColStatsForPartitions(String dbName, String tblName,
+ List<String> partNames, List<String> colNames) throws MetaException {
final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(),
HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
final double ndvTuner = HiveConf.getFloatVar(getConf(),
HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
Map<String, List<ColumnStatistics>> map = new HashMap<>();
- List<ColumnStatistics> list = new ArrayList<>();
- boolean areAllPartsFound = true;
- for (String partName : partNames) {
- String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
- colName);
- List<ColumnStatisticsObj> singleObj = new ArrayList<>();
- ColumnStatisticsObj colStatsForPart = SharedCache
- .getCachedPartitionColStats(colStatsCacheKey);
- if (colStatsForPart != null) {
- singleObj.add(colStatsForPart);
- ColumnStatisticsDesc css = new ColumnStatisticsDesc(false, dbName, tblName);
- css.setPartName(partName);
- list.add(new ColumnStatistics(css, singleObj));
- } else {
- areAllPartsFound = false;
+
+ for (String colName : colNames) {
+ List<ColumnStatistics> colStats = new ArrayList<>();
+ for (String partName : partNames) {
+ String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
+ colName);
+ List<ColumnStatisticsObj> colStat = new ArrayList<>();
+ ColumnStatisticsObj colStatsForPart = SharedCache
+ .getCachedPartitionColStats(colStatsCacheKey);
+ if (colStatsForPart != null) {
+ colStat.add(colStatsForPart);
+ ColumnStatisticsDesc csDesc = new ColumnStatisticsDesc(false, dbName, tblName);
+ csDesc.setPartName(partName);
+ colStats.add(new ColumnStatistics(csDesc, colStat));
+ } else {
+ LOG.debug("Stats not found in CachedStore for: dbName={} tblName={} partName={} colName={}",
+ dbName, tblName,partName, colName);
+ }
}
+ map.put(colName, colStats);
}
- map.put(colName, list);
- List<String> colNames = new ArrayList<>();
- colNames.add(colName);
// Note that enableBitVector does not apply here because ColumnStatisticsObj
// itself will tell whether
// bitvector is null or not and aggr logic can automatically apply.
- return MetaStoreUtils
- .aggrPartitionStats(map, dbName, tblName, partNames, colNames, areAllPartsFound,
- useDensityFunctionForNDVEstimation, ndvTuner).iterator().next();
+ return MetaStoreUtils.aggrPartitionStats(map, dbName, tblName, partNames, colNames,
+ useDensityFunctionForNDVEstimation, ndvTuner);
}
+
@Override
public long cleanupEvents() {
return rawStore.cleanupEvents();
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
index 6fae3e5..04a1eb5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
@@ -232,12 +232,12 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
+ LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+ columnStatisticsData.getDateStats().getNumDVs(),partNames.size(), css.size());
statsObj.setStatsData(columnStatisticsData);
- LOG.debug("Ndv estimatation for " + colName + " is "
- + columnStatisticsData.getDateStats().getNumDVs());
return statsObj;
}
-
+
private long diff(Date d1, Date d2) {
return d1.getDaysSinceEpoch() - d2.getDaysSinceEpoch();
}
@@ -264,9 +264,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
extractedAdjustedStatsMap.entrySet());
// get the lowValue
Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DateColumnStatsData> o1,
Map.Entry<String, DateColumnStatsData> o2) {
- return diff(o1.getValue().getLowValue(), o2.getValue().getLowValue()) < 0 ? -1 : 1;
+ return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
}
});
double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -286,9 +287,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
// get the highValue
Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DateColumnStatsData> o1,
Map.Entry<String, DateColumnStatsData> o2) {
- return diff(o1.getValue().getHighValue(), o2.getValue().getHighValue()) < 0 ? -1 : 1;
+ return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
}
});
minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -317,9 +319,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
// get the ndv
long ndv = 0;
Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DateColumnStatsData> o1,
Map.Entry<String, DateColumnStatsData> o2) {
- return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+ return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
}
});
long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
index c5e72eb..d220e7f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
@@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory;
public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements
IExtrapolatePartStatus {
-
+
private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class);
@Override
@@ -253,9 +253,9 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
+ LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+ columnStatisticsData.getDecimalStats().getNumDVs(),partNames.size(), css.size());
statsObj.setStatsData(columnStatisticsData);
- LOG.debug("Ndv estimatation for " + colName + " is "
- + columnStatisticsData.getDecimalStats().getNumDVs());
return statsObj;
}
@@ -273,6 +273,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
extractedAdjustedStatsMap.entrySet());
// get the lowValue
Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
Map.Entry<String, DecimalColumnStatsData> o2) {
return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
@@ -295,6 +296,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
// get the highValue
Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
Map.Entry<String, DecimalColumnStatsData> o2) {
return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
@@ -328,9 +330,10 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
long ndvMin = 0;
long ndvMax = 0;
Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
Map.Entry<String, DecimalColumnStatsData> o2) {
- return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+ return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
}
});
long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
index e55c412..1b44dd9 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
@@ -228,8 +228,8 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
- LOG.debug("Ndv estimatation for " + colName + " is "
- + columnStatisticsData.getDoubleStats().getNumDVs());
+ LOG.debug("Ndv estimatation for {} is {}. # of partitions requested: {}. # of partitions found: {}", colName,
+ columnStatisticsData.getDoubleStats().getNumDVs(),partNames.size(), css.size());
statsObj.setStatsData(columnStatisticsData);
return statsObj;
}
@@ -248,9 +248,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
extractedAdjustedStatsMap.entrySet());
// get the lowValue
Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
Map.Entry<String, DoubleColumnStatsData> o2) {
- return o1.getValue().getLowValue() < o2.getValue().getLowValue() ? -1 : 1;
+ return Double.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue());
}
});
double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -270,9 +271,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
// get the highValue
Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
Map.Entry<String, DoubleColumnStatsData> o2) {
- return o1.getValue().getHighValue() < o2.getValue().getHighValue() ? -1 : 1;
+ return Double.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue());
}
});
minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -303,9 +305,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
long ndvMin = 0;
long ndvMax = 0;
Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
Map.Entry<String, DoubleColumnStatsData> o2) {
- return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+ return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
}
});
long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
@@ -341,5 +344,5 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
extrapolateDoubleData.setNumDVs(ndv);
extrapolateData.setDoubleStats(extrapolateDoubleData);
}
-
+
}
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
index 2ee09f3..802ad1a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
@@ -229,9 +229,9 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
}
+ LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+ columnStatisticsData.getLongStats().getNumDVs(),partNames.size(), css.size());
statsObj.setStatsData(columnStatisticsData);
- LOG.debug("Ndv estimatation for " + colName + " is "
- + columnStatisticsData.getLongStats().getNumDVs());
return statsObj;
}
@@ -249,9 +249,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
extractedAdjustedStatsMap.entrySet());
// get the lowValue
Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, LongColumnStatsData> o1,
Map.Entry<String, LongColumnStatsData> o2) {
- return o1.getValue().getLowValue() < o2.getValue().getLowValue() ? -1 : 1;
+ return Long.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue());
}
});
double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -271,9 +272,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
// get the highValue
Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, LongColumnStatsData> o1,
Map.Entry<String, LongColumnStatsData> o2) {
- return o1.getValue().getHighValue() < o2.getValue().getHighValue() ? -1 : 1;
+ return Long.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue());
}
});
minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -302,9 +304,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
// get the ndv
long ndv = 0;
Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, LongColumnStatsData> o1,
Map.Entry<String, LongColumnStatsData> o2) {
- return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+ return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
}
});
long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
index 2ea2fcc..e1a781f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -136,7 +136,6 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
for (ColumnStatistics cs : css) {
String partName = cs.getStatsDesc().getPartName();
ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
- StringColumnStatsData newData = cso.getStatsData().getStringStats();
adjustedIndexMap.put(partName, (double) indexMap.get(partName));
adjustedStatsMap.put(partName, cso.getStatsData());
}
@@ -201,8 +200,8 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
adjustedStatsMap, -1);
}
- LOG.debug("Ndv estimatation for " + colName + " is "
- + columnStatisticsData.getStringStats().getNumDVs());
+ LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+ columnStatisticsData.getStringStats().getNumDVs(),partNames.size(), css.size());
statsObj.setStatsData(columnStatisticsData);
return statsObj;
}
@@ -221,9 +220,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
extractedAdjustedStatsMap.entrySet());
// get the avgLen
Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, StringColumnStatsData> o1,
Map.Entry<String, StringColumnStatsData> o2) {
- return o1.getValue().getAvgColLen() < o2.getValue().getAvgColLen() ? -1 : 1;
+ return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen());
}
});
double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -243,9 +243,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
// get the maxLen
Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, StringColumnStatsData> o1,
Map.Entry<String, StringColumnStatsData> o2) {
- return o1.getValue().getMaxColLen() < o2.getValue().getMaxColLen() ? -1 : 1;
+ return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen());
}
});
minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -274,9 +275,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
// get the ndv
long ndv = 0;
Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+ @Override
public int compare(Map.Entry<String, StringColumnStatsData> o1,
Map.Entry<String, StringColumnStatsData> o2) {
- return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+ return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
}
});
minInd = adjustedIndexMap.get(list.get(0).getKey());