You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2017/08/10 23:10:29 UTC

hive git commit: HIVE-17285 : Fixes for bit vector retrievals and merging (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Repository: hive
Updated Branches:
  refs/heads/master 637123a77 -> 9e5dca9f6


HIVE-17285 : Fixes for bit vector retrievals and merging (Ashutosh Chauhan via Jesus Camacho Rodriguez)

Signed-off-by: Ashutosh Chauhan <ha...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/9e5dca9f
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/9e5dca9f
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/9e5dca9f

Branch: refs/heads/master
Commit: 9e5dca9f6e98fc291f9e54bc767deeebd6221fe9
Parents: 637123a
Author: Ashutosh Chauhan <ha...@apache.org>
Authored: Wed Aug 9 18:42:45 2017 -0700
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Thu Aug 10 16:09:28 2017 -0700

----------------------------------------------------------------------
 .../hive/metastore/MetaStoreDirectSql.java      | 35 ++++++---
 .../hadoop/hive/metastore/MetaStoreUtils.java   | 62 +++++++++++----
 .../hive/metastore/cache/CachedStore.java       | 81 ++++++++------------
 .../aggr/DateColumnStatsAggregator.java         | 15 ++--
 .../aggr/DecimalColumnStatsAggregator.java      | 11 ++-
 .../aggr/DoubleColumnStatsAggregator.java       | 15 ++--
 .../aggr/LongColumnStatsAggregator.java         | 13 ++--
 .../aggr/StringColumnStatsAggregator.java       | 14 ++--
 8 files changed, 147 insertions(+), 99 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index 73754ff..b3274ca 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -127,7 +127,7 @@ class MetaStoreDirectSql {
   @java.lang.annotation.Target(java.lang.annotation.ElementType.FIELD)
   @java.lang.annotation.Retention(java.lang.annotation.RetentionPolicy.RUNTIME)
   private @interface TableName {}
-  
+
   // Table names with schema name, if necessary
   @TableName
   private String DBS, TBLS, PARTITIONS, DATABASE_PARAMS, PARTITION_PARAMS, SORT_COLS, SD_PARAMS,
@@ -151,7 +151,7 @@ class MetaStoreDirectSql {
       batchSize = DatabaseProduct.needsInBatching(dbType) ? 1000 : NO_BATCHING;
     }
     this.batchSize = batchSize;
-    
+
     for (java.lang.reflect.Field f : this.getClass().getDeclaredFields()) {
       if (f.getAnnotation(TableName.class) == null) continue;
       try {
@@ -281,7 +281,7 @@ class MetaStoreDirectSql {
   public String getSchema() {
     return schema;
   }
-  
+
   public boolean isCompatibleDatastore() {
     return isCompatibleDatastore;
   }
@@ -393,6 +393,7 @@ class MetaStoreDirectSql {
       return Collections.emptyList();
     }
     return runBatched(partNames, new Batchable<String, Partition>() {
+      @Override
       public List<Partition> run(List<String> input) throws MetaException {
         String filter = "" + PARTITIONS + ".\"PART_NAME\" in (" + makeParams(input.size()) + ")";
         return getPartitionsViaSqlFilterInternal(dbName, tblName, null, filter, input,
@@ -415,8 +416,8 @@ class MetaStoreDirectSql {
   }
 
   public static class SqlFilterForPushdown {
-    private List<Object> params = new ArrayList<Object>();
-    private List<String> joins = new ArrayList<String>();
+    private final List<Object> params = new ArrayList<Object>();
+    private final List<String> joins = new ArrayList<String>();
     private String filter;
     private Table table;
   }
@@ -526,6 +527,7 @@ class MetaStoreDirectSql {
 
     // Get full objects. For Oracle/etc. do it in batches.
     List<Partition> result = runBatched(sqlResult, new Batchable<Object, Partition>() {
+      @Override
       public List<Partition> run(List<Object> input) throws MetaException {
         return getPartitionsFromPartitionIds(dbNameLcase, tblNameLcase, isView, input);
       }
@@ -949,6 +951,7 @@ class MetaStoreDirectSql {
     if (value == null)
       return null;
     if (value instanceof Blob) {
+      //derby, oracle
       try {
         // getBytes function says: pos the ordinal position of the first byte in
         // the BLOB value to be extracted; the first byte is at position 1
@@ -956,7 +959,12 @@ class MetaStoreDirectSql {
       } catch (SQLException e) {
         throw new MetaException("Encounter error while processing blob.");
       }
-    } else {
+    }
+    else if (value instanceof byte[]) {
+      // mysql, postgres, sql server
+      return new String((byte[])value);
+    }
+	else {
       // this may happen when enablebitvector is false
       LOG.debug("Expected blob type but got " + value.getClass().getName());
       return null;
@@ -1251,6 +1259,7 @@ class MetaStoreDirectSql {
     final String queryText0 = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " "
           + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
     Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
+      @Override
       public List<Object[]> run(List<String> input) throws MetaException {
         String queryText = queryText0 + makeParams(input.size()) + ")";
         Object[] params = new Object[input.size() + 2];
@@ -1356,8 +1365,10 @@ class MetaStoreDirectSql {
         + " and \"COLUMN_NAME\" in (%1$s) and \"PARTITION_NAME\" in (%2$s)"
         + " group by \"PARTITION_NAME\"";
     List<Long> allCounts = runBatched(colNames, new Batchable<String, Long>() {
+      @Override
       public List<Long> run(final List<String> inputColName) throws MetaException {
         return runBatched(partNames, new Batchable<String, Long>() {
+          @Override
           public List<Long> run(List<String> inputPartNames) throws MetaException {
             long partsFound = 0;
             String queryText = String.format(queryText0,
@@ -1396,8 +1407,10 @@ class MetaStoreDirectSql {
     final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
     final boolean areAllPartsFound = (partsFound == partNames.size());
     return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
+      @Override
       public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
         return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
+          @Override
           public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
             return columnStatisticsObjForPartitionsBatch(dbName, tableName, inputPartNames,
                 inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
@@ -1466,7 +1479,7 @@ class MetaStoreDirectSql {
       String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
       boolean useDensityFunctionForNDVEstimation, double ndvTuner, boolean enableBitVector) throws MetaException {
     if(enableBitVector) {
-      return aggrStatsUseJava(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+      return aggrStatsUseJava(dbName, tableName, partNames, colNames, useDensityFunctionForNDVEstimation, ndvTuner);
     }
     else {
       return aggrStatsUseDB(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
@@ -1474,14 +1487,14 @@ class MetaStoreDirectSql {
   }
 
   private List<ColumnStatisticsObj> aggrStatsUseJava(String dbName, String tableName,
-      List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      List<String> partNames, List<String> colNames,
       boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
     // 1. get all the stats for colNames in partNames;
     List<ColumnStatistics> partStats = getPartitionStats(dbName, tableName, partNames, colNames,
         true);
     // 2. use util function to aggr stats
     return MetaStoreUtils.aggrPartitionStats(partStats, dbName, tableName, partNames, colNames,
-        areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+        useDensityFunctionForNDVEstimation, ndvTuner);
   }
 
   private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName,
@@ -1679,7 +1692,7 @@ class MetaStoreDirectSql {
                 row[2 + colStatIndex] = null;
               } else {
                 Long val = extractSqlLong(o);
-                row[2 + colStatIndex] = (Long) (val / sumVal * (partNames.size()));
+                row[2 + colStatIndex] = val / sumVal * (partNames.size());
               }
             } else if (IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Min
                 || IExtrapolatePartStatus.aggrTypes[colStatIndex] == IExtrapolatePartStatus.AggrType.Max) {
@@ -1802,8 +1815,10 @@ class MetaStoreDirectSql {
         + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
         + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
     Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
+      @Override
       public List<Object[]> run(final List<String> inputColNames) throws MetaException {
         Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
+          @Override
           public List<Object[]> run(List<String> inputPartNames) throws MetaException {
             String queryText = String.format(queryText0,
                 makeParams(inputColNames.size()), makeParams(inputPartNames.size()));

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index c95749c..6a54306 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -46,11 +46,18 @@ import java.util.SortedMap;
 import java.util.SortedSet;
 import java.util.TreeMap;
 import java.util.TreeSet;
+import java.util.concurrent.Callable;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import com.google.common.base.Predicates;
+import com.google.common.collect.Lists;
 import com.google.common.collect.Maps;
+import com.google.common.util.concurrent.ThreadFactoryBuilder;
 
 import org.apache.commons.collections.CollectionUtils;
 import org.apache.commons.collections.ListUtils;
@@ -1906,7 +1913,7 @@ public class MetaStoreUtils {
       // present in both, overwrite stats for columns absent in metastore and
       // leave alone columns stats missing from stats task. This last case may
       // leave stats in stale state. This will be addressed later.
-      LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}", 
+      LOG.debug("New ColumnStats size is {}, but old ColumnStats size is {}",
           csNew.getStatsObj().size(), csOld.getStatsObjSize());
     }
     // In this case, we have to find out which columns can be merged.
@@ -1963,7 +1970,7 @@ public class MetaStoreUtils {
   // given a list of partStats, this function will give you an aggr stats
   public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats,
       String dbName, String tableName, List<String> partNames, List<String> colNames,
-      boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner)
       throws MetaException {
     // 1. group by the stats by colNames
     // map the colName to List<ColumnStatistics>
@@ -1980,27 +1987,54 @@ public class MetaStoreUtils {
         map.get(obj.getColName()).add(singleCS);
       }
     }
-    return aggrPartitionStats(map,dbName,tableName,partNames,colNames,areAllPartsFound,useDensityFunctionForNDVEstimation, ndvTuner);
+    return aggrPartitionStats(map,dbName,tableName,partNames,colNames,useDensityFunctionForNDVEstimation, ndvTuner);
   }
 
   public static List<ColumnStatisticsObj> aggrPartitionStats(
       Map<String, List<ColumnStatistics>> map, String dbName, String tableName,
-      List<String> partNames, List<String> colNames, boolean areAllPartsFound,
-      boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+      final List<String> partNames, List<String> colNames,
+      final boolean useDensityFunctionForNDVEstimation,final double ndvTuner) throws MetaException {
     List<ColumnStatisticsObj> colStats = new ArrayList<>();
-    // 2. aggr stats for each colName
-    // TODO: thread pool can be used to speed up the process
-    for (Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
-      List<ColumnStatistics> css = entry.getValue();
-      ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
-          .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
-          useDensityFunctionForNDVEstimation, ndvTuner);
-      ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
-      colStats.add(statsObj);
+    // 2. Aggregate stats for each column in a separate thread
+    if (map.size()< 1) {
+      //stats are absent in RDBMS
+      LOG.debug("No stats data found for: dbName=" +dbName +" tblName=" + tableName +
+          " partNames= " + partNames + " colNames=" + colNames );
+      return colStats;
+    }
+    final ExecutorService pool = Executors.newFixedThreadPool(Math.min(map.size(), 16),
+        new ThreadFactoryBuilder().setDaemon(true).setNameFormat("aggr-col-stats-%d").build());
+    final List<Future<ColumnStatisticsObj>> futures = Lists.newLinkedList();
+
+    long start = System.currentTimeMillis();
+    for (final Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
+      futures.add(pool.submit(new Callable<ColumnStatisticsObj>() {
+        @Override
+        public ColumnStatisticsObj call() throws Exception {
+          List<ColumnStatistics> css = entry.getValue();
+          ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
+              .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
+              useDensityFunctionForNDVEstimation, ndvTuner);
+          ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
+          return statsObj;
+        }}));
+    }
+    pool.shutdown();
+    for (Future<ColumnStatisticsObj> future : futures) {
+      try {
+        colStats.add(future.get());
+      } catch (InterruptedException | ExecutionException e) {
+        pool.shutdownNow();
+        LOG.debug(e.toString());
+        throw new MetaException(e.toString());
+      }
     }
+    LOG.debug("Time for aggr col stats in seconds: {} Threads used: {}",
+      ((System.currentTimeMillis() - (double)start))/1000, Math.min(map.size(), 16));
     return colStats;
   }
 
+
   /**
    * Produce a hash for the storage descriptor
    * @param sd storage descriptor to hash

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index ce98a6e..697cc2e 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -81,19 +81,15 @@ import org.apache.hadoop.hive.metastore.api.Type;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
-import org.apache.hadoop.util.StringUtils;
 import org.apache.hive.common.util.HiveStringUtils;
 import org.apache.thrift.TException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.annotations.VisibleForTesting;
-import com.google.common.collect.Lists;
 
 // TODO filter->expr
 // TODO functionCache
@@ -277,6 +273,7 @@ public class CachedStore implements RawStore, Configurable {
   synchronized void startCacheUpdateService() {
     if (cacheUpdateMaster == null) {
       cacheUpdateMaster = Executors.newScheduledThreadPool(1, new ThreadFactory() {
+        @Override
         public Thread newThread(Runnable r) {
           Thread t = Executors.defaultThreadFactory().newThread(r);
           t.setName("CachedStore-CacheUpdateService: Thread-" + t.getId());
@@ -321,7 +318,7 @@ public class CachedStore implements RawStore, Configurable {
 
   static class CacheUpdateMasterWork implements Runnable {
 
-    private CachedStore cachedStore;
+    private final CachedStore cachedStore;
 
     public CacheUpdateMasterWork(CachedStore cachedStore) {
       this.cachedStore = cachedStore;
@@ -1540,63 +1537,51 @@ public class CachedStore implements RawStore, Configurable {
   }
 
   @Override
-  public AggrStats get_aggr_stats_for(String dbName, String tblName, List<String> partNames,
-      List<String> colNames) throws MetaException, NoSuchObjectException {
-    List<ColumnStatisticsObj> colStats = new ArrayList<ColumnStatisticsObj>(colNames.size());
-    for (String colName : colNames) {
-      ColumnStatisticsObj colStat =
-          mergeColStatsForPartitions(HiveStringUtils.normalizeIdentifier(dbName),
-              HiveStringUtils.normalizeIdentifier(tblName), partNames, colName);
-      if (colStat == null) {
-        // Stop and fall back to underlying RawStore
-        colStats = null;
-        break;
-      } else {
-        colStats.add(colStat);
-      }
-    }
-    if (colStats == null) {
-      return rawStore.get_aggr_stats_for(dbName, tblName, partNames, colNames);
-    } else {
+    public AggrStats get_aggr_stats_for(String dbName, String tblName, List<String> partNames,
+	  List<String> colNames) throws MetaException, NoSuchObjectException {
+	  List<ColumnStatisticsObj> colStats = mergeColStatsForPartitions(
+	    HiveStringUtils.normalizeIdentifier(dbName), HiveStringUtils.normalizeIdentifier(tblName),
+	    partNames, colNames);
       return new AggrStats(colStats, partNames.size());
-    }
-  }
 
-  private ColumnStatisticsObj mergeColStatsForPartitions(String dbName, String tblName,
-      List<String> partNames, String colName) throws MetaException {
+	  }
+
+  private List<ColumnStatisticsObj> mergeColStatsForPartitions(String dbName, String tblName,
+      List<String> partNames, List<String> colNames) throws MetaException {
     final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(),
         HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
     final double ndvTuner = HiveConf.getFloatVar(getConf(),
         HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
     Map<String, List<ColumnStatistics>> map = new HashMap<>();
-    List<ColumnStatistics> list = new ArrayList<>();
-    boolean areAllPartsFound = true;
-    for (String partName : partNames) {
-      String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
-          colName);
-      List<ColumnStatisticsObj> singleObj = new ArrayList<>();
-      ColumnStatisticsObj colStatsForPart = SharedCache
-          .getCachedPartitionColStats(colStatsCacheKey);
-      if (colStatsForPart != null) {
-        singleObj.add(colStatsForPart);
-        ColumnStatisticsDesc css = new ColumnStatisticsDesc(false, dbName, tblName);
-        css.setPartName(partName);
-        list.add(new ColumnStatistics(css, singleObj));
-      } else {
-        areAllPartsFound = false;
+
+    for (String colName : colNames) {
+      List<ColumnStatistics> colStats = new ArrayList<>();
+      for (String partName : partNames) {
+        String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
+            colName);
+        List<ColumnStatisticsObj> colStat = new ArrayList<>();
+        ColumnStatisticsObj colStatsForPart = SharedCache
+            .getCachedPartitionColStats(colStatsCacheKey);
+        if (colStatsForPart != null) {
+          colStat.add(colStatsForPart);
+          ColumnStatisticsDesc csDesc = new ColumnStatisticsDesc(false, dbName, tblName);
+          csDesc.setPartName(partName);
+          colStats.add(new ColumnStatistics(csDesc, colStat));
+        } else {
+          LOG.debug("Stats not found in CachedStore for: dbName={} tblName={} partName={} colName={}",
+            dbName, tblName,partName, colName);
+        }
       }
+      map.put(colName, colStats);
     }
-    map.put(colName, list);
-    List<String> colNames = new ArrayList<>();
-    colNames.add(colName);
     // Note that enableBitVector does not apply here because ColumnStatisticsObj
     // itself will tell whether
     // bitvector is null or not and aggr logic can automatically apply.
-    return MetaStoreUtils
-        .aggrPartitionStats(map, dbName, tblName, partNames, colNames, areAllPartsFound,
-            useDensityFunctionForNDVEstimation, ndvTuner).iterator().next();
+    return MetaStoreUtils.aggrPartitionStats(map, dbName, tblName, partNames, colNames,
+        useDensityFunctionForNDVEstimation, ndvTuner);
   }
 
+
   @Override
   public long cleanupEvents() {
     return rawStore.cleanupEvents();

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
index 6fae3e5..04a1eb5 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DateColumnStatsAggregator.java
@@ -232,12 +232,12 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
       extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
           adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
     }
+    LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+        columnStatisticsData.getDateStats().getNumDVs(),partNames.size(), css.size());
     statsObj.setStatsData(columnStatisticsData);
-    LOG.debug("Ndv estimatation for " + colName + " is "
-        + columnStatisticsData.getDateStats().getNumDVs());
     return statsObj;
   }
-  
+
   private long diff(Date d1, Date d2) {
     return d1.getDaysSinceEpoch() - d2.getDaysSinceEpoch();
   }
@@ -264,9 +264,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
         extractedAdjustedStatsMap.entrySet());
     // get the lowValue
     Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DateColumnStatsData> o1,
           Map.Entry<String, DateColumnStatsData> o2) {
-        return diff(o1.getValue().getLowValue(), o2.getValue().getLowValue()) < 0 ? -1 : 1;
+        return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
       }
     });
     double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -286,9 +287,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
 
     // get the highValue
     Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DateColumnStatsData> o1,
           Map.Entry<String, DateColumnStatsData> o2) {
-        return diff(o1.getValue().getHighValue(), o2.getValue().getHighValue()) < 0 ? -1 : 1;
+        return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
       }
     });
     minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -317,9 +319,10 @@ public class DateColumnStatsAggregator extends ColumnStatsAggregator implements
     // get the ndv
     long ndv = 0;
     Collections.sort(list, new Comparator<Map.Entry<String, DateColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DateColumnStatsData> o1,
           Map.Entry<String, DateColumnStatsData> o2) {
-        return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
       }
     });
     long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
index c5e72eb..d220e7f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DecimalColumnStatsAggregator.java
@@ -40,7 +40,7 @@ import org.slf4j.LoggerFactory;
 
 public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implements
     IExtrapolatePartStatus {
-  
+
   private static final Logger LOG = LoggerFactory.getLogger(DecimalColumnStatsAggregator.class);
 
   @Override
@@ -253,9 +253,9 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
       extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
           adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
     }
+    LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+        columnStatisticsData.getDecimalStats().getNumDVs(),partNames.size(), css.size());
     statsObj.setStatsData(columnStatisticsData);
-    LOG.debug("Ndv estimatation for " + colName + " is "
-        + columnStatisticsData.getDecimalStats().getNumDVs());
     return statsObj;
   }
 
@@ -273,6 +273,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
         extractedAdjustedStatsMap.entrySet());
     // get the lowValue
     Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
           Map.Entry<String, DecimalColumnStatsData> o2) {
         return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue());
@@ -295,6 +296,7 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
 
     // get the highValue
     Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
           Map.Entry<String, DecimalColumnStatsData> o2) {
         return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue());
@@ -328,9 +330,10 @@ public class DecimalColumnStatsAggregator extends ColumnStatsAggregator implemen
     long ndvMin = 0;
     long ndvMax = 0;
     Collections.sort(list, new Comparator<Map.Entry<String, DecimalColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DecimalColumnStatsData> o1,
           Map.Entry<String, DecimalColumnStatsData> o2) {
-        return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
       }
     });
     long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
index e55c412..1b44dd9 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/DoubleColumnStatsAggregator.java
@@ -228,8 +228,8 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
       extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
           adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
     }
-    LOG.debug("Ndv estimatation for " + colName + " is "
-        + columnStatisticsData.getDoubleStats().getNumDVs());
+    LOG.debug("Ndv estimatation for {} is {}. # of partitions requested: {}. # of partitions found: {}", colName,
+        columnStatisticsData.getDoubleStats().getNumDVs(),partNames.size(), css.size());
     statsObj.setStatsData(columnStatisticsData);
     return statsObj;
   }
@@ -248,9 +248,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
         extractedAdjustedStatsMap.entrySet());
     // get the lowValue
     Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
           Map.Entry<String, DoubleColumnStatsData> o2) {
-        return o1.getValue().getLowValue() < o2.getValue().getLowValue() ? -1 : 1;
+        return Double.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue());
       }
     });
     double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -270,9 +271,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
 
     // get the highValue
     Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
           Map.Entry<String, DoubleColumnStatsData> o2) {
-        return o1.getValue().getHighValue() < o2.getValue().getHighValue() ? -1 : 1;
+        return Double.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue());
       }
     });
     minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -303,9 +305,10 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
     long ndvMin = 0;
     long ndvMax = 0;
     Collections.sort(list, new Comparator<Map.Entry<String, DoubleColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, DoubleColumnStatsData> o1,
           Map.Entry<String, DoubleColumnStatsData> o2) {
-        return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
       }
     });
     long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();
@@ -341,5 +344,5 @@ public class DoubleColumnStatsAggregator extends ColumnStatsAggregator implement
     extrapolateDoubleData.setNumDVs(ndv);
     extrapolateData.setDoubleStats(extrapolateDoubleData);
   }
-  
+
 }

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
index 2ee09f3..802ad1a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/LongColumnStatsAggregator.java
@@ -229,9 +229,9 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
       extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
           adjustedStatsMap, densityAvgSum / adjustedStatsMap.size());
     }
+    LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+        columnStatisticsData.getLongStats().getNumDVs(),partNames.size(), css.size());
     statsObj.setStatsData(columnStatisticsData);
-    LOG.debug("Ndv estimatation for " + colName + " is "
-        + columnStatisticsData.getLongStats().getNumDVs());
     return statsObj;
   }
 
@@ -249,9 +249,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
         extractedAdjustedStatsMap.entrySet());
     // get the lowValue
     Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, LongColumnStatsData> o1,
           Map.Entry<String, LongColumnStatsData> o2) {
-        return o1.getValue().getLowValue() < o2.getValue().getLowValue() ? -1 : 1;
+        return Long.compare(o1.getValue().getLowValue(), o2.getValue().getLowValue());
       }
     });
     double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -271,9 +272,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
 
     // get the highValue
     Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, LongColumnStatsData> o1,
           Map.Entry<String, LongColumnStatsData> o2) {
-        return o1.getValue().getHighValue() < o2.getValue().getHighValue() ? -1 : 1;
+        return Long.compare(o1.getValue().getHighValue(), o2.getValue().getHighValue());
       }
     });
     minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -302,9 +304,10 @@ public class LongColumnStatsAggregator extends ColumnStatsAggregator implements
     // get the ndv
     long ndv = 0;
     Collections.sort(list, new Comparator<Map.Entry<String, LongColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, LongColumnStatsData> o1,
           Map.Entry<String, LongColumnStatsData> o2) {
-        return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+        return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
       }
     });
     long lowerBound = list.get(list.size() - 1).getValue().getNumDVs();

http://git-wip-us.apache.org/repos/asf/hive/blob/9e5dca9f/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
index 2ea2fcc..e1a781f 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/columnstats/aggr/StringColumnStatsAggregator.java
@@ -136,7 +136,6 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
         for (ColumnStatistics cs : css) {
           String partName = cs.getStatsDesc().getPartName();
           ColumnStatisticsObj cso = cs.getStatsObjIterator().next();
-          StringColumnStatsData newData = cso.getStatsData().getStringStats();
           adjustedIndexMap.put(partName, (double) indexMap.get(partName));
           adjustedStatsMap.put(partName, cso.getStatsData());
         }
@@ -201,8 +200,8 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
       extrapolate(columnStatisticsData, partNames.size(), css.size(), adjustedIndexMap,
           adjustedStatsMap, -1);
     }
-    LOG.debug("Ndv estimatation for " + colName + " is "
-        + columnStatisticsData.getStringStats().getNumDVs());
+    LOG.debug("Ndv estimatation for {} is {} # of partitions requested: {} # of partitions found: {}", colName,
+        columnStatisticsData.getStringStats().getNumDVs(),partNames.size(), css.size());
     statsObj.setStatsData(columnStatisticsData);
     return statsObj;
   }
@@ -221,9 +220,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
         extractedAdjustedStatsMap.entrySet());
     // get the avgLen
     Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, StringColumnStatsData> o1,
           Map.Entry<String, StringColumnStatsData> o2) {
-        return o1.getValue().getAvgColLen() < o2.getValue().getAvgColLen() ? -1 : 1;
+        return Double.compare(o1.getValue().getAvgColLen(), o2.getValue().getAvgColLen());
       }
     });
     double minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -243,9 +243,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
 
     // get the maxLen
     Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, StringColumnStatsData> o1,
           Map.Entry<String, StringColumnStatsData> o2) {
-        return o1.getValue().getMaxColLen() < o2.getValue().getMaxColLen() ? -1 : 1;
+        return Long.compare(o1.getValue().getMaxColLen(), o2.getValue().getMaxColLen());
       }
     });
     minInd = adjustedIndexMap.get(list.get(0).getKey());
@@ -274,9 +275,10 @@ public class StringColumnStatsAggregator extends ColumnStatsAggregator implement
     // get the ndv
     long ndv = 0;
     Collections.sort(list, new Comparator<Map.Entry<String, StringColumnStatsData>>() {
+      @Override
       public int compare(Map.Entry<String, StringColumnStatsData> o1,
           Map.Entry<String, StringColumnStatsData> o2) {
-        return o1.getValue().getNumDVs() < o2.getValue().getNumDVs() ? -1 : 1;
+       return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs());
       }
     });
     minInd = adjustedIndexMap.get(list.get(0).getKey());