You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ai...@apache.org on 2018/08/22 17:27:08 UTC
[2/2] hive git commit: HIVE-20246: Configurable collecting stats by
using DO_NOT_UPDATE_STATS table property (Alice Fan, reviewed by Aihua Xu)
HIVE-20246: Configurable collecting stats by using DO_NOT_UPDATE_STATS table property (Alice Fan, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ff255432
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ff255432
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ff255432
Branch: refs/heads/master
Commit: ff25543282532613c9cfa768a8bf8ee728b3734e
Parents: 1f9c70e
Author: Aihua Xu <ai...@apache.org>
Authored: Wed Aug 22 10:05:12 2018 -0700
Committer: Aihua Xu <ai...@apache.org>
Committed: Wed Aug 22 10:05:12 2018 -0700
----------------------------------------------------------------------
.../hadoop/hive/metastore/HiveMetaStore.java | 27 ++++++++--
.../hive/metastore/TestHiveMetaStore.java | 57 +++++++++++++++++++-
2 files changed, 79 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
index 067eb5a..e971d0f 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
@@ -3174,8 +3174,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
part.setCreateTime((int) time);
part.putToParameters(hive_metastoreConstants.DDL_TIME, Long.toString(time));
- if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
- !MetaStoreServerUtils.isView(tbl)) {
+ if (canUpdateStats(tbl)) {
MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, envContext, true);
}
@@ -3791,6 +3790,27 @@ public class HiveMetaStore extends ThriftHiveMetastore {
return result;
}
+  /**
+   * Decides whether HMS may compute partition stats for the given table.
+   * Stats are NOT updated when any of the following holds:
+   * 1) Table property 'DO_NOT_UPDATE_STATS' = True
+   * 2) HMS configuration property 'STATS_AUTO_GATHER' = False
+   * 3) The table is a view
+   *
+   * @param tbl table whose parameters and type are inspected
+   * @return true only when none of the three conditions above applies
+   */
+  private boolean canUpdateStats(Table tbl) {
+    Map<String, String> props = tbl.getParameters();
+    // A missing parameter map or a missing key both mean "not opted out".
+    boolean optedOut = props != null
+        && Boolean.parseBoolean(props.get(StatsSetupConst.DO_NOT_UPDATE_STATS));
+    return MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER)
+        && !MetaStoreServerUtils.isView(tbl)
+        && !optedOut;
+  }
+
private void initializeAddedPartition(
final Table tbl, final Partition part, boolean madeDir) throws MetaException {
initializeAddedPartition(tbl, new PartitionSpecProxy.SimplePartitionWrapperIterator(part), madeDir);
@@ -3798,8 +3818,7 @@ public class HiveMetaStore extends ThriftHiveMetastore {
private void initializeAddedPartition(
final Table tbl, final PartitionSpecProxy.PartitionIterator part, boolean madeDir) throws MetaException {
- if (MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER) &&
- !MetaStoreServerUtils.isView(tbl)) {
+ if (canUpdateStats(tbl)) {
MetaStoreServerUtils.updatePartitionStatsFast(part, tbl, wh, madeDir, false, null, true);
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ff255432/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
----------------------------------------------------------------------
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
index 60beab6..4937d9d 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.hive.metastore;
-import java.lang.reflect.Field;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
@@ -38,6 +37,8 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
+import java.lang.reflect.*;
+import static org.mockito.Mockito.mock;
import com.google.common.collect.Sets;
import org.apache.hadoop.hive.metastore.client.builder.DatabaseBuilder;
@@ -57,6 +58,7 @@ import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.hive.common.StatsSetupConst;
import org.apache.hadoop.hive.metastore.api.AggrStats;
import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData;
@@ -95,6 +97,8 @@ import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
+import static org.mockito.Mockito.never;
+import static org.mockito.Mockito.verify;
public abstract class TestHiveMetaStore {
private static final Logger LOG = LoggerFactory.getLogger(TestHiveMetaStore.class);
@@ -3098,4 +3102,55 @@ public abstract class TestHiveMetaStore {
int size = allUuids.size();
assertEquals(numAPICallsPerThread * parallelCalls, size);
}
+
+  /**
+   * Verifies that initializing an added partition does NOT calculate partition statistics if
+   * <ol>
+   * <li>table property DO_NOT_UPDATE_STATS is true</li>
+   * <li>STATS_AUTO_GATHER is false</li>
+   * <li>the table is a view</li>
+   * </ol>
+   * The handler is given a mocked {@code Warehouse}; each scenario asserts that the mock was
+   * never asked for the file statuses of the partition location.
+   */
+  @Test
+  public void testUpdatePartitionStat_doesNotUpdateStats() throws Exception {
+    final String DB_NAME = "db1";
+    final String TABLE_NAME = "tbl1";
+    Table tbl = new TableBuilder()
+        .setDbName(DB_NAME)
+        .setTableName(TABLE_NAME)
+        .addCol("id", "int")
+        .addTableParam(StatsSetupConst.DO_NOT_UPDATE_STATS, "true")
+        .build(null);
+    List<String> vals = new ArrayList<>(2);
+    vals.add("col1");
+    vals.add("col2");
+    Partition part = new Partition();
+    part.setDbName(DB_NAME);
+    part.setTableName(TABLE_NAME);
+    part.setValues(vals);
+    part.setParameters(new HashMap<>());
+    part.setSd(tbl.getSd().deepCopy());
+    part.getSd().setSerdeInfo(tbl.getSd().getSerdeInfo());
+    part.getSd().setLocation(tbl.getSd().getLocation() + "/partCol=1");
+    Warehouse wh = mock(Warehouse.class);
+    HiveMetaStore.HMSHandler hms = new HiveMetaStore.HMSHandler("", conf, false);
+    // Hand the mock to the handler; without this the verify() calls below can never fail.
+    // NOTE(review): assumes HMSHandler keeps its Warehouse in a private field named "wh" - confirm.
+    Field whField = HiveMetaStore.HMSHandler.class.getDeclaredField("wh");
+    whField.setAccessible(true);
+    whField.set(hms, wh);
+    // initializeAddedPartition(Table, Partition, boolean) is private, so reach it via reflection
+    Method m = hms.getClass().getDeclaredMethod("initializeAddedPartition", Table.class, Partition.class, boolean.class);
+    m.setAccessible(true);
+
+    // Remember the shared setting so it can be restored even if an assertion fails
+    final boolean autoGather = MetastoreConf.getBoolVar(conf, ConfVars.STATS_AUTO_GATHER);
+    try {
+      //STATS_AUTO_GATHER = true, so only DO_NOT_UPDATE_STATS = true can suppress the stats update
+      MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true);
+      m.invoke(hms, tbl, part, false);
+      verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+
+      //Remove tbl's DO_NOT_UPDATE_STATS & set STATS_AUTO_GATHER = false
+      tbl.unsetParameters();
+      MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, false);
+      m.invoke(hms, tbl, part, false);
+      verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+
+      //Set STATS_AUTO_GATHER = true and set tbl as a VIRTUAL_VIEW
+      MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, true);
+      tbl.setTableType("VIRTUAL_VIEW");
+      m.invoke(hms, tbl, part, false);
+      verify(wh, never()).getFileStatusesForLocation(part.getSd().getLocation());
+    } finally {
+      MetastoreConf.setBoolVar(conf, ConfVars.STATS_AUTO_GATHER, autoGather);
+    }
+  }
}