You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by ha...@apache.org on 2016/09/06 17:44:38 UTC
hive git commit: HIVE-14462 : Reduce number of partition check calls
in add_partitions (Rajesh Balamohan via Ashutosh Chauhan) Signed-off-by:
Ashutosh Chauhan
Repository: hive
Updated Branches:
refs/heads/master c661e947f -> ddda38be4
HIVE-14462 : Reduce number of partition check calls in add_partitions (Rajesh Balamohan via Ashutosh Chauhan)
Signed-off-by: Ashutosh Chauhan <ha...@apache.org>
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/ddda38be
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/ddda38be
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/ddda38be
Branch: refs/heads/master
Commit: ddda38be4cc7a9cec987671e9631c10b2a833738
Parents: c661e94
Author: Rajesh Balamohan <rbalamohan at apache dot org>
Authored: Mon Aug 8 03:32:00 2016 -0800
Committer: Ashutosh Chauhan <ha...@apache.org>
Committed: Tue Sep 6 10:44:03 2016 -0700
----------------------------------------------------------------------
.../org/apache/hadoop/hive/ql/exec/DDLTask.java | 6 +-
.../hadoop/hive/ql/metadata/CheckResult.java | 28 ++---
.../hive/ql/metadata/HiveMetaStoreChecker.java | 49 +++++++--
.../ql/metadata/TestHiveMetaStoreChecker.java | 110 ++++++++++---------
4 files changed, 115 insertions(+), 78 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/ddda38be/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
index b19ac49..5722305 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
@@ -1770,7 +1770,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
}
private void msckAddPartitionsOneByOne(Hive db, Table table,
- List<CheckResult.PartitionResult> partsNotInMs, List<String> repairOutput) {
+ Set<CheckResult.PartitionResult> partsNotInMs, List<String> repairOutput) {
for (CheckResult.PartitionResult part : partsNotInMs) {
try {
db.createPartition(table, Warehouse.makeSpecFromName(part.getPartitionName()));
@@ -1829,7 +1829,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db);
String[] names = Utilities.getDbTableName(msckDesc.getTableName());
checker.checkMetastore(names[0], names[1], msckDesc.getPartSpecs(), result);
- List<CheckResult.PartitionResult> partsNotInMs = result.getPartitionsNotInMs();
+ Set<CheckResult.PartitionResult> partsNotInMs = result.getPartitionsNotInMs();
if (msckDesc.isRepairPartitions() && !partsNotInMs.isEmpty()) {
AbstractList<String> vals = null;
String settingStr = HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_MSCK_PATH_VALIDATION);
@@ -1960,7 +1960,7 @@ public class DDLTask extends Task<DDLWork> implements Serializable {
* @throws IOException
* In case the writing fails
*/
- private boolean writeMsckResult(List<? extends Object> result, String msg,
+ private boolean writeMsckResult(Set<? extends Object> result, String msg,
Writer out, boolean wrote) throws IOException {
if (!result.isEmpty()) {
http://git-wip-us.apache.org/repos/asf/hive/blob/ddda38be/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
index ec9deeb..36b9250 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
@@ -17,23 +17,23 @@
*/
package org.apache.hadoop.hive.ql.metadata;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
/**
* Result class used by the HiveMetaStoreChecker.
*/
public class CheckResult {
- private List<String> tablesNotOnFs = new ArrayList<String>();
- private List<String> tablesNotInMs = new ArrayList<String>();
- private List<PartitionResult> partitionsNotOnFs = new ArrayList<PartitionResult>();
- private List<PartitionResult> partitionsNotInMs = new ArrayList<PartitionResult>();
+ private Set<String> tablesNotOnFs = new TreeSet<String>();
+ private Set<String> tablesNotInMs = new TreeSet<String>();
+ private Set<PartitionResult> partitionsNotOnFs = new TreeSet<PartitionResult>();
+ private Set<PartitionResult> partitionsNotInMs = new TreeSet<PartitionResult>();
/**
* @return a list of tables not found on the filesystem.
*/
- public List<String> getTablesNotOnFs() {
+ public Set<String> getTablesNotOnFs() {
return tablesNotOnFs;
}
@@ -41,14 +41,14 @@ public class CheckResult {
* @param tablesNotOnFs
* a list of tables not found on the filesystem.
*/
- public void setTablesNotOnFs(List<String> tablesNotOnFs) {
+ public void setTablesNotOnFs(Set<String> tablesNotOnFs) {
this.tablesNotOnFs = tablesNotOnFs;
}
/**
* @return a list of tables not found in the metastore.
*/
- public List<String> getTablesNotInMs() {
+ public Set<String> getTablesNotInMs() {
return tablesNotInMs;
}
@@ -56,14 +56,14 @@ public class CheckResult {
* @param tablesNotInMs
* a list of tables not found in the metastore.
*/
- public void setTablesNotInMs(List<String> tablesNotInMs) {
+ public void setTablesNotInMs(Set<String> tablesNotInMs) {
this.tablesNotInMs = tablesNotInMs;
}
/**
* @return a list of partitions not found on the fs
*/
- public List<PartitionResult> getPartitionsNotOnFs() {
+ public Set<PartitionResult> getPartitionsNotOnFs() {
return partitionsNotOnFs;
}
@@ -71,14 +71,14 @@ public class CheckResult {
* @param partitionsNotOnFs
* a list of partitions not found on the fs
*/
- public void setPartitionsNotOnFs(List<PartitionResult> partitionsNotOnFs) {
+ public void setPartitionsNotOnFs(Set<PartitionResult> partitionsNotOnFs) {
this.partitionsNotOnFs = partitionsNotOnFs;
}
/**
* @return a list of partitions not found in the metastore
*/
- public List<PartitionResult> getPartitionsNotInMs() {
+ public Set<PartitionResult> getPartitionsNotInMs() {
return partitionsNotInMs;
}
@@ -86,7 +86,7 @@ public class CheckResult {
* @param partitionsNotInMs
* a list of partitions not found in the metastore
*/
- public void setPartitionsNotInMs(List<PartitionResult> partitionsNotInMs) {
+ public void setPartitionsNotInMs(Set<PartitionResult> partitionsNotInMs) {
this.partitionsNotInMs = partitionsNotInMs;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ddda38be/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
index 13d9651..047589a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/HiveMetaStoreChecker.java
@@ -33,6 +33,8 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
+import com.google.common.collect.Sets;
+import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.FileStatus;
@@ -113,10 +115,10 @@ public class HiveMetaStoreChecker {
// check the specified partitions
checkTable(dbName, tableName, partitions, result);
}
- Collections.sort(result.getPartitionsNotInMs());
- Collections.sort(result.getPartitionsNotOnFs());
- Collections.sort(result.getTablesNotInMs());
- Collections.sort(result.getTablesNotOnFs());
+ LOG.info("Number of partitionsNotInMs=" + result.getPartitionsNotInMs()
+ + ", partitionsNotOnFs=" + result.getPartitionsNotOnFs()
+ + ", tablesNotInMs=" + result.getTablesNotInMs()
+ + ", tablesNotOnFs=" + result.getTablesNotOnFs());
} catch (MetaException e) {
throw new HiveException(e);
} catch (TException e) {
@@ -317,11 +319,17 @@ public class HiveMetaStoreChecker {
// remove the partition paths we know about
allPartDirs.removeAll(partPaths);
+ Set<String> partColNames = Sets.newHashSet();
+ for(FieldSchema fSchema : table.getPartCols()) {
+ partColNames.add(fSchema.getName());
+ }
+
// we should now only have the unexpected folders left
for (Path partPath : allPartDirs) {
FileSystem fs = partPath.getFileSystem(conf);
String partitionName = getPartitionName(fs.makeQualified(tablePath),
- partPath);
+ partPath, partColNames);
+ LOG.debug("PartitionName: " + partitionName);
if (partitionName != null) {
PartitionResult pr = new PartitionResult();
@@ -331,6 +339,7 @@ public class HiveMetaStoreChecker {
result.getPartitionsNotInMs().add(pr);
}
}
+ LOG.debug("Number of partitions not in metastore : " + result.getPartitionsNotInMs().size());
}
/**
@@ -340,19 +349,37 @@ public class HiveMetaStoreChecker {
* Path of the table.
* @param partitionPath
* Path of the partition.
+ * @param partCols
+ * Set of partition columns from table definition
* @return Partition name, for example partitiondate=2008-01-01
*/
- private String getPartitionName(Path tablePath, Path partitionPath) {
+ static String getPartitionName(Path tablePath, Path partitionPath,
+ Set<String> partCols) {
String result = null;
Path currPath = partitionPath;
+ LOG.debug("tablePath:" + tablePath + ", partCols: " + partCols);
+
while (currPath != null && !tablePath.equals(currPath)) {
- if (result == null) {
- result = currPath.getName();
- } else {
- result = currPath.getName() + Path.SEPARATOR + result;
- }
+ // format: partition=p_val
+ // Add only when table partition colName matches
+ String[] parts = currPath.getName().split("=");
+ if (parts != null && parts.length > 0) {
+ if (parts.length != 2) {
+ LOG.warn(currPath.getName() + " is not a valid partition name");
+ return result;
+ }
+ String partitionName = parts[0];
+ if (partCols.contains(partitionName)) {
+ if (result == null) {
+ result = currPath.getName();
+ } else {
+ result = currPath.getName() + Path.SEPARATOR + result;
+ }
+ }
+ }
currPath = currPath.getParent();
+ LOG.debug("currPath=" + currPath);
}
return result;
}
http://git-wip-us.apache.org/repos/asf/hive/blob/ddda38be/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
----------------------------------------------------------------------
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
index 3f26bcd..cda6e30 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/metadata/TestHiveMetaStoreChecker.java
@@ -24,6 +24,7 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
+import com.google.common.collect.Lists;
import junit.framework.TestCase;
import org.apache.hadoop.fs.FileSystem;
@@ -111,19 +112,19 @@ public class TestHiveMetaStoreChecker extends TestCase {
CheckResult result = new CheckResult();
checker.checkMetastore(dbName, null, null, result);
// we haven't added anything so should return an all ok
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// check table only, should not exist in ms
result = new CheckResult();
checker.checkMetastore(dbName, tableName, null, result);
assertEquals(1, result.getTablesNotInMs().size());
- assertEquals(tableName, result.getTablesNotInMs().get(0));
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(tableName, result.getTablesNotInMs().iterator().next());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
Database db = new Database();
db.setName(dbName);
@@ -139,18 +140,18 @@ public class TestHiveMetaStoreChecker extends TestCase {
// first check all (1) tables
result = new CheckResult();
checker.checkMetastore(dbName, null, null, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// then let's check the one we know about
result = new CheckResult();
checker.checkMetastore(dbName, tableName, null, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// remove the table folder
fs = table.getPath().getFileSystem(hive.getConf());
@@ -159,11 +160,11 @@ public class TestHiveMetaStoreChecker extends TestCase {
// now this shouldn't find the path on the fs
result = new CheckResult();
checker.checkMetastore(dbName, tableName, null, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());;
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());;
assertEquals(1, result.getTablesNotOnFs().size());
- assertEquals(tableName, result.getTablesNotOnFs().get(0));
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(tableName, result.getTablesNotOnFs().iterator().next());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// put it back and one additional table
fs.mkdirs(table.getPath());
@@ -176,10 +177,10 @@ public class TestHiveMetaStoreChecker extends TestCase {
result = new CheckResult();
checker.checkMetastore(dbName, null, null, result);
assertEquals(1, result.getTablesNotInMs().size());
- assertEquals(fakeTable.getName(), result.getTablesNotInMs().get(0));
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(fakeTable.getName(), Lists.newArrayList(result.getTablesNotInMs()).get(0));
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// create a new external table
hive.dropTable(dbName, tableName);
@@ -189,10 +190,10 @@ public class TestHiveMetaStoreChecker extends TestCase {
// should return all ok
result = new CheckResult();
checker.checkMetastore(dbName, null, null, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
}
public void testPartitionsCheck() throws HiveException, MetaException,
@@ -218,13 +219,26 @@ public class TestHiveMetaStoreChecker extends TestCase {
CheckResult result = new CheckResult();
checker.checkMetastore(dbName, tableName, null, result);
// all is well
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
List<Partition> partitions = hive.getPartitions(table);
assertEquals(2, partitions.size());
+ // add a fake partition dir on fs to ensure that it does not get added
+ fs = partitions.get(0).getDataLocation().getFileSystem(hive.getConf());
+ Path fakePart = new Path(table.getDataLocation().toString(),
+ "fakedate=2009-01-01/fakecity=sanjose");
+ fs.mkdirs(fakePart);
+ fs.deleteOnExit(fakePart);
+ checker.checkMetastore(dbName, tableName, null, result);
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(0, result.getPartitionsNotOnFs().size());
+ assertEquals(0, result.getPartitionsNotInMs().size());
+ assertEquals(2, partitions.size()); //no additional partitions got added
+
Partition partToRemove = partitions.get(0);
// As this partition (partdate=2008-01-01/partcity=london) is the only
// partition under (partdate=2008-01-01)
@@ -236,27 +250,24 @@ public class TestHiveMetaStoreChecker extends TestCase {
result = new CheckResult();
checker.checkMetastore(dbName, tableName, null, result);
// missing one partition on fs
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
assertEquals(1, result.getPartitionsNotOnFs().size());
- assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().get(0)
+ assertEquals(partToRemove.getName(), result.getPartitionsNotOnFs().iterator().next()
.getPartitionName());
- assertEquals(partToRemove.getTable().getTableName(), result
- .getPartitionsNotOnFs().get(0).getTableName());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
+ assertEquals(partToRemove.getTable().getTableName(),
+ result.getPartitionsNotOnFs().iterator().next().getTableName());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
List<Map<String, String>> partsCopy = new ArrayList<Map<String, String>>();
partsCopy.add(partitions.get(1).getSpec());
// check only the partition that exists, all should be well
result = new CheckResult();
checker.checkMetastore(dbName, tableName, partsCopy, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs());
-
- // put the other one back
- fs.mkdirs(partToRemovePath);
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs());
// old test is moved to msck_repair_2.q
@@ -265,12 +276,11 @@ public class TestHiveMetaStoreChecker extends TestCase {
hive.createTable(table);
result = new CheckResult();
checker.checkMetastore(dbName, null, null, result);
- assertEquals(Collections.<String>emptyList(), result.getTablesNotInMs());
- assertEquals(Collections.<String>emptyList(), result.getTablesNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotOnFs());
- assertEquals(Collections.<String>emptyList(), result.getPartitionsNotInMs()); //--0e
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotInMs());
+ assertEquals(Collections.<String>emptySet(), result.getTablesNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotOnFs());
+ assertEquals(Collections.<String>emptySet(), result.getPartitionsNotInMs()); //--0e
System.err.println("Test completed - partition check");
-
}
public void testDataDeletion() throws HiveException, MetaException,