Posted to commits@hive.apache.org by an...@apache.org on 2021/01/05 11:09:00 UTC

[hive] branch master updated: HIVE-24526: Get grouped locations of external table data using metatool. (Arko Sharma, reviewed by Pravin Kumar Sinha)

This is an automated email from the ASF dual-hosted git repository.

anishek pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new d5ea2f3  HIVE-24526: Get grouped locations of external table data using metatool. (Arko Sharma, reviewed by Pravin Kumar Sinha)
d5ea2f3 is described below

commit d5ea2f3bb81cd992ce2cf6ad1da23fc4db67c471
Author: Anishek Agarwal <an...@gmail.com>
AuthorDate: Tue Jan 5 16:38:42 2021 +0530

    HIVE-24526: Get grouped locations of external table data using metatool. (Arko Sharma, reviewed by Pravin Kumar Sinha)
---
 .../metastore/tools/metatool/TestHiveMetaTool.java | 312 +++++++++-
 .../metastore/tools/metatool/HiveMetaTool.java     |   4 +
 .../tools/metatool/HiveMetaToolCommandLine.java    |  59 +-
 .../tools/metatool/MetaToolTaskDiffExtTblLocs.java | 161 +++++
 .../tools/metatool/MetaToolTaskListExtTblLocs.java | 668 +++++++++++++++++++++
 .../metatool/TestHiveMetaToolCommandLine.java      |  30 +-
 .../metatool/TestMetaToolTaskListExtTblLocs.java   | 291 +++++++++
 7 files changed, 1518 insertions(+), 7 deletions(-)
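
For context, a minimal sketch (not part of the patch) of driving the two new tasks
programmatically, mirroring the tests below. The database name, output directories and
listing file names are hypothetical; the list task actually names its output
externalTableLocations_<pattern>_<timestamp>.txt:

    import org.apache.hadoop.hive.metastore.tools.metatool.HiveMetaTool;

    public class MetatoolUsageSketch {
      public static void main(String[] args) throws Exception {
        // -listExtTblLocs <dbName/pattern> <output-dir>: writes a JSON file mapping each
        // covering directory to the external tables/partitions stored under it.
        HiveMetaTool.main(new String[] {"-listExtTblLocs", "db1", "/tmp/extTblOut1"});
        // ...after some DDL changes, take a second listing...
        HiveMetaTool.main(new String[] {"-listExtTblLocs", "db1", "/tmp/extTblOut2"});
        // -diffExtTblLocs <file1> <file2> <output-dir>: diffs two listing files,
        // prefixing entities with "+ " (added) or "- " (removed).
        HiveMetaTool.main(new String[] {"-diffExtTblLocs",
            "/tmp/extTblOut1/listing1.txt", "/tmp/extTblOut2/listing2.txt", "/tmp/diffOut"});
      }
    }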

diff --git a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
index 81b7ff0..22e3fe0 100644
--- a/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaTool.java
@@ -19,15 +19,24 @@
 package org.apache.hadoop.hive.metastore.tools.metatool;
 
 import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.IOException;
 import java.io.OutputStream;
 import java.io.PrintStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.Set;
 
 
-
+import org.json.JSONObject;
+import org.json.JSONArray;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.Database;
@@ -35,12 +44,25 @@ import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.utils.TestTxnDbUtil;
+import org.apache.hadoop.hive.metastore.txn.TxnStore;
+import org.apache.hadoop.hive.metastore.txn.TxnUtils;
+import org.apache.commons.io.FileUtils;
+import org.apache.hadoop.fs.FileUtil;
+import org.apache.hadoop.hive.ql.io.HiveInputFormat;
+import org.apache.hadoop.hive.ql.processors.CommandProcessorException;
+import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.Driver;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.avro.AvroSerdeUtils.AvroTableProperties;
 import org.apache.hadoop.util.StringUtils;
 import org.apache.thrift.TException;
+import org.junit.Assert;
 import static org.junit.Assert.assertTrue;
 import static org.junit.Assert.assertEquals;
+import com.google.gson.JsonParser;
 import org.junit.Before;
 import org.junit.After;
 import org.junit.Test;
@@ -57,7 +79,12 @@ public class TestHiveMetaTool {
 
   private HiveMetaStoreClient client;
   private OutputStream os;
-
+  protected Driver d;
+  protected TxnStore txnHandler;
+  private static HiveConf hiveConf;
+  private static final String TEST_DATA_DIR = new File(System.getProperty("java.io.tmpdir") +
+          File.separator + TestHiveMetaTool.class.getCanonicalName() + "-" + System.currentTimeMillis()
+  ).getPath().replaceAll("\\\\", "/");
   @Before
   public void setUp() throws Exception {
 
@@ -66,19 +93,60 @@ public class TestHiveMetaTool {
       os = new ByteArrayOutputStream();
       System.setOut(new PrintStream(os));
 
-      HiveConf hiveConf = new HiveConf(HiveMetaTool.class);
+      hiveConf = new HiveConf(HiveMetaTool.class);
       client = new HiveMetaStoreClient(hiveConf);
 
       createDatabase();
       createTable();
 
       client.close();
+      Path workDir = new Path(System.getProperty("test.tmp.dir",
+              "target" + File.separator + "test" + File.separator + "tmp"));
+      hiveConf.set("mapred.local.dir", workDir + File.separator + this.getClass().getSimpleName()
+              + File.separator + "mapred" + File.separator + "local");
+      hiveConf.set("mapred.system.dir", workDir + File.separator + this.getClass().getSimpleName()
+              + File.separator + "mapred" + File.separator + "system");
+      hiveConf.set("mapreduce.jobtracker.staging.root.dir", workDir + File.separator + this.getClass().getSimpleName()
+              + File.separator + "mapred" + File.separator + "staging");
+      hiveConf.set("mapred.temp.dir", workDir + File.separator + this.getClass().getSimpleName()
+              + File.separator + "mapred" + File.separator + "temp");
+      hiveConf.set(HiveConf.ConfVars.PREEXECHOOKS.varname, "");
+      hiveConf.set(HiveConf.ConfVars.POSTEXECHOOKS.varname, "");
+      hiveConf.set(HiveConf.ConfVars.METASTOREWAREHOUSE.varname, getWarehouseDir());
+      hiveConf.setVar(HiveConf.ConfVars.HIVEINPUTFORMAT, HiveInputFormat.class.getName());
+      hiveConf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
+                      "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
+      hiveConf.setBoolVar(HiveConf.ConfVars.MERGE_CARDINALITY_VIOLATION_CHECK, true);
+      HiveConf.setBoolVar(hiveConf, HiveConf.ConfVars.MERGE_SPLIT_UPDATE, true);
+      hiveConf.setBoolVar(HiveConf.ConfVars.HIVESTATSCOLAUTOGATHER, false);
+      hiveConf.setBoolean("mapred.input.dir.recursive", true);
+      TestTxnDbUtil.setConfValues(hiveConf);
+      txnHandler = TxnUtils.getTxnStore(hiveConf);
+      TestTxnDbUtil.prepDb(hiveConf);
+      File f = new File(getWarehouseDir());
+      if (f.exists()) {
+        FileUtil.fullyDelete(f);
+      }
+      if (!(new File(getWarehouseDir()).mkdirs())) {
+        throw new RuntimeException("Could not create " + getWarehouseDir());
+      }
+      SessionState ss = SessionState.start(hiveConf);
+      ss.applyAuthorizationPolicy();
+      d = new Driver(new QueryState.Builder().withHiveConf(hiveConf).nonIsolated().build());
+      d.setMaxRows(10000);
     } catch (Exception e) {
       System.err.println("Unable to setup the hive metatool test");
       System.err.println(StringUtils.stringifyException(e));
       throw new Exception(e);
     }
   }
+  protected String getWarehouseDir() {
+    return getTestDataDir() + "/warehouse";
+  }
+
+  private String getTestDataDir() {
+    return TEST_DATA_DIR;
+  }
 
   private void createDatabase() throws Exception {
     if (client.getAllDatabases().contains(DB_NAME)) {
@@ -142,17 +210,255 @@ public class TestHiveMetaTool {
     checkAvroSchemaURLProps(AVRO_URI);
   }
 
+  /*
+   * Tests -listExtTblLocs option on various input combinations.
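+   * Each output file is a JSON object keyed by covering directory,
+   * e.g. {"<covering-dir>" : ["db1.ext", "db3.ext.p=0"]} (illustrative).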
+   */
+  @Test
+  public void testListExtTblLocs() throws Exception {
+    String extTblLocation = getTestDataDir() + "/ext";
+    String outLocation = getTestDataDir() + "/extTblOutput/";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL, getWarehouseDir());
+    MetaToolTaskListExtTblLocs.msConf = conf;
+
+    // Case 1 : Check default locations
+    // Inputs : db1, db2 in default locations, db3 in custom location
+    // Expected outputs: default locations for db1, db2 and custom location for db3 after aggregation
+    runStatementOnDriver("create database db1");
+    runStatementOnDriver("create database db2");
+    runStatementOnDriver("create database db3");
+    runStatementOnDriver("create external table db1.ext(a int) partitioned by (p int)");
+    runStatementOnDriver("create external table db2.ext(a int) partitioned by (p int)");
+    runStatementOnDriver("create external table db3.ext(a int) partitioned by (p int) " +
+            "location '" + getTestDataDir() + "/ext/tblLoc'");
+    runStatementOnDriver("alter table db3.ext add partition(p = 0) location '" + getTestDataDir() + "/part'" );
+    runStatementOnDriver("alter table db3.ext add partition(p = 1) location '" + getTestDataDir() + "/part'" );
+    JSONObject outJS = getListExtTblLocs("db*", outLocation);
+    //confirm default locations
+    Set<String> outLocationSet = outJS.keySet();
+    String expectedOutLoc1 = getAbsolutePath(getWarehouseDir() + "/db1.db");
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc1));
+    Assert.assertEquals(outLocationSet.size(), 4);
+    JSONArray outArr = outJS.getJSONArray(expectedOutLoc1);
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("db1.ext"));
+    String expectedOutLoc2 = getAbsolutePath(getWarehouseDir() + "/db2.db");
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc2));
+    outArr = outJS.getJSONArray(expectedOutLoc2);
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("db2.ext"));
+    String expectedOutLoc3 = getAbsolutePath(getTestDataDir() + "/part");
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc3));
+    outArr = outJS.getJSONArray(expectedOutLoc3);
+    Assert.assertEquals(outArr.length(), 2);
+    Assert.assertTrue(outArr.getString(0).equals("db3.ext.p=0"));
+    Assert.assertTrue(outArr.getString(1).equals("db3.ext.p=1"));
+    String expectedOutLoc4 = getAbsolutePath(getTestDataDir() + "/ext/tblLoc");
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc4));
+    outArr = outJS.getJSONArray(expectedOutLoc4);
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("db3.ext p(0/2)"));
+
+
+    // Case 2 : Check with special chars in partition-names : including quotes, timestamp formats, spaces, backslash etc.
+    // Also checks count of partitions in tbl-location.
+    // inputs   (default database)
+    //          ../ext/t1 - table1 location containing 3/5 partitions
+    //          ../ext/t2 - table2 location containing 2/4 partitions
+    //          ../ext/dir1/dir2/dir3 - 2 partitions of table1, 2 partitions of table2, table loc of table3 with 0 partitions.
+    //          ../ext    - partitions of table3
+    // expected output : [../ext/t1, ../ext/t2, ../ext/dir1/dir2/dir3/t1_parts (2 partitions), ../ext/dir1/dir2/dir3/t2_parts (2 partitions),
+    //                     ../ext/dir1/dir2/dir3/t3 (0 partitions), ../ext/t3_parts (3 partitions) ]
+    //                   Doesn't contain default database location as there are no entities in default location in this case,
+    //                   all data is under some custom location (../ext)
+    runStatementOnDriver("drop table ext");
+    runStatementOnDriver("create external table ext(a int) partitioned by (p varchar(3)) " +
+            "location '" + getTestDataDir() + "/ext/t1'");
+    runStatementOnDriver("create external table ext2(a int) partitioned by (flt string, dbl string) " +
+            "location '" + getTestDataDir() + "/ext/t2'");
+    runStatementOnDriver("create external table ext3(a int) partitioned by (dt string, timeSt string) "
+            + "location '" + getTestDataDir() + "/ext/dir1/dir2/dir3/t3'");
+    runStatementOnDriver("alter table ext add partition(p = 'A')");
+    runStatementOnDriver("alter table ext add partition(p = 'B')");
+    runStatementOnDriver("alter table ext add partition(p = 'UK')" );
+    runStatementOnDriver("alter table ext2 add partition(flt = '0.0', dbl = '0')");
+    runStatementOnDriver("alter table ext2 add partition(flt = '0.1', dbl = '1.1')");
+    runStatementOnDriver("alter table ext3 add partition(dt = '2020-12-01', timeSt = '23:23:23') location '"
+            + getTestDataDir() + "/ext/t3_parts'" );
+    runStatementOnDriver("alter table ext3 add partition(dt = '2020-12-02', timeSt = '22:22:22') location '"
+            + getTestDataDir() + "/ext/t3_parts'" );
+    runStatementOnDriver("alter table ext3 add partition(dt = '2020-12-03', timeSt = '21:21:21.1234') location '"
+            + getTestDataDir() + "/ext/t3_parts'" );
+    runStatementOnDriver("alter table ext add partition(p = \'A\\\\\') location '"
+            + getTestDataDir() + "/ext/dir1/dir2/dir3/t1_parts'" );
+    runStatementOnDriver("alter table ext add partition(p = \' A\"\') location '"
+            + getTestDataDir() + "/ext/dir1/dir2/dir3/t1_parts'" );
+    runStatementOnDriver("alter table ext2 add partition(flt = '0.1', dbl='3.22') location '"
+            + getTestDataDir() + "/ext/dir1/dir2/dir3/t2_parts'");
+    runStatementOnDriver("alter table ext2 add partition(flt = '0.22', dbl = '2.22') location '"
+            + getTestDataDir() + "/ext/dir1/dir2/dir3/t2_parts'");
+
+
+    outJS = getListExtTblLocs("default", outLocation);
+    expectedOutLoc1 = getAbsolutePath(extTblLocation + "/t1");
+    expectedOutLoc2 = getAbsolutePath(extTblLocation + "/t2");
+    expectedOutLoc3 = getAbsolutePath(extTblLocation + "/dir1/dir2/dir3/t1_parts");
+    expectedOutLoc4 = getAbsolutePath(extTblLocation + "/dir1/dir2/dir3/t2_parts");
+    String expectedOutLoc5 = getAbsolutePath(extTblLocation + "/dir1/dir2/dir3/t3");
+    String expectedOutLoc6 = getAbsolutePath(extTblLocation + "/t3_parts");
+
+    outLocationSet = outJS.keySet();
+    Assert.assertEquals(outLocationSet.size(), 6);
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc1));
+    outArr = outJS.getJSONArray(expectedOutLoc1); //t1
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext p(3/5)"));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc2));
+    outArr = outJS.getJSONArray(expectedOutLoc2); //t2
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext2 p(2/4)"));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc3)); //t1_parts
+    outArr = outJS.getJSONArray(expectedOutLoc3);
+    Assert.assertEquals(outArr.length(), 2);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext.p= A%22"));  //spaces, quotes
+    Assert.assertTrue(outArr.getString(1).equals("default.ext.p=A%5C")); //backslash
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc4)); //t2_parts
+    outArr = outJS.getJSONArray(expectedOutLoc4);
+    Assert.assertEquals(outArr.length(), 2);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext2.flt=0.1/dbl=3.22")); //periods, slash
+    Assert.assertTrue(outArr.getString(1).equals("default.ext2.flt=0.22/dbl=2.22"));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc5)); //t3
+    outArr = outJS.getJSONArray(expectedOutLoc5);
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext3 p(0/3)"));
+    Assert.assertTrue(outLocationSet.contains(expectedOutLoc6)); //t3_parts
+    outArr = outJS.getJSONArray(expectedOutLoc6);
+    Assert.assertEquals(outArr.length(), 3);
+    Assert.assertTrue(outArr.getString(0).equals("default.ext3.dt=2020-12-01/timest=23%3A23%3A23")); //date, timestamp formats
+    Assert.assertTrue(outArr.getString(1).equals("default.ext3.dt=2020-12-02/timest=22%3A22%3A22"));
+    Assert.assertTrue(outArr.getString(2).equals("default.ext3.dt=2020-12-03/timest=21%3A21%3A21.1234"));
+  }
+
+  /*
+   * Tests -diffExtTblLocs option on various input combinations.
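+   * Diff entries carry a "+ " prefix (present only in the second listing) or a "- " prefix (present only in the first).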
+   */
+  @Test
+  public void testDiffExtTblLocs() throws Exception {
+    String extTblLocation = getTestDataDir() + "/ext";
+    String outLocation = getTestDataDir() + "/extTblOutput";
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.WAREHOUSE_EXTERNAL, getWarehouseDir());
+    MetaToolTaskListExtTblLocs.msConf = conf;
+
+    //create first file using -listExtTblLocs
+    runStatementOnDriver("create database diffDb");
+    runStatementOnDriver("create external table diffDb.ext1(a int) partitioned by (p int)");
+    runStatementOnDriver("create external table diffDb.ext2(a int) partitioned by (p int)");
+    runStatementOnDriver("create external table diffDb.ext3(a int) partitioned by (p int) " +
+            "location '" + getTestDataDir() + "/ext/tblLoc'");
+    runStatementOnDriver("alter table diffDb.ext1 add partition(p = 0) location '" + getTestDataDir() + "/part'" );
+    runStatementOnDriver("alter table diffDb.ext1 add partition(p = 1) location '" + getTestDataDir() + "/part'" );
+    String outLocation1 = outLocation + "1";
+    getListExtTblLocs("diffDb", outLocation1);
+
+    //create second file using -listExtTblLocs after dropping a table, dropping a partition and adding a different partition
+    runStatementOnDriver("drop table diffDb.ext2");
+    runStatementOnDriver("alter table diffDb.ext1 drop partition(p = 0)" );
+    runStatementOnDriver("alter table diffDb.ext1 add partition(p = 3) location '" + getTestDataDir() + "/part'" );
+    String outLocation2 = outLocation + "2";
+    getListExtTblLocs("diffDb", outLocation2);
+
+    //run diff on the above two files
+    JSONObject outJS = getDiffExtTblLocs(outLocation1, outLocation2, outLocation);
+    Set<String> outLocationSet = outJS.keySet();
+    String defaultDbLoc = getAbsolutePath(getWarehouseDir() + "/diffdb.db");
+    Assert.assertEquals(outLocationSet.size(), 2);
+    Assert.assertTrue(outLocationSet.contains(defaultDbLoc));
+    JSONArray outArr = outJS.getJSONArray(defaultDbLoc);
+    Assert.assertEquals(outArr.length(), 1);
+    Assert.assertTrue(outArr.getString(0).equals("- diffdb.ext2")); // dropped ext2 from default location
+    String partLoc = getAbsolutePath(getTestDataDir() + "/part");
+    Assert.assertTrue(outLocationSet.contains(partLoc));
+    outArr = outJS.getJSONArray(partLoc);
+    Assert.assertEquals(outArr.length(), 2); //two entries - 1 for added partition and 1 for dropped partition
+    Assert.assertTrue(outArr.getString(0).equals("+ diffdb.ext1.p=3"));
+    Assert.assertTrue(outArr.getString(1).equals("- diffdb.ext1.p=0"));
+  }
+
+  private String getAbsolutePath(String extTblLocation) {
+    return "file:" + extTblLocation;
+  }
+
+  private JSONObject getListExtTblLocs(String dbName, String outLocation) throws IOException {
+    File f = new File(outLocation);
+    if (f.exists()) {
+      FileUtil.fullyDelete(f);
+    }
+    if (!(new File(outLocation).mkdirs())) {
+      throw new RuntimeException("Could not create " + outLocation);
+    }
+    HiveMetaTool.main(new String[] {"-listExtTblLocs", dbName, outLocation});
+    for (File outFile : f.listFiles()) {
+      String contents = new String(Files.readAllBytes(Paths.get(outFile.getAbsolutePath())));
+      return new JSONObject(contents);
+    }
+    return null;
+  }
+
+  private JSONObject getDiffExtTblLocs(String fileLoc1, String fileLoc2, String outLocation) throws IOException {
+    File f = new File(outLocation);
+    if (f.exists()) {
+      FileUtil.fullyDelete(f);
+    }
+    if (!(new File(outLocation).mkdirs())) {
+      throw new RuntimeException("Could not create " + outLocation);
+    }
+    File f1 = new File(fileLoc1);
+    File f2 = new File(fileLoc2);
+    for (File outFile1 : f1.listFiles()) {
+      for (File outFile2 : f2.listFiles()) {
+        HiveMetaTool.main(new String[] {"-diffExtTblLocs", outFile1.getAbsolutePath(), outFile2.getAbsolutePath(), outLocation});
+        for(File outFile : f.listFiles()) {
+          String contents = new String(Files.readAllBytes(Paths.get(outFile.getAbsolutePath())));
+          return new JSONObject(contents);
+        }
+      }
+    }
+    return null;
+  }
+
   private void checkAvroSchemaURLProps(String expectedUri) throws TException {
     Table table = client.getTable(DB_NAME, TABLE_NAME);
     assertEquals(expectedUri, table.getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()));
     assertEquals(expectedUri, table.getSd().getParameters().get(AvroTableProperties.SCHEMA_URL.getPropName()));
   }
 
+  protected List<String> runStatementOnDriver(String stmt) throws Exception {
+    try {
+      d.run(stmt);
+    } catch (CommandProcessorException e) {
+      throw new RuntimeException(stmt + " failed: " + e);
+    }
+    List<String> rs = new ArrayList<>();
+    d.getResults(rs);
+    return rs;
+  }
+
   @After
   public void tearDown() throws Exception {
     try {
       client.dropTable(DB_NAME, TABLE_NAME);
       client.dropDatabase(DB_NAME);
+      try {
+        if (d != null) {
+          d.close();
+          d.destroy();
+          d = null;
+        }
+      } finally {
+        TestTxnDbUtil.cleanDb(hiveConf);
+        FileUtils.deleteDirectory(new File(getTestDataDir()));
+      }
 
       client.close();
     } catch (Throwable e) {
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
index 760d78d..913146e 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaTool.java
@@ -50,6 +50,10 @@ public final class HiveMetaTool {
         task = new MetaToolTaskExecuteJDOQLQuery();
       } else if (cl.isUpdateLocation()) {
         task = new MetaToolTaskUpdateLocation();
+      } else if (cl.isListExtTblLocs()) {
+        task = new MetaToolTaskListExtTblLocs();
+      } else if (cl.isDiffExtTblLocs()) {
+        task = new MetaToolTaskDiffExtTblLocs();
       } else {
         throw new IllegalArgumentException("No task was specified!");
       }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
index 1223f0d..ce43a8c 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/HiveMetaToolCommandLine.java
@@ -58,6 +58,25 @@ class HiveMetaToolCommandLine {
       .create("updateLocation");
 
   @SuppressWarnings("static-access")
+  private static final Option LIST_EXT_TBL_LOCS = OptionBuilder
+          .withArgName("dbName> " + " <output-loc")
+          .hasArgs(2)
+          .withDescription("Generates a file containing a list of directories which cover external table data locations " +
+                  "for the specified database. A database name or pattern must be specified, on which the tool will be run." +
+                  "The output is generated at the specified location."
+                  )
+          .create("listExtTblLocs");
+
+  @SuppressWarnings("static-access")
+  private static final Option DIFF_EXT_TBL_LOCS = OptionBuilder
+          .withArgName("file1> " + " <file2> " + "<output-loc")
+          .hasArgs(3)
+          .withDescription("Generates the difference between two output-files created using -listExtTblLocs option at the" +
+                  " specified location. Output contains locations(keys) unique to each input file. For keys common to both " +
+                  "input-files, those entities are listed which are deleted from the first file and introduced in the second."
+          )
+          .create("diffExtTblLocs");
+
   private static final Option DRY_RUN = OptionBuilder
       .withDescription("Perform a dry run of updateLocation changes.When run with the dryRun option updateLocation " +
           "changes are displayed but not persisted. dryRun is valid only with the updateLocation option.")
@@ -93,6 +112,8 @@ class HiveMetaToolCommandLine {
     OPTIONS.addOption(LIST_FS_ROOT);
     OPTIONS.addOption(EXECUTE_JDOQL);
     OPTIONS.addOption(UPDATE_LOCATION);
+    OPTIONS.addOption(LIST_EXT_TBL_LOCS);
+    OPTIONS.addOption(DIFF_EXT_TBL_LOCS);
     OPTIONS.addOption(DRY_RUN);
     OPTIONS.addOption(SERDE_PROP_KEY);
     OPTIONS.addOption(TABLE_PROP_KEY);
@@ -102,6 +123,8 @@ class HiveMetaToolCommandLine {
   private boolean listFSRoot;
   private String jdoqlQuery;
   private String[] updateLocationParams;
+  private String[] listExtTblLocsParams;
+  private String[] diffExtTblLocsParams;
   private boolean dryRun;
   private String serdePropKey;
   private String tablePropKey;
@@ -137,14 +160,18 @@ class HiveMetaToolCommandLine {
     listFSRoot = cl.hasOption(LIST_FS_ROOT.getOpt());
     jdoqlQuery = cl.getOptionValue(EXECUTE_JDOQL.getOpt());
     updateLocationParams = cl.getOptionValues(UPDATE_LOCATION.getOpt());
+    listExtTblLocsParams = cl.getOptionValues(LIST_EXT_TBL_LOCS.getOpt());
+    diffExtTblLocsParams = cl.getOptionValues(DIFF_EXT_TBL_LOCS.getOpt());
     dryRun = cl.hasOption(DRY_RUN.getOpt());
     serdePropKey = cl.getOptionValue(SERDE_PROP_KEY.getOpt());
     tablePropKey = cl.getOptionValue(TABLE_PROP_KEY.getOpt());
     help = cl.hasOption(HELP.getOpt());
 
-    int commandCount = (isListFSRoot() ? 1 : 0) + (isExecuteJDOQL() ? 1 : 0) + (isUpdateLocation() ? 1 : 0);
+    int commandCount = (isListFSRoot() ? 1 : 0) + (isExecuteJDOQL() ? 1 : 0) + (isUpdateLocation() ? 1 : 0) +
+          (isListExtTblLocs() ? 1 : 0) + (isDiffExtTblLocs() ? 1 : 0);
     if (commandCount != 1) {
-      throw new IllegalArgumentException("exectly one of -listFSRoot, -executeJDOQL, -updateLocation must be set");
+      throw new IllegalArgumentException("exactly one of -listFSRoot, -executeJDOQL, -updateLocation, " +
+              "-listExtTblLocs, -diffExtTblLocs must be set");
     }
 
     if (updateLocationParams != null && updateLocationParams.length != 2) {
@@ -152,6 +179,16 @@ class HiveMetaToolCommandLine {
           updateLocationParams.length + " arguments");
     }
 
+    if (listExtTblLocsParams != null && listExtTblLocsParams.length != 2) {
+      throw new IllegalArgumentException("HiveMetaTool:listExtTblLocs takes in 2 arguments but was passed " +
+              listExtTblLocsParams.length + " arguments");
+    }
+
+    if (diffExtTblLocsParams != null && diffExtTblLocsParams.length != 3) {
+      throw new IllegalArgumentException("HiveMetaTool:diffExtTblLocs takes in 3 arguments but was passed " +
+              diffExtTblLocsParams.length + " arguments");
+    }
+
     if ((dryRun || serdePropKey != null || tablePropKey != null) && !isUpdateLocation()) {
       throw new IllegalArgumentException("-dryRun, -serdePropKey, -tablePropKey may be used only for the " +
           "-updateLocation command");
@@ -176,6 +213,8 @@ class HiveMetaToolCommandLine {
         "\tlistFSRoot    : " + listFSRoot + "\n" +
         "\tjdoqlQuery    : " + jdoqlQuery + "\n" +
         "\tupdateLocation: " + Arrays.toString(updateLocationParams) + "\n" +
+        "\tlistExtTblLocs: " + Arrays.toString(listExtTblLocsParams) + "\n" +
+        "\tdiffExtTblLocs: " + Arrays.toString(diffExtTblLocsParams) + "\n" +
         "\tdryRun        : " + dryRun + "\n" +
         "\tserdePropKey  : " + serdePropKey + "\n" +
         "\ttablePropKey  : " + tablePropKey);
@@ -197,10 +236,26 @@ class HiveMetaToolCommandLine {
     return updateLocationParams != null;
   }
 
+  boolean isListExtTblLocs() {
+    return listExtTblLocsParams != null;
+  }
+
+  boolean isDiffExtTblLocs() {
+    return diffExtTblLocsParams != null;
+  }
+
   String[] getUpddateLocationParams() {
     return updateLocationParams;
   }
 
+  String[] getListExtTblLocsParams() {
+    return listExtTblLocsParams;
+  }
+
+  String[] getDiffExtTblLocsParams() {
+    return diffExtTblLocsParams;
+  }
+
   boolean isDryRun() {
     return dryRun;
   }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskDiffExtTblLocs.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskDiffExtTblLocs.java
new file mode 100644
index 0000000..90b676d
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskDiffExtTblLocs.java
@@ -0,0 +1,161 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.tools.metatool;
+
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONObject;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class MetaToolTaskDiffExtTblLocs extends MetaToolTask {
+  @Override
+  void execute() {
+    String[] args = getCl().getDiffExtTblLocsParams();
+    try {
+      File file1 = new File(args[0]);
+      File file2 = new File(args[1]);
+      String outputDir = args[2];
+      String outFileName = "diff_" + System.currentTimeMillis();
+      System.out.println("Writing diff to " + outFileName);
+      if (!file1.exists()) {
+        System.out.println("Input " + args[0] + " does not exist.");
+        return;
+      }
+      if (!file2.exists()) {
+        System.out.println("Input " + args[1] + " does not exist.");
+        return;
+      }
+      JSONObject jsonObject = getDiffJson(file1, file2);
+      try (PrintWriter pw = new PrintWriter(new FileWriter(outputDir + "/" + outFileName))) {
+        pw.println(jsonObject.toString(4).replace("\\", ""));
+      }
+    } catch (Exception e) {
+      System.out.println("Generating diff failed: \n" + e.getMessage());
+    }
+  }
+
+  private JSONObject getDiffJson(File file1, File file2) throws IOException, JSONException {
+    JSONObject inJson1 = new JSONObject(new String(Files.readAllBytes(Paths.get(file1.getAbsolutePath()))));
+    JSONObject inJson2 = new JSONObject(new String(Files.readAllBytes(Paths.get(file2.getAbsolutePath()))));
+    Map<String, HashSet<String>> modifiedLocations = new HashMap<>();
+    Set<String> keySet1 = getKeySet(inJson1);
+    Set<String> keySet2 = getKeySet(inJson2);
+    Set<String> uniqueLocationsFile1 = getSetDifference(keySet1, keySet2);
+    Set<String> uniqueLocationsFile2 = getSetDifference(keySet2, keySet1);
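+    //keys unique to either file are reported wholesale below; for keys present in both,
+    //the value arrays are compared and differing entries emitted with "- " / "+ " prefixes.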
+    for (String loc : keySet1) {
+      if (!uniqueLocationsFile1.contains(loc)) {
+        //common key, we need to compare the values
+        JSONArray valArr1 = inJson1.getJSONArray(loc);
+        JSONArray valArr2 = inJson2.getJSONArray(loc);
+        for (int i = 0; i < valArr1.length(); i++) {
+          String val1 = valArr1.getString(i);
+          boolean absentFromSecondKey = true;
+          for (int j = 0; j < valArr2.length(); j++) {
+            String val2 = valArr2.getString(j);
+            if (val1.equalsIgnoreCase(val2)) {
+              absentFromSecondKey = false;
+              break;
+            }
+          }
+          if (absentFromSecondKey) {
+            modifiedLocations.computeIfAbsent(loc, k -> new HashSet<>()).add(asDeleted(val1));
+          }
+        }
+        for (int i = 0; i < valArr2.length(); i++) {
+          String val2 = valArr2.getString(i);
+          boolean absentFromFirstKey = true;
+          for (int j = 0; j < valArr1.length(); j++) {
+            String val1 = valArr1.getString(j);
+            if (val1.equalsIgnoreCase(val2)) {
+              absentFromFirstKey = false;
+              break;
+            }
+          }
+          if (absentFromFirstKey) {
+            modifiedLocations.computeIfAbsent(loc, k -> new HashSet<>()).add(asAdded(val2));
+          }
+        }
+      }
+    }
+    JSONObject jsonObject = new JSONObject();
+    if(!uniqueLocationsFile1.isEmpty() || !uniqueLocationsFile2.isEmpty()) {
+      jsonObject.put("Locations only in " + file1.getName(), uniqueLocationsFile1);
+      jsonObject.put("Locations only in " + file2.getName(), uniqueLocationsFile2);
+    }
+    for(String commonLoc : modifiedLocations.keySet()) {
+      List<String> modifiedEntries = new ArrayList<>(modifiedLocations.get(commonLoc));
+      Collections.sort(modifiedEntries);
+      jsonObject.put(commonLoc, modifiedEntries);
+    }
+    return jsonObject;
+  }
+
+  private Set<String> getKeySet(JSONObject jsonObject) {
+    Iterator<String> keyIter = jsonObject.keys();
+    Set<String> keySet = new HashSet<>();
+    while (keyIter.hasNext()) {
+      keySet.add(keyIter.next());
+    }
+    return keySet;
+  }
+
+  private Set<String> getSetDifference(Set<String> keySet1, Set<String> keySet2) {
+    Set<String> diffSet = new HashSet<>();
+    for(String elem : keySet1) {
+      if(!keySet2.contains(elem)) {
+        diffSet.add(elem);
+      }
+    }
+    return diffSet;
+  }
+
+  private String asDeleted(String str) {
+    return "- " + str;
+  }
+
+  private String asAdded(String str) {
+    return "+ " + str;
+  }
+}
\ No newline at end of file
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java
new file mode 100644
index 0000000..f9d34ee
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/tools/metatool/MetaToolTaskListExtTblLocs.java
@@ -0,0 +1,668 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.tools.metatool;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.ContentSummary;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.metastore.ObjectStore;
+import org.apache.hadoop.hive.metastore.TableType;
+import org.apache.hadoop.hive.metastore.Warehouse;
+import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils;
+import org.apache.thrift.TException;
+import org.codehaus.jettison.json.JSONException;
+import org.codehaus.jettison.json.JSONArray;
+import org.codehaus.jettison.json.JSONObject;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileNotFoundException;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.HashMap;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.Set;
+import java.util.TreeSet;
+
+public class MetaToolTaskListExtTblLocs extends MetaToolTask {
+  private static final Logger LOG = LoggerFactory.getLogger(MetaToolTaskListExtTblLocs.class);
+  private static Configuration conf;
+  private final Map<String, HashSet<String>> coverageList = new HashMap<>(); //maps each output-location to the set of input-locations covered by it
+  private final Map<String, DataLocation> inputLocations = new HashMap<>(); //maps each input-location to a DataLocation object which specifies its properties
+
+  @Override
+  void execute() {
+    String[] loc = getCl().getListExtTblLocsParams();
+    try{
+      generateExternalTableInfo(loc[0], loc[1]);
+    } catch (IOException | TException | JSONException e) {
+      System.out.println("Generating external table locations failed: \n" + e.getMessage());
+    }
+  }
+
+  private void generateExternalTableInfo(String dbPattern, String outputDir) throws TException, IOException,
+          JSONException {
+    ObjectStore objectStore = getObjectStore();
+    conf = msConf != null ? msConf : objectStore.getConf();
+    Warehouse wh = new Warehouse(conf);
+    String defaultCatalog = MetaStoreUtils.getDefaultCatalog(conf);
+    List<String> databases = objectStore.getDatabases(defaultCatalog, dbPattern);
+    System.out.println("Number of databases found for given pattern: " + databases.size());
+    //maintain the set of leaves of the tree as a sorted set
+    Set<String> leafLocations = new TreeSet<>();
+    for (String db : databases) {
+      List<String> tables = objectStore.getAllTables(defaultCatalog, db);
+      Path defaultDbExtPath = wh.getDefaultExternalDatabasePath(db);
+      String defaultDbExtLocation = defaultDbExtPath.toString();
+      boolean isDefaultPathEmpty = true;
+      for(String tblName : tables) {
+        Table t = objectStore.getTable(defaultCatalog, db, tblName);
+        if(TableType.EXTERNAL_TABLE.name().equalsIgnoreCase(t.getTableType())) {
+          String tblLocation = t.getSd().getLocation();
+          Path tblPath = new Path(tblLocation);
+          if(isPathWithinSubtree(tblPath, defaultDbExtPath)) {
+            if(isDefaultPathEmpty) {
+              isDefaultPathEmpty = false;
+              //default paths should always be included, so we add them as special leaves to the tree
+              addDefaultPath(defaultDbExtLocation, db);
+              leafLocations.add(defaultDbExtLocation);
+            }
+            HashSet<String> coveredByDefault = coverageList.get(defaultDbExtLocation);
+            coveredByDefault.add(tblLocation);
+          } else if (!isCovered(leafLocations, tblPath)) {
+            leafLocations.add(tblLocation);
+          }
+          DataLocation dataLocation = new DataLocation(db, tblName, 0, 0,
+                  null);
+          inputLocations.put(tblLocation, dataLocation);
+          dataLocation.setSizeExtTblData(getDataSize(tblPath, conf));
+          //retrieving partition locations outside table-location
+          Map<String, String> partitionLocations = objectStore.getPartitionLocations(defaultCatalog, db, tblName,
+                  tblLocation, -1);
+          dataLocation.setTotalPartitions(partitionLocations.size());
+          for (String partitionName : partitionLocations.keySet()) {
+            String partLocation = partitionLocations.get(partitionName);
+            //null value means partition is in table location, we do not add it to input in this case.
+            if(partLocation == null) {
+              dataLocation.incrementNumPartsInTblLoc();
+            }
+            else {
+              partLocation = partLocation + Path.SEPARATOR +
+                      Warehouse.makePartName(Warehouse.makeSpecFromName(partitionName), false);
+              Path partPath = new Path(partLocation);
+              long partDataSize = getDataSize(partPath, conf);
+              if (isPathWithinSubtree(partPath, defaultDbExtPath)) {
+                if (isDefaultPathEmpty) {
+                  isDefaultPathEmpty = false;
+                  addDefaultPath(defaultDbExtLocation, db);
+                  leafLocations.add(defaultDbExtLocation);
+                }
+                if (isPathWithinSubtree(partPath, tblPath)) {
+                  //even in non-null case, handle the corner case where location is set to table-location
+                  //In this case, partition would be covered by table location itself, so we need not add to input
+                  dataLocation.incrementNumPartsInTblLoc();
+                } else {
+                  DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
+                  partObj.setSizeExtTblData(partDataSize);
+                  inputLocations.put(partLocation, partObj);
+                  coverageList.get(defaultDbExtLocation).add(partLocation);
+                }
+              } else {
+                if (isPathWithinSubtree(partPath, tblPath)) {
+                  dataLocation.incrementNumPartsInTblLoc();
+                } else {
+                  //only in this case, partition location is neither inside table nor in default location.
+                  //So we add it to the graph  as a separate leaf.
+                  DataLocation partObj = new DataLocation(db, tblName, 0, 0, partitionName);
+                  partObj.setSizeExtTblData(partDataSize);
+                  inputLocations.put(partLocation, partObj);
+                  if(!isCovered(leafLocations, partPath)) {
+                    leafLocations.add(partLocation);
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    if(!leafLocations.isEmpty()) {
+      removeNestedStructure(leafLocations);
+      createOutputList(leafLocations, outputDir, dbPattern);
+    }
+    else {
+      System.out.println("No external tables found to process.");
+    }
+  }
+
+  private void addDefaultPath(String defaultDbExtLocation, String dbName) {
+    coverageList.put(defaultDbExtLocation, new HashSet<>());
+    DataLocation defaultDatalocation = new DataLocation(dbName, null, 0, 0, null);
+    //mark default leaves to always be included in output-list
+    defaultDatalocation.setIncludeByDefault(true);
+    inputLocations.put(defaultDbExtLocation, defaultDatalocation);
+  }
+
+  private long getDataSize(Path location, Configuration conf) throws IOException {
+    if(location == null) {
+      return 0;
+    }
+    if(MetastoreConf.getBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST)) {
+      return testDatasizes == null ? 0 : testDatasizes.getOrDefault(location.toString(), 0L);
+    }
+    FileSystem fs = location.getFileSystem(conf);
+    if (fs != null && fs.getUri().getScheme().equals("hdfs")) {
+      try {
+        ContentSummary cs = fs.getContentSummary(location);
+        return cs.getLength();
+      } catch (FileNotFoundException e) {
+        //no data yet in data location but we proceed since data may be added later.
+      }
+    }
+    return 0;
+  }
+
+  private boolean isPathWithinSubtree(Path path, Path subtree) {
+    int subtreeDepth = subtree.depth();
+    while(path != null){
+      if (subtreeDepth > path.depth()) {
+        return false;
+      }
+      if(subtree.equals(path)){
+        return true;
+      }
+      path = path.getParent();
+    }
+    return false;
+  }
+
+
+  /*
+   * Method to determine if an existing location covers the given location and record the coverage in output.
+   */
+  private boolean isCovered(Set<String> locations, Path path) {
+    Path originalPath = new Path(path.toString());
+    while(path != null){
+      if(locations.contains(path.toString())){
+        addCoverage(path, originalPath, true);
+        return true;
+      }
+      path = path.getParent();
+    }
+    return false;
+  }
+
+  /*
+   * Method to cover a child node using a parent.
+   * Removes the child and marks all nodes covered by the child as being covered by the parent.
+   */
+  private void addCoverage(Path parentPath, Path childPath, boolean addChild) {
+    String childLoc = childPath.toString();
+    String parentLoc = parentPath.toString();
+    //If the path to be covered should be included by default, then we do not cover it.
+    //This is because default paths should be individually listed, not covered under some parent.
+    if(inputLocations.containsKey(childLoc) && inputLocations.get(childLoc).shouldIncludeByDefault()) {
+      return;
+    }
+    HashSet<String> pathsUnderChild = coverageList.get(childLoc);
+    coverageList.remove(childLoc);
+    if(coverageList.get(parentLoc) == null) {
+      coverageList.put(parentLoc, new HashSet<>());
+    }
+    HashSet<String> pathsUnderParent = coverageList.get(parentLoc);
+    if(addChild) {
+      pathsUnderParent.add(childPath.toString());
+    }
+    if(pathsUnderChild != null) {
+      pathsUnderParent.addAll(pathsUnderChild);
+    }
+  }
+
+  /*
+   * Transforms a collection so that no element is an ancestor of another.
+   */
+  private void removeNestedStructure(Set<String> locations) {
+    List<String> locationList = new ArrayList<>();
+    locationList.addAll(locations);
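+    //the tool builds these locations in a sorted TreeSet, so any descendant of a location
+    //follows it in iteration order; the scan below relies on that ordering to fold each
+    //descendant into the nearest listed ancestor via addCoverage.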
+    for(int i = 0; i < locationList.size(); i++) {
+      String currLoc = locationList.get(i);
+      Path currPath = new Path(currLoc);
+      for(int j = i + 1; j < locationList.size(); j++) {
+        String nextLoc = locationList.get(j);
+        Path nextPath = new Path (nextLoc);
+        if(isPathWithinSubtree(nextPath, currPath)) {
+          addCoverage(currPath, nextPath, true);
+          locations.remove(nextLoc);
+          i = j;
+        }
+        else {
+          i = j - 1;
+          break;
+        }
+      }
+    }
+  }
+
+  /*
+   * Method to write the output to the given location.
+   * We construct a tree out of external table - locations and use it to determine suitable directories covering all locations.
+   */
+  private void createOutputList(Set<String> locations, String outputDir, String dbPattern) throws IOException, JSONException {
+    ExternalTableGraphNode rootNode = constructTree(locations);
+    //Traverse through the tree in breadth-first manner and decide which nodes to include.
+    //For every node, either cover all leaves in its subtree using itself
+    // or delegate this duty to its child nodes.
+    Queue<ExternalTableGraphNode> queue = new LinkedList<>();
+    queue.add(rootNode);
+    while(!queue.isEmpty()){
+      ExternalTableGraphNode current = queue.remove();
+      if(current.isLeaf()) {
+        // in this case, the leaf needs to be added to the solution, i.e. marked as being covered.
+        // This was done during graph construction, so we continue.
+        continue;
+      }
+      int nonTrivialCoverage = 0;
+      List<ExternalTableGraphNode> childNodes = current.getChildNodes();
+      boolean processChildrenByDefault = false;
+      for(ExternalTableGraphNode child : childNodes) {
+        if (child.getNumLeavesCovered() > 1) {
+          nonTrivialCoverage += child.getNumLeavesCovered();
+        }
+        if (child.shouldIncludeByDefault()) {
+          processChildrenByDefault = true;
+          break;
+        }
+      }
+      boolean addCurrToSolution = false;
+      if(!processChildrenByDefault) {
+        addCurrToSolution = true;
+        if (!current.shouldIncludeByDefault()) {
+          //ensure that we do not have extra data in the current node for it to be included.
+          long currDataSize = getDataSize(new Path(current.getLocation()), conf);
+          int numLeavesCovered = current.getNumLeavesCovered();
+          //only add current node if it doesn't have extra data and non-trivial coverage is less than half.
+          //Also we do not add current node if there is just a single path(numLeavesCovered = 1); in this case we proceed to the leaf.
+          addCurrToSolution &= currDataSize == current.getChildDataSizes() &&
+                  ((nonTrivialCoverage < (numLeavesCovered + 1) / 2) && numLeavesCovered != 1);
+        }
+      }
+      if(processChildrenByDefault) {
+        queue.addAll(childNodes);
+      } else if (addCurrToSolution) {
+        addToSolution(current);
+      } else {
+        queue.addAll(childNodes);
+      }
+    }
+    String outFileName = "externalTableLocations_" + dbPattern + "_" + System.currentTimeMillis() + ".txt";
+    System.out.println("Writing output to " + outFileName);
+    JSONObject jsonObject = new JSONObject();
+    for(String outputLocation : coverageList.keySet()) {
+      HashSet<String> coveredLocations = coverageList.get(outputLocation);
+      JSONArray outputEntities = listOutputEntities(coveredLocations);
+      jsonObject.put(outputLocation, outputEntities);
+    }
+    String result = jsonObject.toString(4).replace("\\", "");
+    try (PrintWriter pw = new PrintWriter(new FileWriter(outputDir + "/" + outFileName))) {
+      pw.println(result);
+    }
+  }
+
+  /*
+   * Returns a comma-separated list of entities (tables or partition names) covered by a location.
+   * Table-name followed by "*" indicates that all partitions are inside table location.
+   * Otherwise, we record the number of partitions covered by table location.
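+   * For example: "db1.tbl.*" (all partitions in table location) or "db1.tbl p(2/5)" (2 of 5 there); names illustrative.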
+   */
+  private JSONArray listOutputEntities(HashSet<String> locations) {
+    List<String> listEntities = new ArrayList<>();
+    for(String loc : locations) {
+      DataLocation data = inputLocations.get(loc);
+      String tblName = data.getTblName();
+      if(tblName == null) {
+        continue;
+      }
+      String out = data.getDbName() + "." + tblName;
+      String partName = data.getPartName();
+      if (partName == null) {
+        int numPartInTblLoc = data.getNumPartitionsInTblLoc();
+        int totPartitions = data.getTotalPartitions();
+        if (totPartitions > 0 && numPartInTblLoc == totPartitions) {
+          out = out + ".*";
+        }
+        else if (totPartitions > 0) {
+          out = out + " p(" + numPartInTblLoc + "/" + totPartitions + ")";
+        }
+      }
+      else {
+        out = out + "." + partName;
+      }
+      listEntities.add(out);
+    }
+    Collections.sort(listEntities);
+    return new JSONArray(listEntities);
+  }
+
+  private ExternalTableGraphNode constructTree(Set<String> locations) {
+    ExternalTableGraphNode rootNode = null;
+    Map<String, ExternalTableGraphNode> locationGraph = new HashMap<>();
+    // Every location is represented by a leaf in the tree.
+    // We traverse through the input locations and construct the tree.
+    for (String leaf : locations) {
+      ExternalTableGraphNode currNode = new ExternalTableGraphNode(leaf, new ArrayList<>(), true, 0);
+      if(inputLocations.containsKey(leaf)) {
+        if(inputLocations.get(leaf).shouldIncludeByDefault()) {
+          currNode.setIncludeByDefault(true);
+        }
+        currNode.setDataSize(inputLocations.get(leaf).getSizeExtTblData());
+      }
+      locationGraph.put(leaf, currNode);
+      //initialize coverage-lists of leaves
+      if (coverageList.get(leaf) == null) {
+        coverageList.put(leaf, new HashSet<>());
+      }
+      //mark the leaf as being covered by itself
+      HashSet<String> currCoverage = coverageList.get(leaf);
+      currCoverage.add(leaf);
+      //set the number of leaves covered. Nested locations could have been covered earlier during preprocessing,
+      //so we set it to the size of its coverage set.
+      currNode.setNumLeavesCovered(currCoverage.size());
+      Path parent = new Path(leaf).getParent();
+      ExternalTableGraphNode parNode;
+      //traverse upward to the root in order to construct the graph
+      while (parent != null) {
+        String parentLoc = parent.toString();
+        if (!locationGraph.containsKey(parentLoc)) {
+          //if parent doesn't exist in graph then create it
+          parNode = new ExternalTableGraphNode(parentLoc, new ArrayList<>(), false, 0);
+          locationGraph.put(parentLoc, parNode);
+        }
+        else {
+          parNode = locationGraph.get(parentLoc);
+          parNode.setIsLeaf(false);
+        }
+        if(currNode.getParent() == null) {
+          parNode.addChild(currNode);
+          currNode.setParent(parNode);
+        }
+        else {
+          break;
+        }
+        currNode = parNode;
+        parent = parent.getParent();
+      }
+      if (parent == null && rootNode == null) {
+        rootNode = currNode;
+        rootNode.setParent(rootNode);
+      }
+    }
+    rootNode.updateNumLeavesCovered();
+    rootNode.updateIncludeByDefault();
+    rootNode.updateDataSize();
+    return rootNode;
+  }
+
+  private void addToSolution(ExternalTableGraphNode node) {
+    //since this node is in the solution, all its children should be covered using this node.
+    if(!node.isLeaf()) {
+      addCoverageRecursive(node);
+    }
+  }
+
+  private void addCoverageRecursive(ExternalTableGraphNode node) {
+    for(ExternalTableGraphNode child : node.getChildNodes()) {
+      if(child.isLeaf()) {
+        addCoverage(new Path(node.getLocation()), new Path(child.getLocation()), true);
+      }
+      else {
+        addCoverageRecursive(child);
+        addCoverage(new Path(node.getLocation()), new Path(child.getLocation()), false);
+      }
+    }
+  }
+
+  @VisibleForTesting
+  static Configuration msConf = null;
+
+  @VisibleForTesting
+  Map<String, Long> testDatasizes = null;
+
+  @VisibleForTesting
+  public Map<String, HashSet<String>> runTest(Set<String> inputList, Map<String, Long> sizes)  {
+    try {
+      conf = msConf;
+      testDatasizes = sizes;
+      coverageList.clear();
+      removeNestedStructure(inputList);
+      createOutputList(inputList, "test", "test");
+    } catch (Exception e) {
+      LOG.error("MetaToolTask failed on ListExtTblLocs test: ", e);
+    }
+    return coverageList;
+  }
+
+  /*
+   * Class denoting every external table data location.
+   * Each location can be either a table location(in this case, partition-name is not set) or
+   * a partition location which is outside table location.
+   * If the location is a table location, we store additional data such as how many partitions the table has
+   * and how many of them reside in the table location itself.
+   */
+  private class DataLocation {
+    private String dbName;
+    private String tblName;
+    private int numPartitionsInTblLoc;
+    private String partName;
+    private int totalPartitions;
+    // 'sizeExtTblData' stores the size of useful data in a directory.
+    // This can be compared with total directory-size to ascertain amount of extra data in it.
+    private long sizeExtTblData;
+    private boolean includeByDefault;
+
+    private DataLocation (String dbName, String tblName, int totalPartitions, int numPartitionsInTblLoc,
+                          String partName) {
+      this.dbName = dbName;
+      this.tblName = tblName;
+      this.totalPartitions = totalPartitions;
+      this.numPartitionsInTblLoc = numPartitionsInTblLoc;
+      this.partName = partName;
+      this.sizeExtTblData = 0;
+    }
+
+    private void incrementNumPartsInTblLoc() {
+      this.numPartitionsInTblLoc++;
+    }
+    
+    private String getPartName() {
+      return this.partName;
+    }
+
+    private String getDbName() {
+      return this.dbName;
+    }
+    
+    private String getTblName() {
+      return this.tblName;
+    }
+    
+    private int getNumPartitionsInTblLoc() {
+      return this.numPartitionsInTblLoc;
+    }
+
+    private int getTotalPartitions() {
+      return this.totalPartitions;
+    }
+    
+    private long getSizeExtTblData() {
+      return this.sizeExtTblData;
+    }
+
+    private boolean shouldIncludeByDefault() {
+      return this.includeByDefault;
+    }
+
+    private void setTotalPartitions(int totalPartitions) {
+      this.totalPartitions = totalPartitions;
+    }
+
+    private void setSizeExtTblData(long sizeExtTblData) {
+      this.sizeExtTblData = sizeExtTblData;
+    }
+
+    private void setIncludeByDefault(boolean includeByDefault) {
+      this.includeByDefault = includeByDefault;
+    }
+  }
+
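+  /*
+   * Class denoting a node of the external table location graph: leaves correspond to the
+   * input locations and internal nodes to their ancestor directories.
+   */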
+  private class ExternalTableGraphNode {
+    private String location;
+    private List<ExternalTableGraphNode> childNodes;
+    private ExternalTableGraphNode parent;
+    private boolean isLeaf;
+    private boolean includeByDefault;
+    private int numLeavesCovered;
+    private long dataSize;
+
+    private ExternalTableGraphNode(String location, List<ExternalTableGraphNode> childNodes, boolean isLeaf, long dataSize) {
+      this.location = location;
+      this.childNodes = childNodes;
+      this.isLeaf = isLeaf;
+      this.parent = null;
+      this.includeByDefault = false;
+      this.dataSize = dataSize;
+    }
+
+    private void addChild(ExternalTableGraphNode child) {
+      this.childNodes.add(child);
+    }
+    
+    private List<ExternalTableGraphNode> getChildNodes() {
+      return this.childNodes;
+    }
+
+    private boolean isLeaf() {
+      return this.isLeaf;
+    }
+
+    private void setIsLeaf(boolean isLeaf) {
+      this.isLeaf = isLeaf;
+    }
+
+    private void setNumLeavesCovered(int numLeavesCovered) {
+      this.numLeavesCovered = numLeavesCovered;
+    }
+
+    private int getNumLeavesCovered() {
+      return this.numLeavesCovered;
+    }
+
+    private String getLocation() {
+      return this.location;
+    }
+
+    private void setParent(ExternalTableGraphNode node) {
+      this.parent = node;
+    }
+
+    private ExternalTableGraphNode getParent() {
+      return this.parent;
+    }
+
+    private boolean shouldIncludeByDefault() {
+      return this.includeByDefault;
+    }
+
+    private void setIncludeByDefault(boolean includeByDefault) {
+      this.includeByDefault = includeByDefault;
+    }
+
+    private void setDataSize(long dataSize) {
+      this.dataSize = dataSize;
+    }
+
+    private long getDataSize() {
+      return this.dataSize;
+    }
+
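+    /*
+     * Method to recompute, bottom-up, the number of leaf locations covered by each subtree.
+     */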
+    private void updateNumLeavesCovered() {
+      if (this.isLeaf) {
+        return;
+      }
+      this.numLeavesCovered = 0;
+      for (ExternalTableGraphNode currChild : childNodes) {
+        currChild.updateNumLeavesCovered();
+        this.numLeavesCovered += currChild.getNumLeavesCovered();
+      }
+    }
+
+    /*
+     * Method to mark all paths in the subtree rooted at the current node that need to be included by default.
+     * If some leaf has this property, then the whole path from the root to that leaf is marked.
+     */
+    private void updateIncludeByDefault() {
+      if (this.isLeaf) {
+        return;
+      }
+      for (ExternalTableGraphNode currChild : childNodes) {
+        currChild.updateIncludeByDefault();
+      }
+      for (ExternalTableGraphNode currChild : childNodes) {
+        if (currChild.shouldIncludeByDefault()) {
+          this.includeByDefault = true;
+          break;
+        }
+      }
+    }
+
+    /*
+     * Method to recursively update the data size of the subtree rooted at a particular node.
+     */
+    private void updateDataSize() {
+      if (this.isLeaf) {
+        return;
+      }
+      for (ExternalTableGraphNode currChild : childNodes) {
+        currChild.updateDataSize();
+      }
+      this.dataSize += this.getChildDataSizes();
+    }
+
+    /*
+     * Method to return the sum of the data sizes of a particular node's children.
+     */
+    private long getChildDataSizes() {
+      long sumChildDataSizes = 0;
+      for (ExternalTableGraphNode currChild : childNodes) {
+        sumChildDataSizes += currChild.getDataSize();
+      }
+      return sumChildDataSizes;
+    }
+  }
+}
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
index 9563bd6..ab090c9 100644
--- a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestHiveMetaToolCommandLine.java
@@ -44,6 +44,8 @@ public class TestHiveMetaToolCommandLine {
     assertNull(cl.getJDOQLQuery());
     assertFalse(cl.isUpdateLocation());
     assertNull(cl.getUpddateLocationParams());
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertFalse(cl.isDryRun());
     assertNull(cl.getSerdePropKey());
     assertNull(cl.getTablePropKey());
@@ -57,6 +59,8 @@ public class TestHiveMetaToolCommandLine {
     assertEquals("select a from b", cl.getJDOQLQuery());
     assertFalse(cl.isUpdateLocation());
     assertNull(cl.getUpddateLocationParams());
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertFalse(cl.isDryRun());
     assertNull(cl.getSerdePropKey());
     assertNull(cl.getTablePropKey());
@@ -73,6 +77,8 @@ public class TestHiveMetaToolCommandLine {
     assertTrue(cl.isUpdateLocation());
     assertEquals("hdfs://new.loc", cl.getUpddateLocationParams()[0]);
     assertEquals("hdfs://old.loc", cl.getUpddateLocationParams()[1]);
+    assertFalse(cl.isListExtTblLocs());
+    assertNull(cl.getListExtTblLocsParams());
     assertTrue(cl.isDryRun());
     assertEquals("abc", cl.getSerdePropKey());
     assertEquals("def", cl.getTablePropKey());
@@ -81,7 +87,7 @@ public class TestHiveMetaToolCommandLine {
   @Test
   public void testNoTask() throws ParseException {
     exception.expect(IllegalArgumentException.class);
-    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation must be set");
+    exception.expectMessage("exactly one of -listFSRoot, -executeJDOQL, -updateLocation, -listExtTblLocs, -diffExtTblLocs must be set");
 
     new HiveMetaToolCommandLine(new String[] {});
   }
@@ -89,7 +95,7 @@ public class TestHiveMetaToolCommandLine {
   @Test
   public void testMultipleTask() throws ParseException {
     exception.expect(IllegalArgumentException.class);
-    exception.expectMessage("exectly one of -listFSRoot, -executeJDOQL, -updateLocation must be set");
+    exception.expectMessage("exactly one of -listFSRoot, -executeJDOQL, -updateLocation, -listExtTblLocs, -diffExtTblLocs must be set");
 
     new HiveMetaToolCommandLine(new String[] {"-listFSRoot", "-executeJDOQL", "select a from b"});
   }
@@ -103,6 +109,26 @@ public class TestHiveMetaToolCommandLine {
   }
 
   @Test
+  public void testListExtTblLocsOneArgument() throws ParseException {
+    exception.expect(IllegalArgumentException.class);
+    exception.expectMessage("HiveMetaTool:listExtTblLocs takes in 2 arguments but was passed 1 arguments");
+
+    new HiveMetaToolCommandLine(new String[] {"-listExtTblLocs", "db1"});
+  }
+
+  @Test
+  public void testDiffExtTblLocsOneArgument() throws ParseException {
+    exception.expect(IllegalArgumentException.class);
+    exception.expectMessage("HiveMetaTool:diffExtTblLocs takes in 3 arguments but was passed 1 arguments");
+
+    new HiveMetaToolCommandLine(new String[] {"-diffExtTblLocs", "file1"});
+  }
+
+  @Test
+  public void testDiffExtTblLocsTwoArguments() throws ParseException {
+    exception.expect(IllegalArgumentException.class);
+    exception.expectMessage("HiveMetaTool:diffExtTblLocs takes in 3 arguments but was passed 2 arguments");
+
+    new HiveMetaToolCommandLine(new String[] {"-diffExtTblLocs", "file1", "file2"});
+  }
+
+  @Test
   public void testDryRunNotAllowed() throws ParseException {
     exception.expect(IllegalArgumentException.class);
     exception.expectMessage("-dryRun, -serdePropKey, -tablePropKey may be used only for the -updateLocation command");
diff --git a/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestMetaToolTaskListExtTblLocs.java b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestMetaToolTaskListExtTblLocs.java
new file mode 100644
index 0000000..4eb3111
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/tools/metatool/TestMetaToolTaskListExtTblLocs.java
@@ -0,0 +1,291 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore.tools.metatool;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.junit.Assert;
+import org.junit.Test;
+import org.junit.experimental.categories.Category;
+
+import java.util.Set;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.TreeSet;
+
+
+/* Unit tests for MetaToolTaskListExtTblLocs. */
+@Category(MetastoreUnitTest.class)
+public class TestMetaToolTaskListExtTblLocs {
+
+  /*
+   * Test grouping of locations. No extra data assumed.
+   */
+  @Test
+  public void testGroupLocations() {
+    Set<String> inputLocations = new TreeSet<>();
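+    // Use an in-test metastore configuration so the task runs without a live metastore.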
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true);
+    MetaToolTaskListExtTblLocs.msConf = conf;
+    MetaToolTaskListExtTblLocs task = new MetaToolTaskListExtTblLocs();
+
+    //Case 1: Multiple unpartitioned external tables, expected o/p: 1 location
+    inputLocations.add("/warehouse/customLocation/t1");
+    inputLocations.add("/warehouse/customLocation/t2");
+    inputLocations.add("/warehouse/customLocation/t3");
+    Map<String, HashSet<String>> output = task.runTest(inputLocations, null);
+    Assert.assertEquals(1, output.size());
+    String expectedOutput = "/warehouse/customLocation";
+    Assert.assertTrue(output.containsKey(expectedOutput));
+    HashSet<String> coveredLocs = output.get(expectedOutput);
+    Assert.assertEquals(3, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/t1"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/t2"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/t3"));
+
+    //Case 2 : inputs at multiple depths
+    // inputs   ../ext/b0 - contains 1 location
+    //          ../ext/p=0 - contains 1 location
+    //          ../ext/b1/b2/b3 - contains 3 locations (p1, p2, p3)
+    // expected output : [../ext/b1/b2/b3 containing 3 elements, b0, p=0]
+    inputLocations.clear();
+    inputLocations.add("/warehouse/customLocation/ext/b0");
+    inputLocations.add("/warehouse/customLocation/ext/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=3");
+    output = task.runTest(inputLocations, null);
+    Assert.assertEquals(3, output.size());
+    String expectedOutput1 = "/warehouse/customLocation/ext/b0";
+    Assert.assertTrue(output.containsKey(expectedOutput1));
+    coveredLocs = output.get(expectedOutput1);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0"));
+    String expectedOutput2 = "/warehouse/customLocation/ext/p=0";
+    Assert.assertTrue(output.containsKey(expectedOutput2));
+    coveredLocs = output.get(expectedOutput2);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/p=0"));
+    String expectedOutput3 = "/warehouse/customLocation/ext/b1/b2/b3";
+    Assert.assertTrue(output.containsKey(expectedOutput3));
+    coveredLocs = output.get(expectedOutput3);
+    Assert.assertEquals(3, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=1"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=2"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=3"));
+
+    //Case 3 : root with many leaves
+    // inputs   ../ext/ - contains 4 locations
+    //          ../ext/b1 - contains 3 locations
+    // expected output : [../ext covering all locations] since the root (ext) holds more than half of the locations
+    inputLocations.clear();
+    inputLocations.add("/warehouse/customLocation/ext/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/p=3");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=4");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=5");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=6");
+    output = task.runTest(inputLocations, null);
+    Assert.assertEquals(1, output.size());
+    expectedOutput = "/warehouse/customLocation/ext";
+    Assert.assertTrue(output.containsKey(expectedOutput));
+    coveredLocs = output.get(expectedOutput);
+    Assert.assertEquals(7, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.containsAll(inputLocations));
+
+    //Case 4 : root with many trivial (non-leaf) locations
+    // inputs   ../ext/ - contains 4 trivial locations
+    //          ../ext/b1 - contains 3 locations
+    // expected output : [../ext covering all locations] since the number of non-trivial (grouped) locations under ext is less than half
+    inputLocations.clear();
+    inputLocations.add("/warehouse/customLocation/ext/dir01/dir02/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/dir11/dir12/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/dir21/dir22/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/dir31/dir32/p=3");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=4");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=5");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=6");
+    output = task.runTest(inputLocations, null);
+    Assert.assertEquals(1, output.size());
+    expectedOutput = "/warehouse/customLocation/ext";
+    Assert.assertTrue(output.containsKey(expectedOutput));
+    coveredLocs = output.get(expectedOutput);
+    Assert.assertEquals(7, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.containsAll(inputLocations));
+
+    //Case 5 : several grouped locations and 1 outlier at root
+    // inputs   ../ext/b0 - contains 4 locations
+    //          ../ext/b1 - contains 3 locations
+    // expected output : [../ext/b0, ../ext/b1, p=7]
+    inputLocations.clear();
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=3");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=4");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=5");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=6");
+    inputLocations.add("/warehouse/customLocation/ext/p=7");
+    output = task.runTest(inputLocations, null);
+    Assert.assertEquals(3, output.size());
+    expectedOutput1 = "/warehouse/customLocation/ext/b0";
+    Assert.assertTrue(output.containsKey(expectedOutput1));
+    coveredLocs = output.get(expectedOutput1);
+    Assert.assertEquals(4, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=0"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=1"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=2"));
+    expectedOutput2 = "/warehouse/customLocation/ext/b1";
+    Assert.assertTrue(output.containsKey(expectedOutput2));
+    coveredLocs = output.get(expectedOutput2);
+    Assert.assertEquals(3, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/p=4"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/p=5"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/p=6"));
+    expectedOutput3 = "/warehouse/customLocation/ext/p=7";
+    Assert.assertTrue(output.containsKey(expectedOutput3));
+    coveredLocs = output.get(expectedOutput3);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/p=7"));
+
+    //Case 6 : inputs with nested structure
+    // inputs   ../ext/b0 - contains 4 locations
+    //          ../ext/b1 
+    //          ../ext/b1/b2 - contains 4 locations
+    // expected output : [../ext/b0, ../ext/b1] (no extra location for b2 since it is covered by b1 itself)
+    inputLocations.clear();
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/b0/p=3");
+    inputLocations.add("/warehouse/customLocation/ext/b1");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/p=7");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/p=8");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/p=9");
+    output = task.runTest(inputLocations, null);
+    Assert.assertEquals(2, output.size());
+    expectedOutput1 = "/warehouse/customLocation/ext/b0";
+    Assert.assertTrue(output.containsKey(expectedOutput1));
+    coveredLocs = output.get(expectedOutput1);
+    Assert.assertEquals(4, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=0"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=1"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0/p=2"));
+    expectedOutput2 = "/warehouse/customLocation/ext/b1";
+    Assert.assertTrue(output.containsKey(expectedOutput2));
+    coveredLocs = output.get(expectedOutput2);
+    Assert.assertEquals(4, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/p=7"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/p=8"));
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/p=9"));
+  }
+
+  @Test
+  public void testGroupLocationsDummyDataSizes() {
+    Set<String> inputLocations = new TreeSet<>();
+    Configuration conf = MetastoreConf.newMetastoreConf();
+    MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.HIVE_IN_TEST, true);
+    MetaToolTaskListExtTblLocs.msConf = conf;
+    MetaToolTaskListExtTblLocs task = new MetaToolTaskListExtTblLocs();
+
+    //Case 1: Multiple unpartitioned external tables, expected o/p without extra data: 1 location (tested in testGroupLocations#1)
+    //        But if there is extra data at ../customLocation, then we list all 3 paths instead
+    inputLocations.add("/warehouse/customLocation/t1");
+    inputLocations.add("/warehouse/customLocation/t2");
+    inputLocations.add("/warehouse/customLocation/t3");
+    Map<String, Long> dataSizes = new HashMap<>();
+    dataSizes.put("/warehouse/customLocation", Long.valueOf(100)); //Simulate 100 bytes extra data at customLocation
+    Map<String, HashSet<String>> output = task.runTest(inputLocations, dataSizes);
+    Assert.assertEquals(3, output.size());
+    String expectedOutput1 = "/warehouse/customLocation/t1";
+    Assert.assertTrue(output.containsKey(expectedOutput1));
+    HashSet<String> coveredLocs = output.get(expectedOutput1);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/t1"));
+
+    //Case 2 : inputs at multiple depths
+    // inputs   ../ext/b0 - contains 1 location
+    //          ../ext/p=0 - contains 1 location
+    //          ../ext/b1/b2/b3 - contains 3 locations (p1, p2, p3)
+    // expected output without extra data : [../ext/b1/b2/b3 containing 3 elements, b0, p=0] (tested in testGroupLocations#2)
+    // expected output with extra data at ../ext/b1/b2/b3 : [p=1, p=2, p=3, b0, p=0]
+    inputLocations.clear();
+    dataSizes.clear();
+    inputLocations.add("/warehouse/customLocation/ext/b0");
+    inputLocations.add("/warehouse/customLocation/ext/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/b1/b2/b3/p=3");
+    dataSizes.put("/warehouse/customLocation/ext/b1/b2/b3", Long.valueOf(100));  // simulate 100 bytes of extra data at ../b3
+    output = task.runTest(inputLocations, dataSizes);
+    Assert.assertEquals(5, output.size());
+    expectedOutput1 = "/warehouse/customLocation/ext/b0";
+    Assert.assertTrue(output.containsKey(expectedOutput1));
+    coveredLocs = output.get(expectedOutput1);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b0"));
+    String expectedOutput2 = "/warehouse/customLocation/ext/p=0";
+    Assert.assertTrue(output.containsKey(expectedOutput2));
+    coveredLocs = output.get(expectedOutput2);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/p=0"));
+    String expectedOutput3 = "/warehouse/customLocation/ext/b1/b2/b3/p=1";
+    Assert.assertTrue(output.containsKey(expectedOutput3));
+    coveredLocs = output.get(expectedOutput3);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=1"));
+    String expectedOutput4 = "/warehouse/customLocation/ext/b1/b2/b3/p=2";
+    Assert.assertTrue(output.containsKey(expectedOutput4));
+    coveredLocs = output.get(expectedOutput4);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=2"));
+    String expectedOutput5 = "/warehouse/customLocation/ext/b1/b2/b3/p=3";
+    Assert.assertTrue(output.containsKey(expectedOutput5));
+    coveredLocs = output.get(expectedOutput5);
+    Assert.assertEquals(1, coveredLocs.size());
+    Assert.assertTrue(coveredLocs.contains("/warehouse/customLocation/ext/b1/b2/b3/p=3"));
+
+    //Case 3 : intermediate directory has extra data
+    // inputs   ../ext/ - contains 4 locations
+    //          ../ext/b1 - contains 3 locations
+    // expected output without extra data : [../ext covering all locations] (tested in testGroupLocations#3)
+    // We simulate extra data at ../ext/b1. So, expected output is the list of all locations.
+    inputLocations.clear();
+    dataSizes.clear();
+    inputLocations.add("/warehouse/customLocation/ext/p=0");
+    inputLocations.add("/warehouse/customLocation/ext/p=1");
+    inputLocations.add("/warehouse/customLocation/ext/p=2");
+    inputLocations.add("/warehouse/customLocation/ext/p=3");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=4");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=5");
+    inputLocations.add("/warehouse/customLocation/ext/b1/p=6");
+    dataSizes.put("/warehouse/customLocation/ext/b1", Long.valueOf(100));  // simulate 100 bytes of extra data at ..ext/b1
+    dataSizes.put("/warehouse/customLocation/ext", Long.valueOf(100));// since ext/b1 contains 100 bytes, ../ext also has 100 bytes
+    output = task.runTest(inputLocations, dataSizes);
+    Assert.assertEquals(7, output.size());
+    Assert.assertTrue(output.keySet().containsAll(inputLocations));
+    for (String outLoc : output.keySet()) {
+      Assert.assertTrue(output.get(outLoc).contains(outLoc));
+    }
+  }
+}
+