You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/11/22 02:46:38 UTC

[29/35] hive git commit: HIVE-15072: Schematool should recognize missing tables in metastore (Naveen Gangam via Chaoyu Tang)

HIVE-15072: Schematool should recognize missing tables in metastore (Naveen Gangam via Chaoyu Tang)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/929ebbaa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/929ebbaa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/929ebbaa

Branch: refs/heads/hive-14535
Commit: 929ebbaa095a807bfdf3169e4f57dac7b98c2191
Parents: d2cb327
Author: Chaoyu Tang <ct...@cloudera.com>
Authored: Sun Nov 20 20:20:37 2016 -0500
Committer: Chaoyu Tang <ct...@cloudera.com>
Committed: Sun Nov 20 20:20:37 2016 -0500

----------------------------------------------------------------------
 .../org/apache/hive/beeline/HiveSchemaTool.java | 110 +++++++++++++++++++
 .../org/apache/hive/beeline/TestSchemaTool.java |  40 ++++++-
 2 files changed, 149 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/929ebbaa/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index d82c224..3402470 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -49,12 +49,16 @@ import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintStream;
 import java.sql.Connection;
+import java.sql.DatabaseMetaData;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 public class HiveSchemaTool {
   private String userName = null;
@@ -300,6 +304,7 @@ public class HiveSchemaTool {
   public void doValidate() throws HiveMetaException {
+    // Announce the start of validation. print() is used rather than println(), so no
+    // line terminator is written after this message.
     System.out.print("Starting metastore validation");
     validateSequences();
+    // NOTE(review): validateSchemaTables() returns a boolean that is discarded here, so
+    // the completion message below is printed whether or not tables are missing.
+    validateSchemaTables();
 
     System.out.print("Done with metastore validation");
   }
@@ -365,6 +370,111 @@ public class HiveSchemaTool {
     }
   }
 
+  /**
+   * Checks that every table created by the schema definition scripts for the current
+   * metastore schema version exists in the metastore database.
+   *
+   * The expected table names are read from the oracle flavour of the schema scripts
+   * (including any nested scripts they reference) and compared, lower-cased, against the
+   * table names reported by the JDBC metadata of the metastore connection.
+   *
+   * @return true when every expected table was found in the database; false when tables
+   *         are missing, when the schema scripts could not be parsed, or when they yielded
+   *         no table definitions at all
+   * @throws HiveMetaException if the table list cannot be read from the metastore database
+   */
+  boolean validateSchemaTables() throws HiveMetaException {
+    List<String> dbTables     = new ArrayList<String>();
+    List<String> schemaTables = new ArrayList<String>();
+    List<String> subScripts   = new ArrayList<String>();
+    Connection versionConn    = getConnectionToMetastore(false);
+    String version            = getMetaStoreSchemaVersion(versionConn);
+
+    System.out.println("Validating tables in the schema for version " + version);
+
+    // A fresh connection is opened for the table listing, exactly as before.
+    // NOTE(review): presumably getMetaStoreSchemaVersion() closes the connection it is
+    // handed, which is why it is not reused or closed here - confirm.
+    // try-with-resources closes the connection even when closing the result set fails,
+    // and a failure while closing can no longer mask the exception that caused it.
+    try (Connection hmsConn = getConnectionToMetastore(false)) {
+      DatabaseMetaData metadata = hmsConn.getMetaData();
+      String[] types            = {"TABLE"};
+      try (ResultSet rs = metadata.getTables(null, null, "%", types)) {
+        while (rs.next()) {
+          String table = rs.getString("TABLE_NAME");
+          dbTables.add(table.toLowerCase());
+          LOG.debug("Found table " + table + " in HMS dbstore");
+        }
+      }
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to retrieve the list of tables from the metastore", e);
+    }
+
+    // parse the schema file to determine the tables that are expected to exist
+    // we are using oracle schema because it is simpler to parse, no quotes or backticks etc
+    String baseDir      = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent();
+    String schemaScript = baseDir + "/oracle/hive-schema-" + version + ".oracle.sql";
+    String currentFile  = schemaScript;
+
+    try {
+      LOG.info("Parsing schema script " + currentFile);
+      subScripts.addAll(findCreateTable(currentFile, schemaTables));
+      // nested scripts may reference further scripts, so keep going until the queue is empty
+      while (!subScripts.isEmpty()) {
+        currentFile = baseDir + "/oracle/" + subScripts.remove(0);
+        LOG.info("Parsing subscript " + currentFile);
+        subScripts.addAll(findCreateTable(currentFile, schemaTables));
+      }
+    } catch (Exception e) {
+      // report the reason instead of failing silently
+      LOG.error("Failed to parse schema script " + currentFile, e);
+      System.err.println("Failed to parse schema script " + currentFile + ": " + e.getMessage());
+      return false;
+    }
+
+    // An empty expectation list means the scripts were unreadable or held no CREATE TABLE
+    // statements; treating that as success would hide a broken installation.
+    if (schemaTables.isEmpty()) {
+      System.err.println("No table definitions found in schema script " + schemaScript
+          + ", unable to validate the metastore tables");
+      return false;
+    }
+
+    System.out.println("Expected (from schema definition) " + schemaTables.size() +
+        " tables, Found (from HMS metastore) " + dbTables.size() + " tables");
+
+    // now diff the lists
+    schemaTables.removeAll(dbTables);
+    if (schemaTables.size() > 0) {
+      System.out.println(schemaTables.size() + " tables [ " + Arrays.toString(schemaTables.toArray())
+          + " ] are missing from the database schema.");
+      return false;
+    } else {
+      System.out.println("Schema table validation successful");
+      return true;
+    }
+  }
+
+  /**
+   * Scans a single schema script, collecting the tables it creates and the nested
+   * scripts it references.
+   *
+   * @param path      path of the schema script to read
+   * @param tableList receives the lower-cased table name of every CREATE TABLE line found
+   * @return the names of the nested scripts (lines starting with "@", with the leading "@"
+   *         and anything from the first ";" onwards removed), in the order they appear
+   * @throws IOException if the script cannot be opened or read; previously such failures
+   *         were only printed, which made an unreadable script look like an empty one
+   */
+  private List<String> findCreateTable(String path, List<String> tableList) throws IOException {
+    final String NESTED_SCRIPT_IDENTIFIER = "@";
+    // The optional IF NOT EXISTS clause is consumed explicitly so that "IF" is never taken
+    // for the table name, and the name stops at whitespace or an opening parenthesis.
+    final Pattern createTable =
+        Pattern.compile("CREATE TABLE\\s+(?:IF NOT EXISTS\\s+)?([^\\s(]+)");
+    List<String> subs = new ArrayList<String>();
+
+    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.startsWith(NESTED_SCRIPT_IDENTIFIER)) {
+          // remove the trailing SEMI-COLON if any
+          int semicolon = line.indexOf(';');
+          int endIndex  = (semicolon > -1) ? semicolon : line.length();
+          subs.add(line.substring(NESTED_SCRIPT_IDENTIFIER.length(), endIndex));
+          continue;
+        }
+        Matcher matcher = createTable.matcher(line);
+        if (matcher.find()) {
+          String table = matcher.group(1);
+          tableList.add(table.toLowerCase());
+          LOG.debug("Found table " + table + " in the schema");
+        }
+      }
+    }
+
+    return subs;
+  }
+
   /**
    *  Run pre-upgrade scripts corresponding to a given upgrade script,
    *  if any exist. The errors from pre-upgrade are ignored.

http://git-wip-us.apache.org/repos/asf/hive/blob/929ebbaa/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
index 8aa4173..2209c83 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
@@ -104,6 +104,44 @@ public class TestSchemaTool extends TestCase {
   }
 
   /**
+   * Test to validate that all tables exist in the HMS metastore.
+   * @throws Exception
+   */
+  public void testValidateSchemaTables() throws Exception {
+    // A freshly initialized 2.0.0 schema is expected to validate.
+    schemaTool.doInit("2.0.0");
+    assertTrue(schemaTool.validateSchemaTables());
+
+    // The same is expected after upgrading to the 2.2.0 schema.
+    schemaTool.doUpgrade("2.2.0");
+    assertTrue(schemaTool.validateSchemaTables());
+
+    // Rename a couple of tables so that they look missing; validation is expected to fail.
+    File renameScript = generateTestScript(new String[] {
+        "RENAME TABLE SEQUENCE_TABLE to SEQUENCE_TABLE_RENAMED",
+        "RENAME TABLE NUCLEUS_TABLES to NUCLEUS_TABLES_RENAMED"
+    });
+    schemaTool.runBeeLine(renameScript.getPath());
+    assertFalse(schemaTool.validateSchemaTables());
+
+    // Restore the original table names; validation is expected to pass again.
+    File restoreScript = generateTestScript(new String[] {
+        "RENAME TABLE SEQUENCE_TABLE_RENAMED to SEQUENCE_TABLE",
+        "RENAME TABLE NUCLEUS_TABLES_RENAMED to NUCLEUS_TABLES"
+    });
+    schemaTool.runBeeLine(restoreScript.getPath());
+    assertTrue(schemaTool.validateSchemaTables());
+  }
+
+  /**
    * Test dryrun of schema initialization
    * @throws Exception
    */
@@ -520,4 +558,4 @@ public class TestSchemaTool extends TestCase {
     out.close();
     return preUpgradeScript;
   }
-}
\ No newline at end of file
+}