You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by se...@apache.org on 2016/11/22 02:46:38 UTC
[29/35] hive git commit: HIVE-15072: Schematool should recognize
missing tables in metastore (Naveen Gangam via Chaoyu Tang)
HIVE-15072: Schematool should recognize missing tables in metastore (Naveen Gangam via Chaoyu Tang)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/929ebbaa
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/929ebbaa
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/929ebbaa
Branch: refs/heads/hive-14535
Commit: 929ebbaa095a807bfdf3169e4f57dac7b98c2191
Parents: d2cb327
Author: Chaoyu Tang <ct...@cloudera.com>
Authored: Sun Nov 20 20:20:37 2016 -0500
Committer: Chaoyu Tang <ct...@cloudera.com>
Committed: Sun Nov 20 20:20:37 2016 -0500
----------------------------------------------------------------------
.../org/apache/hive/beeline/HiveSchemaTool.java | 110 +++++++++++++++++++
.../org/apache/hive/beeline/TestSchemaTool.java | 40 ++++++-
2 files changed, 149 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/929ebbaa/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index d82c224..3402470 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -49,12 +49,16 @@ import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
import java.sql.Connection;
+import java.sql.DatabaseMetaData;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
public class HiveSchemaTool {
private String userName = null;
@@ -300,6 +304,7 @@ public class HiveSchemaTool {
public void doValidate() throws HiveMetaException {
System.out.print("Starting metastore validation");
validateSequences();
+ validateSchemaTables(); // boolean result (false when tables are missing) is discarded here
System.out.print("Done with metastore validation");
}
@@ -365,6 +370,111 @@ public class HiveSchemaTool {
}
}
+  /**
+   * Checks that every table created by the schema scripts of the current metastore
+   * schema version exists in the metastore database.
+   *
+   * The tables present in the database are listed through JDBC metadata; the expected
+   * tables are collected by parsing the oracle schema script (and the scripts it nests)
+   * for that version. All names are compared in lower case.
+   *
+   * @return true when no expected table is missing; false when tables are missing, when
+   *         the schema scripts cannot be parsed, or when they declare no tables at all
+   * @throws HiveMetaException if the tables cannot be listed from the metastore database
+   */
+  boolean validateSchemaTables() throws HiveMetaException {
+    List<String> dbTables = new ArrayList<String>();
+    List<String> schemaTables = new ArrayList<String>();
+    List<String> subScripts = new ArrayList<String>();
+
+    // NOTE(review): the first connection is not closed in this method; presumably
+    // getMetaStoreSchemaVersion() closes it, which would explain why a second
+    // connection is opened right after - confirm.
+    Connection hmsConn = getConnectionToMetastore(false);
+    String version = getMetaStoreSchemaVersion(hmsConn);
+    hmsConn = getConnectionToMetastore(false);
+
+    System.out.println("Validating tables in the schema for version " + version);
+    // try-with-resources closes the result set and then the connection; the connection
+    // is still closed when closing the result set fails, and a close failure can no
+    // longer mask an exception raised while reading.
+    try (Connection conn = hmsConn;
+        ResultSet rs = conn.getMetaData().getTables(null, null, "%", new String[] {"TABLE"})) {
+      while (rs.next()) {
+        String table = rs.getString("TABLE_NAME");
+        dbTables.add(table.toLowerCase());
+        LOG.debug("Found table " + table + " in HMS dbstore");
+      }
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to retrieve schema tables from the metastore database", e);
+    }
+
+    // parse the schema file to determine the tables that are expected to exist
+    // we are using oracle schema because it is simpler to parse, no quotes or backticks etc
+    String baseDir = new File(metaStoreSchemaInfo.getMetaStoreScriptDir()).getParent();
+    String schemaFile = baseDir + "/oracle/hive-schema-" + version + ".oracle.sql";
+
+    try {
+      LOG.info("Parsing schema script " + schemaFile);
+      subScripts.addAll(findCreateTable(schemaFile, schemaTables));
+      // nested scripts may themselves reference further scripts, so drain a work list
+      while (!subScripts.isEmpty()) {
+        schemaFile = baseDir + "/oracle/" + subScripts.remove(0);
+        LOG.info("Parsing subscript " + schemaFile);
+        subScripts.addAll(findCreateTable(schemaFile, schemaTables));
+      }
+    } catch (Exception e) {
+      // report why validation failed instead of returning false without a trace
+      LOG.error("Failed to parse schema script " + schemaFile, e);
+      System.err.println("Exception in parsing schema file " + schemaFile
+          + ". Cause: " + e.getMessage());
+      return false;
+    }
+
+    // an empty expectation would make the diff below trivially succeed
+    if (schemaTables.isEmpty()) {
+      System.err.println("No tables were found in the schema scripts for version " + version
+          + ", unable to validate the metastore tables");
+      return false;
+    }
+
+    System.out.println("Expected (from schema definition) " + schemaTables.size() +
+        " tables, Found (from HMS metastore) " + dbTables.size() + " tables");
+
+    // now diff the lists
+    schemaTables.removeAll(dbTables);
+    if (schemaTables.size() > 0) {
+      System.out.println(schemaTables.size() + " tables [ " + Arrays.toString(schemaTables.toArray())
+          + " ] are missing from the database schema.");
+      return false;
+    } else {
+      System.out.println("Schema table validation successful");
+      return true;
+    }
+  }
+
+  /**
+   * Scans one schema script for the tables it creates and the nested scripts it references.
+   *
+   * A line starting with "@" names a nested script; everything after the "@" up to the
+   * first semicolon (or the end of the line) is returned as the script name. Any other
+   * line containing a CREATE TABLE statement, with or without IF NOT EXISTS, contributes
+   * its table name in lower case to tableList.
+   *
+   * @param path path of the schema script to read
+   * @param tableList list that receives the lower-cased names of the created tables
+   * @return names of the nested scripts referenced by this script, in file order
+   * @throws IOException if the script cannot be opened or read, so that a missing script
+   *         is not mistaken for a script that creates no tables
+   */
+  private List<String> findCreateTable(String path, List<String> tableList) throws IOException {
+    final String NESTED_SCRIPT_IDENTIFIER = "@";
+    // The optional clause must carry its own leading whitespace, otherwise "IF" is
+    // captured as the table name; the name stops at whitespace or an opening parenthesis.
+    Pattern regexp = Pattern.compile("CREATE TABLE(?:\\s+IF NOT EXISTS)?\\s+([^\\s(]+)");
+    List<String> subs = new ArrayList<String>();
+
+    try (BufferedReader reader = new BufferedReader(new FileReader(path))) {
+      String line;
+      while ((line = reader.readLine()) != null) {
+        if (line.startsWith(NESTED_SCRIPT_IDENTIFIER)) {
+          // remove the trailing SEMI-COLON if any
+          int semicolon = line.indexOf(';');
+          int endIndex = (semicolon > -1) ? semicolon : line.length();
+          subs.add(line.substring(NESTED_SCRIPT_IDENTIFIER.length(), endIndex));
+          continue;
+        }
+        Matcher matcher = regexp.matcher(line);
+        if (matcher.find()) {
+          String table = matcher.group(1);
+          tableList.add(table.toLowerCase());
+          LOG.debug("Found table " + table + " in the schema");
+        }
+      }
+    }
+
+    return subs;
+  }
+
/**
* Run pre-upgrade scripts corresponding to a given upgrade script,
* if any exist. The errors from pre-upgrade are ignored.
http://git-wip-us.apache.org/repos/asf/hive/blob/929ebbaa/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
index 8aa4173..2209c83 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
@@ -104,6 +104,44 @@ public class TestSchemaTool extends TestCase {
}
/**
+   * Checks schema table validation against a freshly initialized schema, an upgraded
+   * schema, a schema with two tables renamed away, and the schema after the renames
+   * are reverted.
+   * @throws Exception
+   */
+  public void testValidateSchemaTables() throws Exception {
+    // a freshly initialized 2.0.0 schema is expected to validate
+    schemaTool.doInit("2.0.0");
+    assertTrue(schemaTool.validateSchemaTables());
+
+    // the schema is expected to still validate after upgrading to 2.2.0
+    schemaTool.doUpgrade("2.2.0");
+    assertTrue(schemaTool.validateSchemaTables());
+
+    // renaming two tables makes them appear missing to the validation
+    final String[] renameAway = {
+      "RENAME TABLE SEQUENCE_TABLE to SEQUENCE_TABLE_RENAMED",
+      "RENAME TABLE NUCLEUS_TABLES to NUCLEUS_TABLES_RENAMED"
+    };
+    File script = generateTestScript(renameAway);
+    schemaTool.runBeeLine(script.getPath());
+    assertFalse(schemaTool.validateSchemaTables());
+
+    // restore the original table names; validation is expected to pass again
+    final String[] renameBack = {
+      "RENAME TABLE SEQUENCE_TABLE_RENAMED to SEQUENCE_TABLE",
+      "RENAME TABLE NUCLEUS_TABLES_RENAMED to NUCLEUS_TABLES"
+    };
+    script = generateTestScript(renameBack);
+    schemaTool.runBeeLine(script.getPath());
+    assertTrue(schemaTool.validateSchemaTables());
+  }
+
+ /**
* Test dryrun of schema initialization
* @throws Exception
*/
@@ -520,4 +558,4 @@ public class TestSchemaTool extends TestCase {
out.close();
return preUpgradeScript;
}
-}
\ No newline at end of file
+}