You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by yc...@apache.org on 2016/11/22 15:23:32 UTC
hive git commit: HIVE-15073: Schematool should detect malformed URIs
(Yongzhi Chen, reviewed by Aihua Xu)
Repository: hive
Updated Branches:
refs/heads/master 4a235fc55 -> a6c4004a9
HIVE-15073: Schematool should detect malformed URIs (Yongzhi Chen, reviewed by Aihua Xu)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6c4004a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6c4004a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6c4004a
Branch: refs/heads/master
Commit: a6c4004a97209a7b208037fe2de2d8dc4562e4ed
Parents: 4a235fc
Author: Yongzhi Chen <yc...@apache.org>
Authored: Thu Nov 17 17:30:52 2016 -0500
Committer: Yongzhi Chen <yc...@apache.org>
Committed: Tue Nov 22 09:55:38 2016 -0500
----------------------------------------------------------------------
.../org/apache/hive/beeline/HiveSchemaTool.java | 273 ++++++++++++++++++-
.../org/apache/hive/beeline/TestSchemaTool.java | 52 ++++
2 files changed, 324 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/hive/blob/a6c4004a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index 3402470..a2ab3e0 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -29,6 +29,7 @@ import org.apache.commons.cli.ParseException;
import org.apache.commons.io.output.NullOutputStream;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.metastore.HiveMetaException;
@@ -48,8 +49,10 @@ import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintStream;
+import java.net.URI;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
+import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
@@ -177,6 +180,274 @@ public class HiveSchemaTool {
}
}
+ boolean validateLocations(String defaultLocPrefix) throws HiveMetaException {
+ boolean rtn;
+ rtn = checkMetaStoreDBLocation(defaultLocPrefix);
+ rtn = checkMetaStoreTableLocation(defaultLocPrefix) && rtn;
+ rtn = checkMetaStorePartitionLocation(defaultLocPrefix) && rtn;
+ return rtn;
+ }
+
+ private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException {
+ String itemName = res.getString(nameInx);
+ return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName;
+ }
+
+ // read schema version from metastore
+ private boolean checkMetaStoreDBLocation(String locHeader)
+ throws HiveMetaException {
+ String defaultPrefix = locHeader;
+ String dbLoc;
+ boolean isValid = true;
+ int numOfInvalid = 0;
+ Connection metastoreConn = getConnectionToMetastore(true);
+ if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+ dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt";
+ } else {
+ dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt";
+ }
+ String locValue;
+ String dbName;
+ try(Statement stmt = metastoreConn.createStatement();
+ ResultSet res = stmt.executeQuery(dbLoc)) {
+ while (res.next()) {
+ locValue = res.getString(3);
+ if (locValue == null) {
+ System.err.println("NULL Location for DB with " + getNameOrID(res,2,1));
+ numOfInvalid++;
+ } else {
+ URI currentUri = null;
+ try {
+ currentUri = new Path(locValue).toUri();
+ } catch (Exception pe) {
+ System.err.println("Invalid Location for DB with " + getNameOrID(res,2,1));
+ System.err.println(pe.getMessage());
+ numOfInvalid++;
+ continue;
+ }
+
+ if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+ System.err.println("Missing Location scheme for DB with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length())
+ .compareToIgnoreCase(defaultPrefix) != 0) {
+ System.err.println("Mismatch root Location for DB with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ }
+ }
+ }
+
+ } catch (SQLException e) {
+ throw new HiveMetaException("Failed to get DB Location Info.", e);
+ }
+ finally {
+ try {
+ metastoreConn.close();
+ } catch (SQLException e) {
+ System.err.println("Failed to close the metastore connection");
+ e.printStackTrace(System.err);
+ }
+ }
+ if (numOfInvalid > 0) {
+ isValid = false;
+ System.err.println("Total number of invalid DB locations is: "+ numOfInvalid);
+ }
+ return isValid;
+ }
+
+ private boolean checkMetaStoreTableLocation(String locHeader)
+ throws HiveMetaException {
+ String defaultPrefix = locHeader;
+ String tabLoc, tabIDRange;
+ boolean isValid = true;
+ Connection metastoreConn = getConnectionToMetastore(true);
+ int numOfInvalid = 0;
+ if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+ tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" ";
+ } else {
+ tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS";
+ }
+
+ if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+ tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " +
+ "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " +
+ "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
+ } else {
+ tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_ID >= ? and tbl.TBL_ID <= ? inner join DBS dbt on tbl.DB_ID = dbt.DB_ID";
+ }
+ String locValue;
+ String tabName;
+ long maxID = 0, minID = 0, curID;
+ long rtnSize = 2000;
+
+ try {
+ Statement stmt = metastoreConn.createStatement();
+ ResultSet res = stmt.executeQuery(tabIDRange);
+ if (res.next()) {
+ maxID = res.getLong(1);
+ minID = res.getLong(2);
+ }
+ res.close();
+ stmt.close();
+ curID = minID;
+ PreparedStatement pStmt = metastoreConn.prepareStatement(tabLoc);
+ while (minID <= maxID) {
+ pStmt.setLong(1, minID);
+ pStmt.setLong(2, minID + rtnSize);
+ res = pStmt.executeQuery();
+ while (res.next()) {
+ locValue = res.getString(3);
+ if (locValue == null) {
+ System.err.println("In DB with " + getNameOrID(res,5,4));
+ System.err.println("NULL Location for TABLE with " + getNameOrID(res,2,1));
+ numOfInvalid++;
+ } else {
+ URI currentUri = null;
+ try {
+ currentUri = new Path(locValue).toUri();
+ } catch (Exception pe) {
+ System.err.println("In DB with " + getNameOrID(res,5,4));
+ System.err.println("Invalid location for Table with " + getNameOrID(res,2,1));
+ System.err.println(pe.getMessage());
+ numOfInvalid++;
+ continue;
+ }
+ if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+ System.err.println("In DB with " + getNameOrID(res,5,4));
+ System.err.println("Missing Location scheme for Table with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ } else if(defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length())
+ .compareToIgnoreCase(defaultPrefix) != 0) {
+ System.err.println("In DB with " + getNameOrID(res,5,4));
+ System.err.println("Mismatch root Location for Table with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ }
+ }
+ }
+ res.close();
+ minID += rtnSize + 1;
+
+ }
+ pStmt.close();
+
+ } catch (SQLException e) {
+ throw new HiveMetaException("Failed to get Table Location Info.", e);
+ }
+ finally {
+ try {
+ metastoreConn.close();
+ } catch (SQLException e) {
+ System.err.println("Failed to close the metastore connection");
+ e.printStackTrace(System.err);
+ }
+ }
+ if (numOfInvalid > 0) {
+ isValid = false;
+ System.err.println("Total number of invalid TABLE locations is: "+ numOfInvalid);
+ }
+ return isValid;
+ }
+
+ private boolean checkMetaStorePartitionLocation(String locHeader)
+ throws HiveMetaException {
+ String defaultPrefix = locHeader;
+ String partLoc, partIDRange;
+ boolean isValid = true;
+ int numOfInvalid = 0;
+ Connection metastoreConn = getConnectionToMetastore(true);
+ if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+ partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" ";
+ } else {
+ partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS";
+ }
+
+ if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+ partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt "
+ + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? "
+ + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join "
+ + "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
+ } else {
+ partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt "
+ + "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ? "
+ + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID ";
+ }
+ String locValue;
+ String tabName;
+ long maxID = 0, minID = 0, curID;
+ long rtnSize = 2000;
+
+ try {
+ Statement stmt = metastoreConn.createStatement();
+ ResultSet res = stmt.executeQuery(partIDRange);
+ if (res.next()) {
+ maxID = res.getLong(1);
+ minID = res.getLong(2);
+ }
+ res.close();
+ stmt.close();
+ curID = minID;
+ PreparedStatement pStmt = metastoreConn.prepareStatement(partLoc);
+ while (minID <= maxID) {
+ pStmt.setLong(1, minID);
+ pStmt.setLong(2, minID + rtnSize);
+ res = pStmt.executeQuery();
+ while (res.next()) {
+ locValue = res.getString(3);
+ if (locValue == null) {
+ System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+ System.err.println("NULL Location for PARTITION with " + getNameOrID(res,2,1));
+ numOfInvalid++;
+ } else {
+ URI currentUri = null;
+ try {
+ currentUri = new Path(locValue).toUri();
+ } catch (Exception pe) {
+ System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+ System.err.println("Invalid location for PARTITON with " + getNameOrID(res,2,1));
+ System.err.println(pe.getMessage());
+ numOfInvalid++;
+ continue;
+ }
+ if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+ System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+ System.err.println("Missing Location scheme for PARTITON with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ } else if (defaultPrefix != null && !defaultPrefix.isEmpty() && locValue.substring(0,defaultPrefix.length())
+ .compareToIgnoreCase(defaultPrefix) != 0) {
+ System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+ System.err.println("Mismatch root Location for PARTITON with " + getNameOrID(res,2,1));
+ System.err.println("The Location is: " + locValue);
+ numOfInvalid++;
+ }
+ }
+ }
+ res.close();
+ minID += rtnSize + 1;
+ }
+ pStmt.close();
+ } catch (SQLException e) {
+ throw new HiveMetaException("Failed to get Partiton Location Info.", e);
+ }
+ finally {
+ try {
+ metastoreConn.close();
+ } catch (SQLException e) {
+ System.err.println("Failed to close the metastore connection");
+ e.printStackTrace(System.err);
+ }
+ }
+ if (numOfInvalid > 0) {
+ isValid = false;
+ System.err.println("Total number of invalid PARTITION locations is: "+ numOfInvalid);
+ }
+ return isValid;
+ }
+
// test the connection metastore using the config property
private void testConnectionToMetastore() throws HiveMetaException {
Connection conn = getConnectionToMetastore(true);
@@ -305,7 +576,7 @@ public class HiveSchemaTool {
System.out.print("Starting metastore validation");
validateSequences();
validateSchemaTables();
-
+ validateLocations(null);
System.out.print("Done with metastore validation");
}
http://git-wip-us.apache.org/repos/asf/hive/blob/a6c4004a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
index 2209c83..5dc17b9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
@@ -529,6 +529,58 @@ public class TestSchemaTool extends TestCase {
assertEquals(expectedSQL, flattenedSql);
}
+ /**
+ * Test validate uri of locations
+ * @throws Exception
+ */
+ public void testValidateLocations() throws Exception {
+ schemaTool.doInit();
+ String defaultRoot = "hdfs://myhost.com:8020";
+ //check empty DB
+ boolean isValid = schemaTool.validateLocations(null);
+ assertTrue(isValid);
+ isValid = schemaTool.validateLocations(defaultRoot);
+ assertTrue(isValid);
+
+ String dbmydbLocation = defaultRoot + "/user/hive/warehouse/mydb";
+ // Test valid case
+ String[] scripts = new String[] {
+ "insert into DBS values(2, 'my db', 'hdfs://myhost.com:8020/user/hive/warehouse/mydb', 'mydb', 'public', 'role')",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)",
+ "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)"
+
+ };
+ File scriptFile = generateTestScript(scripts);
+ schemaTool.runBeeLine(scriptFile.getPath());
+ isValid = schemaTool.validateLocations(null);
+ assertTrue(isValid);
+ isValid = schemaTool.validateLocations(defaultRoot);
+ assertTrue(isValid);
+ scripts = new String[] {
+ "delete from PARTITIONS",
+ "delete from TBLS",
+ "delete from SDS",
+ "delete from DBS",
+ "insert into DBS values(2, 'my db', '/user/hive/warehouse/mydb', 'mydb', 'public', 'role')",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)",
+ "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (5000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2016_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+ "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (3000 ,1435255431,2,0 ,'hive',0,3000,'mytal3000','MANAGED_TABLE',NULL,NULL)",
+ "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(5000, 1441402388,0, 'd1=1/d2=5000',5000,2)"
+ };
+ scriptFile = generateTestScript(scripts);
+ schemaTool.runBeeLine(scriptFile.getPath());
+ isValid = schemaTool.validateLocations(null);
+ assertFalse(isValid);
+ isValid = schemaTool.validateLocations(defaultRoot);
+ assertFalse(isValid);
+ }
+
private File generateTestScript(String [] stmts) throws IOException {
File testScriptFile = File.createTempFile("schematest", ".sql");
testScriptFile.deleteOnExit();