Posted to commits@hive.apache.org by yc...@apache.org on 2016/11/22 15:23:32 UTC

hive git commit: HIVE-15073: Schematool should detect malformed URIs (Yongzhi Chen, reviewed by Aihua Xu)

Repository: hive
Updated Branches:
  refs/heads/master 4a235fc55 -> a6c4004a9


HIVE-15073: Schematool should detect malformed URIs (Yongzhi Chen, reviewed by Aihua Xu)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/a6c4004a
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/a6c4004a
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/a6c4004a

Branch: refs/heads/master
Commit: a6c4004a97209a7b208037fe2de2d8dc4562e4ed
Parents: 4a235fc
Author: Yongzhi Chen <yc...@apache.org>
Authored: Thu Nov 17 17:30:52 2016 -0500
Committer: Yongzhi Chen <yc...@apache.org>
Committed: Tue Nov 22 09:55:38 2016 -0500

----------------------------------------------------------------------
 .../org/apache/hive/beeline/HiveSchemaTool.java | 273 ++++++++++++++++++-
 .../org/apache/hive/beeline/TestSchemaTool.java |  52 ++++
 2 files changed, 324 insertions(+), 1 deletion(-)
----------------------------------------------------------------------
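The heart of the new validation is Hadoop's Path/URI parsing: a location is
accepted only if it parses and carries a URI scheme, and, when a default root
is supplied, it must also start with that root (a plain case-insensitive
startsWith comparison, omitted below). A minimal, self-contained sketch of the
idea, not part of the patch; the class name and sample strings are
illustrative, and it assumes hadoop-common on the classpath:

    import java.net.URI;

    import org.apache.hadoop.fs.Path;

    public class LocationCheckDemo {
      public static void main(String[] args) {
        String[] locations = {
            "hdfs://myhost.com:8020/user/hive/warehouse/mydb", // well-formed
            "/user/hive/warehouse/mydb",                       // no scheme
            "yourhost.com:8020/user/hive/warehouse/mydb"       // does not parse
        };
        for (String loc : locations) {
          try {
            URI uri = new Path(loc).toUri();
            if (uri.getScheme() == null || uri.getScheme().isEmpty()) {
              System.out.println(loc + " -> missing scheme");
            } else {
              System.out.println(loc + " -> ok, scheme " + uri.getScheme());
            }
          } catch (Exception e) {
            // Path wraps URISyntaxException in IllegalArgumentException
            System.out.println(loc + " -> invalid: " + e.getMessage());
          }
        }
      }
    }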


http://git-wip-us.apache.org/repos/asf/hive/blob/a6c4004a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
----------------------------------------------------------------------
diff --git a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
index 3402470..a2ab3e0 100644
--- a/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
+++ b/beeline/src/java/org/apache/hive/beeline/HiveSchemaTool.java
@@ -29,6 +29,7 @@ import org.apache.commons.cli.ParseException;
 import org.apache.commons.io.output.NullOutputStream;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.HiveMetaException;
@@ -48,8 +49,10 @@ import java.io.FileReader;
 import java.io.FileWriter;
 import java.io.IOException;
 import java.io.PrintStream;
+import java.net.URI;
 import java.sql.Connection;
 import java.sql.DatabaseMetaData;
+import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.SQLException;
 import java.sql.Statement;
@@ -177,6 +180,274 @@ public class HiveSchemaTool {
     }
   }
 
+  boolean validateLocations(String defaultLocPrefix) throws HiveMetaException {
+    boolean rtn;
+    rtn = checkMetaStoreDBLocation(defaultLocPrefix);
+    rtn = checkMetaStoreTableLocation(defaultLocPrefix) && rtn;
+    rtn = checkMetaStorePartitionLocation(defaultLocPrefix) && rtn;
+    return rtn;
+  }
+
+  private String getNameOrID(ResultSet res, int nameInx, int idInx) throws SQLException {
+    String itemName = res.getString(nameInx);
+    return (itemName == null || itemName.isEmpty()) ? "ID: " + res.getString(idInx) : "Name: " + itemName;
+  }
+
+  // check the DB_LOCATION_URI recorded for each database in the metastore
+  private boolean checkMetaStoreDBLocation(String locHeader)
+      throws HiveMetaException {
+    String defaultPrefix = locHeader;
+    String dbLoc;
+    boolean isValid = true;
+    int numOfInvalid = 0;
+    Connection metastoreConn = getConnectionToMetastore(true);
+    if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+      dbLoc = "select dbt.\"DB_ID\", dbt.\"NAME\", dbt.\"DB_LOCATION_URI\" from \"DBS\" dbt";
+    } else {
+      dbLoc = "select dbt.DB_ID, dbt.NAME, dbt.DB_LOCATION_URI from DBS dbt";
+    }
+    String locValue;
+
+    try(Statement stmt = metastoreConn.createStatement();
+        ResultSet res = stmt.executeQuery(dbLoc)) {
+      while (res.next()) {
+        locValue = res.getString(3);
+        if (locValue == null) {
+          System.err.println("NULL Location for DB with " + getNameOrID(res,2,1));
+          numOfInvalid++;
+        } else {
+          URI currentUri = null;
+          try {
+            currentUri = new Path(locValue).toUri();
+          } catch (Exception pe) {
+            System.err.println("Invalid Location for DB with " + getNameOrID(res,2,1));
+            System.err.println(pe.getMessage());
+            numOfInvalid++;
+            continue;
+          }
+
+          if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+            System.err.println("Missing Location scheme for DB with " + getNameOrID(res,2,1));
+            System.err.println("The Location is: " + locValue);
+            numOfInvalid++;
+          } else if (defaultPrefix != null && !defaultPrefix.isEmpty()
+              && !StringUtils.startsWithIgnoreCase(locValue, defaultPrefix)) {
+            System.err.println("Mismatch root Location for DB with " + getNameOrID(res,2,1));
+            System.err.println("The Location is: " + locValue);
+            numOfInvalid++;
+          }
+        }
+      }
+
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to get DB Location Info.", e);
+    }
+    finally {
+      try {
+        metastoreConn.close();
+      } catch (SQLException e) {
+        System.err.println("Failed to close the metastore connection");
+        e.printStackTrace(System.err);
+      }
+    }
+    if (numOfInvalid > 0) {
+      isValid = false;
+      System.err.println("Total number of invalid DB locations is: "+ numOfInvalid);
+    }
+    return isValid;
+  }
+
+  private boolean checkMetaStoreTableLocation(String locHeader)
+      throws HiveMetaException {
+    String defaultPrefix = locHeader;
+    String tabLoc, tabIDRange;
+    boolean isValid = true;
+    Connection metastoreConn = getConnectionToMetastore(true);
+    int numOfInvalid = 0;
+    if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+      tabIDRange = "select max(\"TBL_ID\"), min(\"TBL_ID\") from \"TBLS\" ";
+    } else {
+      tabIDRange = "select max(TBL_ID), min(TBL_ID) from TBLS";
+    }
+
+    if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+      tabLoc = "select tbl.\"TBL_ID\", tbl.\"TBL_NAME\", sd.\"LOCATION\", dbt.\"DB_ID\", dbt.\"NAME\" from \"TBLS\" tbl inner join " +
+    "\"SDS\" sd on tbl.\"SD_ID\" = sd.\"SD_ID\" and tbl.\"TBL_ID\" >= ? and tbl.\"TBL_ID\"<= ? " +
+    "inner join \"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
+    } else {
+      tabLoc = "select tbl.TBL_ID, tbl.TBL_NAME, sd.LOCATION, dbt.DB_ID, dbt.NAME from TBLS tbl join SDS sd on tbl.SD_ID = sd.SD_ID and tbl.TBL_ID >= ? and tbl.TBL_ID <= ?  inner join DBS dbt on tbl.DB_ID = dbt.DB_ID";
+    }
+    String locValue;
+    long maxID = 0;
+    long minID = 0;
+    long rtnSize = 2000;
+
+    try {
+      Statement stmt = metastoreConn.createStatement();
+      ResultSet res = stmt.executeQuery(tabIDRange);
+      if (res.next()) {
+        maxID = res.getLong(1);
+        minID = res.getLong(2);
+      }
+      res.close();
+      stmt.close();
+
+      PreparedStatement pStmt = metastoreConn.prepareStatement(tabLoc);
+      while (minID <= maxID) {
+        pStmt.setLong(1, minID);
+        pStmt.setLong(2, minID + rtnSize);
+        res = pStmt.executeQuery();
+        while (res.next()) {
+          locValue = res.getString(3);
+          if (locValue == null) {
+            System.err.println("In DB with " + getNameOrID(res,5,4));
+            System.err.println("NULL Location for TABLE with " + getNameOrID(res,2,1));
+            numOfInvalid++;
+          } else {
+            URI currentUri = null;
+            try {
+              currentUri = new Path(locValue).toUri();
+            } catch (Exception pe) {
+              System.err.println("In DB with " + getNameOrID(res,5,4));
+              System.err.println("Invalid location for Table with " + getNameOrID(res,2,1));
+              System.err.println(pe.getMessage());
+              numOfInvalid++;
+              continue;
+            }
+            if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+              System.err.println("In DB with " + getNameOrID(res,5,4));
+              System.err.println("Missing Location scheme for Table with " + getNameOrID(res,2,1));
+              System.err.println("The Location is: " + locValue);
+              numOfInvalid++;
+            } else if (defaultPrefix != null && !defaultPrefix.isEmpty()
+                && !StringUtils.startsWithIgnoreCase(locValue, defaultPrefix)) {
+              System.err.println("In DB with " + getNameOrID(res,5,4));
+              System.err.println("Mismatch root Location for Table with " + getNameOrID(res,2,1));
+              System.err.println("The Location is: " + locValue);
+              numOfInvalid++;
+            }
+          }
+        }
+        res.close();
+        minID += rtnSize + 1;
+
+      }
+      pStmt.close();
+
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to get Table Location Info.", e);
+    }
+    finally {
+      try {
+        metastoreConn.close();
+      } catch (SQLException e) {
+        System.err.println("Failed to close the metastore connection");
+        e.printStackTrace(System.err);
+      }
+    }
+    if (numOfInvalid > 0) {
+      isValid = false;
+      System.err.println("Total number of invalid TABLE locations is: "+ numOfInvalid);
+    }
+    return isValid;
+  }
+
+  private boolean checkMetaStorePartitionLocation(String locHeader)
+      throws HiveMetaException {
+    String defaultPrefix = locHeader;
+    String partLoc, partIDRange;
+    boolean isValid = true;
+    int numOfInvalid = 0;
+    Connection metastoreConn = getConnectionToMetastore(true);
+    if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+      partIDRange = "select max(\"PART_ID\"), min(\"PART_ID\") from \"PARTITIONS\" ";
+    } else {
+      partIDRange = "select max(PART_ID), min(PART_ID) from PARTITIONS";
+    }
+
+    if (getDbCommandParser(dbType).needsQuotedIdentifier()) {
+      partLoc = "select pt.\"PART_ID\", pt.\"PART_NAME\", sd.\"LOCATION\", tbl.\"TBL_ID\", tbl.\"TBL_NAME\",dbt.\"DB_ID\", dbt.\"NAME\" from \"PARTITIONS\" pt "
+           + "inner join \"SDS\" sd on pt.\"SD_ID\" = sd.\"SD_ID\" and pt.\"PART_ID\" >= ? and pt.\"PART_ID\"<= ? "
+           + " inner join \"TBLS\" tbl on pt.\"TBL_ID\" = tbl.\"TBL_ID\" inner join "
+           + "\"DBS\" dbt on tbl.\"DB_ID\" = dbt.\"DB_ID\" ";
+    } else {
+      partLoc = "select pt.PART_ID, pt.PART_NAME, sd.LOCATION, tbl.TBL_ID, tbl.TBL_NAME, dbt.DB_ID, dbt.NAME from PARTITIONS pt "
+          + "inner join SDS sd on pt.SD_ID = sd.SD_ID and pt.PART_ID >= ? and pt.PART_ID <= ?  "
+          + "inner join TBLS tbl on tbl.TBL_ID = pt.TBL_ID inner join DBS dbt on tbl.DB_ID = dbt.DB_ID ";
+    }
+    String locValue;
+    long maxID = 0;
+    long minID = 0;
+    long rtnSize = 2000;
+
+    try {
+      Statement stmt = metastoreConn.createStatement();
+      ResultSet res = stmt.executeQuery(partIDRange);
+      if (res.next()) {
+        maxID = res.getLong(1);
+        minID = res.getLong(2);
+      }
+      res.close();
+      stmt.close();
+
+      PreparedStatement pStmt = metastoreConn.prepareStatement(partLoc);
+      while (minID <= maxID) {
+        pStmt.setLong(1, minID);
+        pStmt.setLong(2, minID + rtnSize);
+        res = pStmt.executeQuery();
+        while (res.next()) {
+          locValue = res.getString(3);
+          if (locValue == null) {
+            System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+            System.err.println("NULL Location for PARTITION with " + getNameOrID(res,2,1));
+            numOfInvalid++;
+          } else {
+            URI currentUri = null;
+            try {
+              currentUri = new Path(locValue).toUri();
+            } catch (Exception pe) {
+              System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+              System.err.println("Invalid location for PARTITON with " + getNameOrID(res,2,1));
+              System.err.println(pe.getMessage());
+              numOfInvalid++;
+              continue;
+            }
+            if (currentUri.getScheme() == null || currentUri.getScheme().isEmpty()) {
+              System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+              System.err.println("Missing Location scheme for PARTITON with " + getNameOrID(res,2,1));
+              System.err.println("The Location is: " + locValue);
+              numOfInvalid++;
+            } else if (defaultPrefix != null && !defaultPrefix.isEmpty()
+                && !StringUtils.startsWithIgnoreCase(locValue, defaultPrefix)) {
+              System.err.println("In DB with " + getNameOrID(res,7,6) + ", TABLE with " + getNameOrID(res,5,4));
+              System.err.println("Mismatch root Location for PARTITON with " + getNameOrID(res,2,1));
+              System.err.println("The Location is: " + locValue);
+              numOfInvalid++;
+            }
+          }
+        }
+        res.close();
+        minID += rtnSize + 1;
+      }
+      pStmt.close();
+    } catch (SQLException e) {
+      throw new HiveMetaException("Failed to get Partiton Location Info.", e);
+    }
+    finally {
+      try {
+        metastoreConn.close();
+      } catch (SQLException e) {
+        System.err.println("Failed to close the metastore connection");
+        e.printStackTrace(System.err);
+      }
+    }
+    if (numOfInvalid > 0) {
+      isValid = false;
+      System.err.println("Total number of invalid PARTITION locations is: "+ numOfInvalid);
+    }
+    return isValid;
+  }
+
   // test the connection to the metastore using the configured properties
   private void testConnectionToMetastore() throws HiveMetaException {
     Connection conn = getConnectionToMetastore(true);
@@ -305,7 +576,7 @@ public class HiveSchemaTool {
     System.out.print("Starting metastore validation");
     validateSequences();
     validateSchemaTables();
-
+    validateLocations(null);
     System.out.print("Done with metastore validation");
   }
 

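Note that both the table and the partition checks walk the metastore in fixed
ID windows of 2,000 rows via a PreparedStatement rather than pulling every row
in one result set. A stripped-down sketch of that paging pattern, with an
illustrative class name and a simplified WHERE clause in place of the patch's
join condition, and with try-with-resources used here for brevity:

    import java.sql.Connection;
    import java.sql.PreparedStatement;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    public class BatchedScan {
      private static final long BATCH = 2000;

      // Visits every TBLS row in windows of BATCH + 1 IDs.
      static void scanTables(Connection conn) throws SQLException {
        long maxId;
        long minId;
        try (Statement stmt = conn.createStatement();
             ResultSet res = stmt.executeQuery(
                 "select max(TBL_ID), min(TBL_ID) from TBLS")) {
          res.next(); // aggregates always return exactly one row
          maxId = res.getLong(1);
          minId = res.getLong(2);
        }
        try (PreparedStatement pStmt = conn.prepareStatement(
            "select TBL_ID, TBL_NAME from TBLS where TBL_ID >= ? and TBL_ID <= ?")) {
          while (minId <= maxId) {
            pStmt.setLong(1, minId);
            pStmt.setLong(2, minId + BATCH);
            try (ResultSet res = pStmt.executeQuery()) {
              while (res.next()) {
                System.out.println(res.getLong(1) + "\t" + res.getString(2));
              }
            }
            minId += BATCH + 1; // next window starts just past the last one
          }
        }
      }
    }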
http://git-wip-us.apache.org/repos/asf/hive/blob/a6c4004a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
----------------------------------------------------------------------
diff --git a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
index 2209c83..5dc17b9 100644
--- a/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
+++ b/itests/hive-unit/src/test/java/org/apache/hive/beeline/TestSchemaTool.java
@@ -529,6 +529,58 @@ public class TestSchemaTool extends TestCase {
     assertEquals(expectedSQL, flattenedSql);
   }
 
+  /**
+   * Test validation of location URIs
+   * @throws Exception
+   */
+  public void testValidateLocations() throws Exception {
+    schemaTool.doInit();
+    String defaultRoot = "hdfs://myhost.com:8020";
+    // Check the freshly initialized (empty) metastore first
+    boolean isValid = schemaTool.validateLocations(null);
+    assertTrue(isValid);
+    isValid = schemaTool.validateLocations(defaultRoot);
+    assertTrue(isValid);
+
+    String dbmydbLocation = defaultRoot + "/user/hive/warehouse/mydb";
+    // Test the valid case: every location parses, carries a URI scheme,
+    // and sits under the default warehouse root.
+    String[] scripts = new String[] {
+        "insert into DBS values(2, 'my db', '" + dbmydbLocation + "', 'mydb', 'public', 'role')",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://myhost.com:8020/user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2,1435255431,2,0,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)",
+        "insert into PARTITIONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME,PART_NAME,SD_ID,TBL_ID) values(1,1441402388,0,'d1=1/d2=1',2,2)"
+    };
+    File scriptFile = generateTestScript(scripts);
+    schemaTool.runBeeLine(scriptFile.getPath());
+    isValid = schemaTool.validateLocations(null);
+    assertTrue(isValid);
+    isValid = schemaTool.validateLocations(defaultRoot);
+    assertTrue(isValid);
+    scripts = new String[] {
+        "delete from PARTITIONS",
+        "delete from TBLS",
+        "delete from SDS",
+        "delete from DBS",
+        "insert into DBS values(2, 'my db', '/user/hive/warehouse/mydb', 'mydb', 'public', 'role')",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (1,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','hdfs://yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (2,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2015_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (2 ,1435255431,2,0 ,'hive',0,1,'mytal','MANAGED_TABLE',NULL,NULL)",
+        "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(1, 1441402388,0, 'd1=1/d2=1',2,2)",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (3000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','yourhost.com:8020/user/hive/warehouse/mydb',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into SDS(SD_ID,CD_ID,INPUT_FORMAT,IS_COMPRESSED,IS_STOREDASSUBDIRECTORIES,LOCATION,NUM_BUCKETS,OUTPUT_FORMAT,SERDE_ID) values (5000,null,'org.apache.hadoop.mapred.TextInputFormat','N','N','file:///user/admin/2016_11_18',-1,'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat',null)",
+        "insert into TBLS(TBL_ID,CREATE_TIME,DB_ID,LAST_ACCESS_TIME,OWNER,RETENTION,SD_ID,TBL_NAME,TBL_TYPE,VIEW_EXPANDED_TEXT,VIEW_ORIGINAL_TEXT) values (3000 ,1435255431,2,0 ,'hive',0,3000,'mytal3000','MANAGED_TABLE',NULL,NULL)",
+        "insert into PARTITiONS(PART_ID,CREATE_TIME,LAST_ACCESS_TIME, PART_NAME,SD_ID,TBL_ID) values(5000, 1441402388,0, 'd1=1/d2=5000',5000,2)"
+    };
+    scriptFile = generateTestScript(scripts);
+    schemaTool.runBeeLine(scriptFile.getPath());
+    isValid = schemaTool.validateLocations(null);
+    assertFalse(isValid);
+    isValid = schemaTool.validateLocations(defaultRoot);
+    assertFalse(isValid);
+  }
+
   private File generateTestScript(String [] stmts) throws IOException {
     File testScriptFile = File.createTempFile("schematest", ".sql");
     testScriptFile.deleteOnExit();
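For orientation: after the second script loads, a call such as
schemaTool.validateLocations(defaultRoot) should, going by the println calls
in HiveSchemaTool above, report the schemeless database location roughly as
follows (reconstructed from the code, not captured from a run):

    Missing Location scheme for DB with Name: mydb
    The Location is: /user/hive/warehouse/mydb
    Total number of invalid DB locations is: 1

with analogous TABLE and PARTITION messages for the rows whose locations fail
to parse ('yourhost.com:8020/...') or sit outside the hdfs://myhost.com:8020
root (the file:///user/admin/... partitions and the hdfs://yourhost.com:8020
table). validateLocations(null) skips the root comparison but still trips on
the schemeless and unparsable rows, which is why both assertFalse checks pass.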