You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hive.apache.org by pr...@apache.org on 2009/10/20 06:42:49 UTC

svn commit: r826950 - in /hadoop/hive/trunk: ./ ql/src/java/org/apache/hadoop/hive/ql/exec/ ql/src/java/org/apache/hadoop/hive/ql/metadata/ ql/src/java/org/apache/hadoop/hive/ql/parse/ ql/src/java/org/apache/hadoop/hive/ql/plan/ ql/src/test/queries/cli...

Author: prasadc
Date: Tue Oct 20 04:42:48 2009
New Revision: 826950

URL: http://svn.apache.org/viewvc?rev=826950&view=rev
Log:
Add 'repair' option to msck. (Cyrus Katrak via prasadc)

Added:
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/repair.q
    hadoop/hive/trunk/ql/src/test/results/clientpositive/repair.q.out
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Tue Oct 20 04:42:48 2009
@@ -69,6 +69,8 @@
 
     HIVE-881. CTAS to work with fields terminator. (Ning Zhang via zshao)
 
+    HIVE-874. Add 'repair' option to msck. (Cyrus Katrak via prasadc).
+
   IMPROVEMENTS
 
     HIVE-760. Add version info to META-INF/MANIFEST.MF.

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Tue Oct 20 04:42:48 2009
@@ -28,6 +28,7 @@
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
@@ -43,10 +44,15 @@
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
+import org.apache.hadoop.hive.metastore.Warehouse;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.InvalidOperationException;
+import org.apache.hadoop.hive.metastore.api.MetaException;
 import org.apache.hadoop.hive.metastore.api.Order;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.hooks.ReadEntity;
+import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 import org.apache.hadoop.hive.ql.metadata.CheckResult;
 import org.apache.hadoop.hive.ql.metadata.Hive;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
@@ -80,9 +86,6 @@
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
 import org.apache.hadoop.hive.shims.ShimLoader;
 import org.apache.hadoop.util.StringUtils;
-import org.apache.hadoop.hive.ql.QueryPlan;
-import org.apache.hadoop.hive.ql.hooks.ReadEntity;
-import org.apache.hadoop.hive.ql.hooks.WriteEntity;
 
 /**
  * DDLTask implementation
@@ -227,14 +230,28 @@
    * @return Returns 0 when execution succeeds and above 0 if it fails.
    */
   private int msck(Hive db, MsckDesc msckDesc) {
-
     CheckResult result = new CheckResult();
+    List<String> repairOutput = new ArrayList<String>();
     try {
       HiveMetaStoreChecker checker = new HiveMetaStoreChecker(db);
       checker.checkMetastore(
         MetaStoreUtils.DEFAULT_DATABASE_NAME, msckDesc.getTableName(),
         msckDesc.getPartitionSpec(),
         result);
+      if(msckDesc.isRepairPartitions()) {
+        Table table = db.getTable(MetaStoreUtils.DEFAULT_DATABASE_NAME,
+            msckDesc.getTableName());
+        for (CheckResult.PartitionResult part : result.getPartitionsNotInMs()) {
+          try {
+            db.createPartition(table,
+                Warehouse.makeSpecFromName(part.getPartitionName()));
+            repairOutput.add("Repair: Added partition to metastore " + msckDesc.getTableName()
+                + ':' + part.getPartitionName());
+          } catch (Exception e) {
+            LOG.warn("Repair error, could not add partition to metastore: ", e);
+          }
+        }
+      }
     } catch (HiveException e) {
       LOG.warn("Failed to run metacheck: ", e);
       return 1;
@@ -242,7 +259,6 @@
       LOG.warn("Failed to run metacheck: ", e);
       return 1;
     } finally {
-
       BufferedWriter resultOut = null;
       try {
         FileSystem fs = msckDesc.getResFile().getFileSystem(conf);
@@ -258,6 +274,14 @@
             "Partitions not in metastore:", resultOut, firstWritten);
         firstWritten |= writeMsckResult(result.getPartitionsNotOnFs(),
             "Partitions missing from filesystem:", resultOut, firstWritten);
+        for (String rout : repairOutput) {
+          if (firstWritten) {
+            resultOut.write(terminator);
+          } else {
+            firstWritten = true;
+          }
+          resultOut.write(rout);
+        }
       } catch (IOException e) {
         LOG.warn("Failed to save metacheck output: ", e);
         return 1;

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/CheckResult.java Tue Oct 20 04:42:48 2009
@@ -73,7 +73,7 @@
    * A basic description of a partition that is 
    * missing from either the fs or the ms.
    */
-  static class PartitionResult {
+  public static class PartitionResult {
     private String partitionName;
     private String tableName;
     

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Tue Oct 20 04:42:48 2009
@@ -434,11 +434,17 @@
    */
   private void analyzeMetastoreCheck(CommonTree ast) throws SemanticException {
     String tableName = null;
+    boolean repair = false;
     if(ast.getChildCount() > 0) {
-      tableName = unescapeIdentifier(ast.getChild(0).getText());
+      repair = ast.getChild(0).getType() == HiveParser.KW_REPAIR;
+      if (!repair) {
+        tableName = unescapeIdentifier(ast.getChild(0).getText());
+      } else if (ast.getChildCount() > 1) {
+        tableName = unescapeIdentifier(ast.getChild(1).getText());
+      }
     }
     List<Map<String, String>> specs = getPartitionSpecs(ast);
-    MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile());
+    MsckDesc checkDesc = new MsckDesc(tableName, specs, ctx.getResFile(), repair);
     rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), checkDesc), conf));
   }
 

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Tue Oct 20 04:42:48 2009
@@ -347,8 +347,8 @@
 metastoreCheck
 @init { msgs.push("metastore check statement"); }
 @after { msgs.pop(); }
-    : KW_MSCK (KW_TABLE table=Identifier partitionSpec? (COMMA partitionSpec)*)?
-    -> ^(TOK_MSCK ($table partitionSpec*)?)
+    : KW_MSCK (repair=KW_REPAIR)? (KW_TABLE table=Identifier partitionSpec? (COMMA partitionSpec)*)?
+    -> ^(TOK_MSCK $repair? ($table partitionSpec*)?)
     ;     
     
 createFunctionStatement
@@ -1308,6 +1308,7 @@
 KW_FUNCTIONS: 'FUNCTIONS';
 KW_SHOW: 'SHOW';
 KW_MSCK: 'MSCK';
+KW_REPAIR: 'REPAIR';
 KW_DIRECTORY: 'DIRECTORY';
 KW_LOCAL: 'LOCAL';
 KW_TRANSFORM : 'TRANSFORM';

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java?rev=826950&r1=826949&r2=826950&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/MsckDesc.java Tue Oct 20 04:42:48 2009
@@ -10,18 +10,22 @@
   private String tableName;
   private List<Map<String, String>> partitionSpec;
   private Path resFile;
-  
+  private boolean repairPartitions;
+
   /**
    * Description of a msck command.
    * @param tableName Table to check, can be null.
    * @param partSpecs Partition specification, can be null. 
    * @param resFile Where to save the output of the command
+   * @param repairPartitions remove stale / add new partitions found during the check
    */
-  public MsckDesc(String tableName, List<Map<String, String>> partSpecs, Path resFile) {
+  public MsckDesc(String tableName, List<Map<String, String>> partSpecs, Path resFile,
+      boolean repairPartitions) {
     super();
     this.tableName = tableName;
     this.partitionSpec = partSpecs;
     this.resFile = resFile;
+    this.repairPartitions = repairPartitions;
   }
 
   /**
@@ -65,5 +69,18 @@
   public void setResFile(Path resFile) {
     this.resFile = resFile;
   }
-  
+
+  /**
+   * @return remove stale / add new partitions found during the check
+   */
+  public boolean isRepairPartitions() {
+    return repairPartitions;
+  }
+
+  /**
+   * @param repairPartitions remove stale / add new partitions found during the check
+   */
+  public void setRepairPartitions(boolean repairPartitions) {
+    this.repairPartitions = repairPartitions;
+  }
 }

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/repair.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/repair.q?rev=826950&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/repair.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/repair.q Tue Oct 20 04:42:48 2009
@@ -0,0 +1,16 @@
+DROP TABLE repairtable;
+
+CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING);
+
+MSCK TABLE repairtable;
+
+dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=a/p2=a;
+dfs -mkdir ../build/ql/test/data/warehouse/repairtable/p1=b/p2=a;
+
+MSCK TABLE repairtable;
+
+MSCK REPAIR TABLE repairtable;
+
+MSCK TABLE repairtable;
+
+DROP TABLE repairtable;

Added: hadoop/hive/trunk/ql/src/test/results/clientpositive/repair.q.out
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/results/clientpositive/repair.q.out?rev=826950&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/results/clientpositive/repair.q.out (added)
+++ hadoop/hive/trunk/ql/src/test/results/clientpositive/repair.q.out Tue Oct 20 04:42:48 2009
@@ -0,0 +1,34 @@
+PREHOOK: query: DROP TABLE repairtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE repairtable
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE repairtable(col STRING) PARTITIONED BY (p1 STRING, p2 STRING)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@repairtable
+PREHOOK: query: MSCK TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE repairtable
+POSTHOOK: type: MSCK
+PREHOOK: query: MSCK TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE repairtable
+POSTHOOK: type: MSCK
+Partitions not in metastore:	repairtable:p1=b/p2=a	repairtable:p1=a/p2=a
+PREHOOK: query: MSCK REPAIR TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK REPAIR TABLE repairtable
+POSTHOOK: type: MSCK
+Partitions not in metastore:	repairtable:p1=b/p2=a	repairtable:p1=a/p2=a
+Repair: Added partition to metastore repairtable:p1=b/p2=a
+Repair: Added partition to metastore repairtable:p1=a/p2=a
+PREHOOK: query: MSCK TABLE repairtable
+PREHOOK: type: MSCK
+POSTHOOK: query: MSCK TABLE repairtable
+POSTHOOK: type: MSCK
+PREHOOK: query: DROP TABLE repairtable
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE repairtable
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@repairtable