Posted to commits@hive.apache.org by na...@apache.org on 2010/06/09 03:37:49 UTC

svn commit: r952877 [1/2] - in /hadoop/hive/trunk: ./ common/src/java/org/apache/hadoop/hive/conf/ conf/ metastore/if/ metastore/src/gen-cpp/ metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/ metastore/src/gen-php/ metastore/src/gen-py/h...

Author: namit
Date: Wed Jun  9 01:37:48 2010
New Revision: 952877

URL: http://svn.apache.org/viewvc?rev=952877&view=rev
Log:
HIVE-1332. Support for archiving partitions
(Paul Yang via namit)
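
For context, this patch adds ALTER TABLE ... ARCHIVE/UNARCHIVE PARTITION statements (see the Hive.g and DDLSemanticAnalyzer.java hunks below), gated by the new hive.archive.enabled setting. A minimal usage sketch, driving Hive programmatically; the srcpart table and its ds/hr values mirror Hive's standard test fixtures and are illustrative only:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.ql.Driver;
    import org.apache.hadoop.hive.ql.session.SessionState;

    public class ArchiveSketch {
      public static void main(String[] args) throws Exception {
        HiveConf conf = new HiveConf(ArchiveSketch.class);
        // Archiving operations are disabled unless explicitly enabled
        conf.setBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED, true);
        SessionState.start(new SessionState(conf));
        Driver driver = new Driver(conf);
        driver.run("ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12')");
        driver.run("ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12')");
      }
    }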


Added:
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q
    hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q
    hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
    hadoop/hive/trunk/ql/src/test/results/clientnegative/archive1.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/archive2.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/archive3.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/archive4.q.out
    hadoop/hive/trunk/ql/src/test/results/clientnegative/archive5.q.out
    hadoop/hive/trunk/ql/src/test/results/clientpositive/archive.q.out
    hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/HiveHarFileSystem.java
Modified:
    hadoop/hive/trunk/CHANGES.txt
    hadoop/hive/trunk/build-common.xml
    hadoop/hive/trunk/build.properties
    hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
    hadoop/hive/trunk/conf/hive-default.xml
    hadoop/hive/trunk/metastore/if/hive_metastore.thrift
    hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp
    hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h
    hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java
    hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php
    hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py
    hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
    hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
    hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java
    hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
    hadoop/hive/trunk/ql/src/test/templates/TestCliDriver.vm
    hadoop/hive/trunk/ql/src/test/templates/TestNegativeCliDriver.vm
    hadoop/hive/trunk/shims/build.xml
    hadoop/hive/trunk/shims/src/0.17/java/org/apache/hadoop/hive/shims/Hadoop17Shims.java
    hadoop/hive/trunk/shims/src/0.18/java/org/apache/hadoop/hive/shims/Hadoop18Shims.java
    hadoop/hive/trunk/shims/src/0.19/java/org/apache/hadoop/hive/shims/Hadoop19Shims.java
    hadoop/hive/trunk/shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java
    hadoop/hive/trunk/shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java
    hadoop/hive/trunk/shims/src/common/java/org/apache/hadoop/hive/shims/ShimLoader.java

Modified: hadoop/hive/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/CHANGES.txt?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/CHANGES.txt (original)
+++ hadoop/hive/trunk/CHANGES.txt Wed Jun  9 01:37:48 2010
@@ -84,6 +84,9 @@ Trunk -  Unreleased
     HIVE-1351. rcfilecat for debugging
     (Yongqiang He via namit)
 
+    HIVE-1332. Support for archiving partitions
+    (Paul Yang via namit)
+
   IMPROVEMENTS
     HIVE-983. Function from_unixtime takes long.
     (Ning Zhang via zshao)

Modified: hadoop/hive/trunk/build-common.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build-common.xml?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/build-common.xml (original)
+++ hadoop/hive/trunk/build-common.xml Wed Jun  9 01:37:48 2010
@@ -203,6 +203,7 @@
   <!-- the normal classpath -->
   <path id="common-classpath">
     <pathelement location="${hadoop.jar}"/>
+    <pathelement location="${hadoop.tools.jar}"/>
     <pathelement location="${build.dir.hive}/classes"/>
     <fileset dir="${build.dir.hive}" includes="*/*.jar"/>
     <fileset dir="${hive.root}/lib" includes="*.jar"/>

Modified: hadoop/hive/trunk/build.properties
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/build.properties?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/build.properties (original)
+++ hadoop/hive/trunk/build.properties Wed Jun  9 01:37:48 2010
@@ -21,6 +21,7 @@ hadoop.version.ant-internal=${hadoop.ver
 hadoop.root.default=${build.dir.hadoop}/hadoop-${hadoop.version.ant-internal}
 hadoop.root=${hadoop.root.default}
 hadoop.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-core.jar
+hadoop.tools.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-tools.jar
 hadoop.test.jar=${hadoop.root}/hadoop-${hadoop.version.ant-internal}-test.jar
 jetty.test.jar=${hadoop.root}/lib/jetty-5.1.4.jar
 servlet.test.jar=${hadoop.root}/lib/servlet-api.jar

Modified: hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (original)
+++ hadoop/hive/trunk/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java Wed Jun  9 01:37:48 2010
@@ -124,9 +124,17 @@ public class HiveConf extends Configurat
     // datastore. Once reloaded, this value is reset to false. Used for
     // testing only.
     METASTOREFORCERELOADCONF("hive.metastore.force.reload.conf", false),
-
     METASTORESERVERMINTHREADS("hive.metastore.server.min.threads", 200),
     METASTORESERVERMAXTHREADS("hive.metastore.server.max.threads", Integer.MAX_VALUE),
+    // Intermediate dir suffixes used for archiving. Not important what they
+    // are, as long as collisions are avoided
+    METASTORE_INT_ORIGINAL("hive.metastore.archive.intermediate.original",
+        "_INTERMEDIATE_ORIGINAL"),
+    METASTORE_INT_ARCHIVED("hive.metastore.archive.intermediate.archived",
+        "_INTERMEDIATE_ARCHIVED"),
+    METASTORE_INT_EXTRACTED("hive.metastore.archive.intermediate.extracted",
+        "_INTERMEDIATE_EXTRACTED"),
+
     // CLI
     CLIIGNOREERRORS("hive.cli.errors.ignore", false),
 
@@ -238,6 +246,10 @@ public class HiveConf extends Configurat
     HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
     HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
+
+    // For har files
+    HIVEARCHIVEENABLED("hive.archive.enabled", false),
+    HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false),
     ;
 
     public final String varname;
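
As a quick sketch of how the new settings are consumed (the same calls DDLTask.initialize() and DDLSemanticAnalyzer make further down; only the class name here is made up):

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

    public class ConfSketch {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf(ConfSketch.class);
        // Feature gate checked before any ARCHIVE/UNARCHIVE statement runs
        boolean enabled = conf.getBoolVar(ConfVars.HIVEARCHIVEENABLED);
        // Suffixes for the intermediate directories used while (un)archiving
        String archived = HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED);
        String original = HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL);
        String extracted = HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED);
        System.out.println(enabled + " " + archived + " " + original + " " + extracted);
      }
    }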

Modified: hadoop/hive/trunk/conf/hive-default.xml
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/conf/hive-default.xml?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/conf/hive-default.xml (original)
+++ hadoop/hive/trunk/conf/hive-default.xml Wed Jun  9 01:37:48 2010
@@ -563,4 +563,24 @@
  <description>The default partition name in case the dynamic partition column value is null/empty string or any other value that cannot be escaped. This value must not contain any special character used in HDFS URI (e.g., ':', '%', '/' etc). The user has to be aware that the dynamic partition value should not contain this value to avoid confusion.</description>
 </property>
 
+<property>
+  <name>fs.har.impl</name>
+  <value>org.apache.hadoop.hive.shims.HiveHarFileSystem</value>
+  <description>The implementation for accessing Hadoop Archives. Note that this won't be applicable to Hadoop versions less than 0.20</description>
+</property>
+
+<property>
+  <name>hive.archive.enabled</name>
+  <value>false</value>
+  <description>Whether archiving operations are permitted</description>
+</property>
+
+<property>
+  <name>hive.archive.har.parentdir.settable</name>
+  <value>false</value>
+  <description>In new Hadoop versions, the parent directory must be set while 
+  creating a HAR. Because this functionality is hard to detect with just version 
+  numbers, this conf var needs to be set manually.</description>
+</property>
+
 </configuration>
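
The fs.har.impl binding above is what lets ordinary FileSystem calls resolve the har:// locations that archived partitions carry (on Hadoop 0.20+). A small sketch; the namenode host and archive path are made up:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class HarReadSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.set("fs.har.impl", "org.apache.hadoop.hive.shims.HiveHarFileSystem");
        // An archived partition's location as stored in the metastore
        Path p = new Path("har://hdfs-nn:8020/user/hive/warehouse/tbl/ds=1/data.har");
        FileSystem fs = FileSystem.get(p.toUri(), conf);
        for (FileStatus stat : fs.listStatus(p)) {
          System.out.println(stat.getPath());
        }
      }
    }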

Modified: hadoop/hive/trunk/metastore/if/hive_metastore.thrift
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/if/hive_metastore.thrift?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/if/hive_metastore.thrift (original)
+++ hadoop/hive/trunk/metastore/if/hive_metastore.thrift Wed Jun  9 01:37:48 2010
@@ -246,6 +246,15 @@ service ThriftHiveMetastore extends fb30
                           throws(1: MetaException o1)                     
 }
 
+// For storing info about archived partitions in parameters
+
+// Whether the partition is archived
+const string IS_ARCHIVED = "is_archived",
+// The original location of the partition, before archiving. After archiving,
+// this directory will contain the archive. When the partition
+// is dropped, this directory will be deleted
+const string ORIGINAL_LOCATION = "original_location",
+
 // these should be needed only for backward compatibility with filestore
 const string META_TABLE_COLUMNS   = "columns",
 const string META_TABLE_COLUMN_TYPES   = "columns.types",
@@ -261,3 +270,5 @@ const string FILE_INPUT_FORMAT    = "fil
 const string FILE_OUTPUT_FORMAT   = "file.outputformat",
 const string META_TABLE_STORAGE   = "storage_handler",
 
+
+

Modified: hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp (original)
+++ hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.cpp Wed Jun  9 01:37:48 2010
@@ -12,6 +12,10 @@ const hive_metastoreConstants g_hive_met
 hive_metastoreConstants::hive_metastoreConstants() {
   DDL_TIME = "transient_lastDdlTime";
 
+  IS_ARCHIVED = "is_archived";
+
+  ORIGINAL_LOCATION = "original_location";
+
   META_TABLE_COLUMNS = "columns";
 
   META_TABLE_COLUMN_TYPES = "columns.types";

Modified: hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h (original)
+++ hadoop/hive/trunk/metastore/src/gen-cpp/hive_metastore_constants.h Wed Jun  9 01:37:48 2010
@@ -15,6 +15,8 @@ class hive_metastoreConstants {
   hive_metastoreConstants();
 
   std::string DDL_TIME;
+  std::string IS_ARCHIVED;
+  std::string ORIGINAL_LOCATION;
   std::string META_TABLE_COLUMNS;
   std::string META_TABLE_COLUMN_TYPES;
   std::string BUCKET_FIELD_NAME;

Modified: hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java (original)
+++ hadoop/hive/trunk/metastore/src/gen-javabean/org/apache/hadoop/hive/metastore/api/Constants.java Wed Jun  9 01:37:48 2010
@@ -18,6 +18,10 @@ public class Constants {
 
   public static final String DDL_TIME = "transient_lastDdlTime";
 
+  public static final String IS_ARCHIVED = "is_archived";
+
+  public static final String ORIGINAL_LOCATION = "original_location";
+
   public static final String META_TABLE_COLUMNS = "columns";
 
   public static final String META_TABLE_COLUMN_TYPES = "columns.types";

Modified: hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php (original)
+++ hadoop/hive/trunk/metastore/src/gen-php/hive_metastore_constants.php Wed Jun  9 01:37:48 2010
@@ -10,6 +10,10 @@ $GLOBALS['hive_metastore_CONSTANTS'] = a
 
 $GLOBALS['hive_metastore_CONSTANTS']['DDL_TIME'] = "transient_lastDdlTime";
 
+$GLOBALS['hive_metastore_CONSTANTS']['IS_ARCHIVED'] = "is_archived";
+
+$GLOBALS['hive_metastore_CONSTANTS']['ORIGINAL_LOCATION'] = "original_location";
+
 $GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMNS'] = "columns";
 
 $GLOBALS['hive_metastore_CONSTANTS']['META_TABLE_COLUMN_TYPES'] = "columns.types";

Modified: hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py (original)
+++ hadoop/hive/trunk/metastore/src/gen-py/hive_metastore/constants.py Wed Jun  9 01:37:48 2010
@@ -9,6 +9,10 @@ from ttypes import *
 
 DDL_TIME = "transient_lastDdlTime"
 
+IS_ARCHIVED = "is_archived"
+
+ORIGINAL_LOCATION = "original_location"
+
 META_TABLE_COLUMNS = "columns"
 
 META_TABLE_COLUMN_TYPES = "columns.types"

Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveAlterHandler.java Wed Jun  9 01:37:48 2010
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hive.metastore;
 
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.List;
 
 import org.apache.commons.lang.StringUtils;
@@ -148,9 +150,25 @@ public class HiveAlterHandler implements
         List<Partition> parts = msdb.getPartitions(dbname, name, 0);
         for (Partition part : parts) {
           String oldPartLoc = part.getSd().getLocation();
-          if (oldPartLoc.contains(oldTblLoc)) {
-            part.getSd().setLocation(
-                part.getSd().getLocation().replace(oldTblLoc, newTblLoc));
+          String oldTblLocPath = new Path(oldTblLoc).toUri().getPath();
+          String newTblLocPath = new Path(newTblLoc).toUri().getPath();
+          if (oldPartLoc.contains(oldTblLocPath)) {
+            URI newPartLocUri = null;
+            try {
+              URI oldPartLocUri = new URI(oldPartLoc);
+              newPartLocUri = new URI(
+                  oldPartLocUri.getScheme(),
+                  oldPartLocUri.getUserInfo(),
+                  oldPartLocUri.getHost(),
+                  oldPartLocUri.getPort(),
+                  oldPartLocUri.getPath().replace(oldTblLocPath, newTblLocPath),
+                  oldPartLocUri.getQuery(),
+                  oldPartLocUri.getFragment());
+            } catch (URISyntaxException e) {
+              throw new InvalidOperationException("Old partition location " +
+                  "is invalid. (" + oldPartLoc + ")");
+            }
+            part.getSd().setLocation(newPartLocUri.toString());
             msdb.alterPartition(dbname, name, part);
           }
         }
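
The point of rebuilding the partition URI component by component, instead of the old whole-string replace, is that only the path segment should change on a table rename; the scheme, authority and port are preserved even when the partition's location spells them differently. A self-contained walk-through with made-up locations:

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    public class RenameSketch {
      public static void main(String[] args) throws Exception {
        String oldTblLoc = "hdfs://nn:8020/user/hive/warehouse/old_name";
        String newTblLoc = "hdfs://nn:8020/user/hive/warehouse/new_name";
        String oldPartLoc = "hdfs://nn:8020/user/hive/warehouse/old_name/ds=1";

        // Compare and substitute path components only, as the hunk above does
        String oldTblPath = new Path(oldTblLoc).toUri().getPath();
        String newTblPath = new Path(newTblLoc).toUri().getPath();
        URI old = new URI(oldPartLoc);
        URI rebuilt = new URI(old.getScheme(), old.getUserInfo(), old.getHost(),
            old.getPort(), old.getPath().replace(oldTblPath, newTblPath),
            old.getQuery(), old.getFragment());
        // Prints hdfs://nn:8020/user/hive/warehouse/new_name/ds=1
        System.out.println(rebuilt);
      }
    }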

Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java Wed Jun  9 01:37:48 2010
@@ -1075,13 +1075,23 @@ public class HiveMetaStore extends Thrif
       boolean success = false;
       Path partPath = null;
       Table tbl = null;
+      Partition part = null;
+      boolean isArchived = false;
+      Path archiveParentDir = null;
+
       try {
         ms.openTransaction();
-        Partition part = get_partition(db_name, tbl_name, part_vals);
+        part = get_partition(db_name, tbl_name, part_vals);
+
         if (part == null) {
           throw new NoSuchObjectException("Partition doesn't exist. "
               + part_vals);
         }
+
+        isArchived = MetaStoreUtils.isArchived(part);
+        if (isArchived) {
+          archiveParentDir = MetaStoreUtils.getOriginalLocation(part);
+        }
         if (part.getSd() == null || part.getSd().getLocation() == null) {
           throw new MetaException("Partition metadata is corrupted");
         }
@@ -1094,9 +1104,17 @@ public class HiveMetaStore extends Thrif
       } finally {
         if (!success) {
           ms.rollbackTransaction();
-        } else if (deleteData && (partPath != null)) {
+        } else if (deleteData && ((partPath != null) || (archiveParentDir != null))) {
           if (tbl != null && !isExternal(tbl)) {
-            wh.deleteDir(partPath, true);
+            // Archived partitions have har:/to_har_file as their location.
+            // The original directory was saved in params
+            if (isArchived) {
+              assert(archiveParentDir != null);
+              wh.deleteDir(archiveParentDir, true);
+            } else {
+              assert(partPath != null);
+              wh.deleteDir(partPath, true);
+            }
             // ok even if the data is not deleted
           }
         }

Modified: hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java (original)
+++ hadoop/hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java Wed Jun  9 01:37:48 2010
@@ -834,6 +834,26 @@ public class MetaStoreUtils {
     return "TRUE".equalsIgnoreCase(params.get("EXTERNAL"));
   }
 
+  public static boolean isArchived(
+      org.apache.hadoop.hive.metastore.api.Partition part) {
+    Map<String, String> params = part.getParameters();
+    if ("true".equalsIgnoreCase(params.get(Constants.IS_ARCHIVED))) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  public static Path getOriginalLocation(
+      org.apache.hadoop.hive.metastore.api.Partition part) {
+    Map<String, String> params = part.getParameters();
+    assert(isArchived(part));
+    String originalLocation = params.get(Constants.ORIGINAL_LOCATION);
+    assert( originalLocation != null);
+
+    return new Path(originalLocation);
+  }
+
   public static boolean isNonNativeTable(Table table) {
     if (table == null) {
       return false;
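
A usage sketch for the two helpers above, building a partition by hand; the location value is illustrative:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.hadoop.hive.metastore.MetaStoreUtils;
    import org.apache.hadoop.hive.metastore.api.Constants;
    import org.apache.hadoop.hive.metastore.api.Partition;

    public class ArchivedParamsSketch {
      public static void main(String[] args) {
        Partition part = new Partition();
        Map<String, String> params = new HashMap<String, String>();
        params.put(Constants.IS_ARCHIVED, "true");
        params.put(Constants.ORIGINAL_LOCATION, "hdfs://nn:8020/user/hive/warehouse/tbl/ds=1");
        part.setParameters(params);
        // true; drop_partition uses this to pick which directory to delete
        System.out.println(MetaStoreUtils.isArchived(part));
        // hdfs://nn:8020/user/hive/warehouse/tbl/ds=1
        System.out.println(MetaStoreUtils.getOriginalLocation(part));
      }
    }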

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java Wed Jun  9 01:37:48 2010
@@ -28,6 +28,8 @@ import java.io.IOException;
 import java.io.OutputStreamWriter;
 import java.io.Serializable;
 import java.io.Writer;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.HashSet;
 import java.util.Iterator;
@@ -43,8 +45,10 @@ import org.apache.commons.logging.LogFac
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsShell;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.TableType;
 import org.apache.hadoop.hive.metastore.Warehouse;
@@ -66,6 +70,7 @@ import org.apache.hadoop.hive.ql.metadat
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
 import org.apache.hadoop.hive.ql.plan.CreateTableDesc;
 import org.apache.hadoop.hive.ql.plan.CreateTableLikeDesc;
 import org.apache.hadoop.hive.ql.plan.CreateViewDesc;
@@ -78,7 +83,7 @@ import org.apache.hadoop.hive.ql.plan.Sh
 import org.apache.hadoop.hive.ql.plan.ShowPartitionsDesc;
 import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc;
 import org.apache.hadoop.hive.ql.plan.ShowTablesDesc;
-import org.apache.hadoop.hive.ql.plan.TouchDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -88,8 +93,9 @@ import org.apache.hadoop.hive.serde2.Ser
 import org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe;
 import org.apache.hadoop.hive.serde2.dynamic_type.DynamicSerDe;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
+import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
-
+import org.apache.hadoop.util.ToolRunner;
 /**
  * DDLTask implementation.
  *
@@ -102,6 +108,12 @@ public class DDLTask extends Task<DDLWor
   private static final int separator = Utilities.tabCode;
   private static final int terminator = Utilities.newLineCode;
 
+  // These are suffixes attached to intermediate directory names used in the
+  // archiving / un-archiving process.
+  private static String INTERMEDIATE_ARCHIVED_DIR_SUFFIX;
+  private static String INTERMEDIATE_ORIGINAL_DIR_SUFFIX;
+  private static String INTERMEDIATE_EXTRACTED_DIR_SUFFIX;
+
   public DDLTask() {
     super();
   }
@@ -110,6 +122,13 @@ public class DDLTask extends Task<DDLWor
   public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
     super.initialize(conf, queryPlan, ctx);
     this.conf = conf;
+
+    INTERMEDIATE_ARCHIVED_DIR_SUFFIX =
+      HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED);
+    INTERMEDIATE_ORIGINAL_DIR_SUFFIX =
+      HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL);
+    INTERMEDIATE_EXTRACTED_DIR_SUFFIX =
+      HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED);
   }
 
   @Override
@@ -150,12 +169,17 @@ public class DDLTask extends Task<DDLWor
         return addPartition(db, addPartitionDesc);
       }
 
-      TouchDesc touchDesc = work.getTouchDesc();
-      if (touchDesc != null) {
-        return touch(db, touchDesc);
-      }
-
+      AlterTableSimpleDesc simpleDesc = work.getAlterTblSimpleDesc();
 
+      if (simpleDesc != null) {
+        if (simpleDesc.getType() == AlterTableTypes.TOUCH) {
+          return touch(db, simpleDesc);
+        } else if (simpleDesc.getType() == AlterTableTypes.ARCHIVE) {
+          return archive(db, simpleDesc, driverContext);
+        } else if (simpleDesc.getType() == AlterTableTypes.UNARCHIVE) {
+          return unarchive(db, simpleDesc);
+        }
+      }
       MsckDesc msckDesc = work.getMsckDesc();
       if (msckDesc != null) {
         return msck(db, msckDesc);
@@ -257,7 +281,7 @@ public class DDLTask extends Task<DDLWor
    * @return
    * @throws HiveException
    */
-  private int touch(Hive db, TouchDesc touchDesc)
+  private int touch(Hive db, AlterTableSimpleDesc touchDesc)
       throws HiveException {
 
     String dbName = touchDesc.getDbName();
@@ -290,6 +314,499 @@ public class DDLTask extends Task<DDLWor
     }
     return 0;
   }
+  /**
+   * Determines whether a partition has been archived
+   *
+   * @param p the partition to check
+   * @return true if the partition's parameters mark it as archived
+   */
+  private boolean isArchived(Partition p) {
+    Map<String, String> params = p.getParameters();
+    if ("true".equalsIgnoreCase(params.get(
+        org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED))) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
+  private void setIsArchived(Partition p, boolean state) {
+    Map<String, String> params = p.getParameters();
+    if (state) {
+      params.put(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED,
+          "true");
+    } else {
+      params.remove(org.apache.hadoop.hive.metastore.api.Constants.IS_ARCHIVED);
+    }
+  }
+
+  private String getOriginalLocation(Partition p) {
+    Map<String, String> params = p.getParameters();
+    return params.get(
+        org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION);
+  }
+
+  private void setOriginalLocation(Partition p, String loc) {
+    Map<String, String> params = p.getParameters();
+    if (loc == null) {
+      params.remove(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION);
+    } else {
+      params.put(org.apache.hadoop.hive.metastore.api.Constants.ORIGINAL_LOCATION, loc);
+    }
+  }
+
+  // Returns only the path component of the URI
+  private String getArchiveDirOnly(Path parentDir, String archiveName) {
+    URI parentUri = parentDir.toUri();
+    Path harDir = new Path(parentUri.getPath(), archiveName);
+    return harDir.toString();
+  }
+
+  /**
+   * Sets the appropriate attributes in the supplied Partition object to mark
+   * it as archived. Note that the metastore is not touched - a separate
+   * call to alter_partition is needed.
+   *
+   * @param p - the partition object to modify
+   * @param parentDir - the parent directory of the archive, which is the
+   * original directory that the partition's files resided in
+   * @param dirInArchive - the directory within the archive file that contains
+   * the partitions files
+   * @param archiveName - the name of the archive
+   * @throws URISyntaxException
+   */
+  private void setArchived(Partition p, Path parentDir, String dirInArchive, String archiveName)
+      throws URISyntaxException {
+    assert(isArchived(p) == false);
+    Map<String, String> params = p.getParameters();
+
+    URI parentUri = parentDir.toUri();
+    String parentHost = parentUri.getHost();
+    String harHost = null;
+    if (parentHost == null) {
+      harHost = "";
+    } else {
+      harHost = parentUri.getScheme() + "-" + parentHost;
+    }
+
+    // harUri is used to access the partition's files, which are in the archive
+    // The format of the URI is something like:
+    // har://underlyingfsscheme-host:port/archivepath
+    URI harUri = null;
+    if (dirInArchive.length() == 0) {
+      harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(),
+          getArchiveDirOnly(parentDir, archiveName),
+          parentUri.getQuery(), parentUri.getFragment());
+    } else {
+      harUri = new URI("har", parentUri.getUserInfo(), harHost, parentUri.getPort(),
+          new Path(getArchiveDirOnly(parentDir, archiveName), dirInArchive).toUri().getPath(),
+          parentUri.getQuery(), parentUri.getFragment());
+    }
+    setIsArchived(p, true);
+    setOriginalLocation(p, parentDir.toString());
+    p.setLocation(harUri.toString());
+  }
+
+  /**
+   * Sets the appropriate attributes in the supplied Partition object to mark
+   * it as not archived. Note that the metastore is not touched - a separate
+   * call to alter_partition is needed.
+   *
+   * @param p - the partition to modify
+   */
+  private void setUnArchived(Partition p) {
+    assert(isArchived(p) == true);
+    String parentDir = getOriginalLocation(p);
+    setIsArchived(p, false);
+    setOriginalLocation(p, null);
+    assert(parentDir != null);
+    p.setLocation(parentDir);
+  }
+
+  private boolean pathExists(FileSystem fs, Path p) throws HiveException {
+    try {
+      return fs.exists(p);
+    } catch (IOException e) {
+      throw new HiveException(e);
+    }
+  }
+
+  private void moveDir(FileSystem fs, Path from, Path to) throws HiveException {
+    try {
+      if (!fs.rename(from, to)) {
+        throw new HiveException("Moving " + from + " to " + to + " failed!");
+      }
+    } catch (IOException e) {
+      throw new HiveException(e);
+    }
+  }
+
+  private void deleteDir(Path dir) throws HiveException {
+    try {
+      Warehouse wh = new Warehouse(conf);
+      wh.deleteDir(dir, true);
+    } catch (MetaException e) {
+      throw new HiveException(e);
+    }
+  }
+
+  private int archive(Hive db, AlterTableSimpleDesc simpleDesc, DriverContext driverContext)
+      throws HiveException {
+    String dbName = simpleDesc.getDbName();
+    String tblName = simpleDesc.getTableName();
+
+    Table tbl = db.getTable(dbName, tblName);
+    validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.ARCHIVE);
+
+    Map<String, String> partSpec = simpleDesc.getPartSpec();
+    Partition p = db.getPartition(tbl, partSpec, false);
+
+    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
+      throw new HiveException("ARCHIVE can only be performed on managed tables");
+    }
+
+    if (p == null) {
+      throw new HiveException("Specified partition does not exist");
+    }
+
+    if (isArchived(p)) {
+      // If there were a failure right after the metadata was updated in an
+      // archiving operation, it's possible that the original, unarchived files
+      // weren't deleted.
+      Path originalDir = new Path(getOriginalLocation(p));
+      Path leftOverIntermediateOriginal = new Path(originalDir.getParent(),
+          originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+      try {
+        if (pathExists(leftOverIntermediateOriginal.getFileSystem(conf),
+            leftOverIntermediateOriginal)) {
+          console.printInfo("Deleting " + leftOverIntermediateOriginal +
+              " left over from a previous archiving operation");
+          deleteDir(leftOverIntermediateOriginal);
+        }
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+      throw new HiveException("Specified partition is already archived");
+    }
+
+    Path originalDir = p.getPartitionPath();
+    Path intermediateArchivedDir = new Path(originalDir.getParent(),
+        originalDir.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+    Path intermediateOriginalDir = new Path(originalDir.getParent(),
+        originalDir.getName() + INTERMEDIATE_ORIGINAL_DIR_SUFFIX);
+    String archiveName = "data.har";
+    FileSystem fs = null;
+    try {
+      fs = originalDir.getFileSystem(conf);
+    } catch (IOException e) {
+      throw new HiveException(e);
+    }
+
+    // The following steps seem roundabout, but they are meant to aid in
+    // recovery if a failure occurs and to keep a consistent state in the FS
+
+    // Steps:
+    // 1. Create the archive in a temporary folder
+    // 2. Move the archive dir to an intermediate dir that is at the same
+    //    level as the original partition dir. Call the new dir
+    //    intermediate-archive.
+    // 3. Rename the original partition dir to an intermediate dir. Call the
+    //    renamed dir intermediate-original
+    // 4. Rename intermediate-archive to the original partition dir
+    // 5. Change the metadata
+    // 6. Delete the original partition files in intermediate-original
+
+    // The original partition files are deleted after the metadata change
+    // because the presence of those files is used to indicate whether
+    // the original partition directory contains archived or unarchived files.
+
+    // Create an archived version of the partition in a directory ending in
+    // INTERMEDIATE_ARCHIVED_DIR_SUFFIX that's at the same level as the partition,
+    // if it does not already exist. If it does exist, we assume the dir is good
+    // to use, as the move operation that created it is atomic.
+    if (!pathExists(fs, intermediateArchivedDir) &&
+        !pathExists(fs, intermediateOriginalDir)) {
+
+      // First create the archive in a tmp dir so that if the job fails, the
+      // bad files don't pollute the filesystem
+      Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir(), "partlevel");
+
+      console.printInfo("Creating " + archiveName + " for " + originalDir.toString());
+      console.printInfo("in " + tmpDir);
+      console.printInfo("Please wait... (this may take a while)");
+
+      // Create the Hadoop archive
+      HadoopShims shim = ShimLoader.getHadoopShims();
+      int ret=0;
+      try {
+        ret = shim.createHadoopArchive(conf, originalDir, tmpDir, archiveName);
+      } catch (Exception e) {
+        throw new HiveException(e);
+      }
+      if (ret != 0) {
+        throw new HiveException("Error while creating HAR");
+      }
+      // Move from the tmp dir to an intermediate directory, at the same level as
+      // the partition directory. e.g. .../hr=12-intermediate-archived
+      try {
+        console.printInfo("Moving " + tmpDir + " to " + intermediateArchivedDir);
+        if (pathExists(fs, intermediateArchivedDir)) {
+          throw new HiveException("The intermediate archive directory already exists.");
+        }
+        fs.rename(tmpDir, intermediateArchivedDir);
+      } catch (IOException e) {
+        throw new HiveException("Error while moving tmp directory");
+      }
+    } else {
+      if (pathExists(fs, intermediateArchivedDir)) {
+        console.printInfo("Intermediate archive directory " + intermediateArchivedDir +
+        " already exists. Assuming it contains an archived version of the partition");
+      }
+    }
+
+    // If we get to here, we know that we've archived the partition files, but
+    // they may be in the original partition location, or in the intermediate
+    // original dir.
+
+    // Move the original parent directory to the intermediate original directory
+    // if the move hasn't been made already
+    if (!pathExists(fs, intermediateOriginalDir)) {
+      console.printInfo("Moving " + originalDir + " to " +
+          intermediateOriginalDir);
+      moveDir(fs, originalDir, intermediateOriginalDir);
+    } else {
+      console.printInfo(intermediateOriginalDir + " already exists. " +
+          "Assuming it contains the original files in the partition");
+    }
+
+    // If there's a failure from here to when the metadata is updated,
+    // there will be no data in the partition, or an error while trying to read
+    // the partition (if the archive files have been moved to the original
+    // partition directory). But re-running the archive command will allow
+    // recovery.
+
+    // Move the intermediate archived directory to the original parent directory
+    if (!pathExists(fs, originalDir)) {
+      console.printInfo("Moving " + intermediateArchivedDir + " to " +
+          originalDir);
+      moveDir(fs, intermediateArchivedDir, originalDir);
+    } else {
+      console.printInfo(originalDir + " already exists. " +
+          "Assuming it contains the archived version of the partition");
+    }
+
+    // Record this change in the metastore
+    try {
+      boolean parentSettable =
+        conf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE);
+
+      // dirInArchive is the directory within the archive that has all the files
+      // for this partition. With older versions of Hadoop, archiving a
+      // a directory would produce the same directory structure
+      // in the archive. So if you created myArchive.har of /tmp/myDir, the
+      // files in /tmp/myDir would be located under myArchive.har/tmp/myDir/*
+      // In this case, dirInArchive should be tmp/myDir
+
+      // With newer versions of Hadoop, the parent directory could be specified.
+      // Assuming the parent directory was set to /tmp/myDir when creating the
+      // archive, the files can be found under myArchive.har/*
+      // In this case, dirInArchive should be empty
+
+      String dirInArchive = "";
+      if (!parentSettable) {
+        dirInArchive = originalDir.toUri().getPath();
+        if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') {
+          dirInArchive = dirInArchive.substring(1);
+        }
+      }
+      setArchived(p, originalDir, dirInArchive, archiveName);
+      db.alterPartition(tblName, p);
+    } catch (Exception e) {
+      throw new HiveException("Unable to change the partition info for HAR", e);
+    }
+
+    // If a failure occurs here, the directory containing the original files
+    // will not be deleted. The user will need to run ARCHIVE again to clean
+    // this up.
+    deleteDir(intermediateOriginalDir);
+
+    return 0;
+  }
+
+  private int unarchive(Hive db, AlterTableSimpleDesc simpleDesc)
+      throws HiveException {
+    String dbName = simpleDesc.getDbName();
+    String tblName = simpleDesc.getTableName();
+
+    Table tbl = db.getTable(dbName, tblName);
+    validateAlterTableType(tbl, AlterTableDesc.AlterTableTypes.UNARCHIVE);
+
+    // Means user specified a table, not a partition
+    if (simpleDesc.getPartSpec() == null) {
+      throw new HiveException("ARCHIVE is for partitions only");
+    }
+
+    Map<String, String> partSpec = simpleDesc.getPartSpec();
+    Partition p = db.getPartition(tbl, partSpec, false);
+
+    if (tbl.getTableType() != TableType.MANAGED_TABLE) {
+      throw new HiveException("UNARCHIVE can only be performed on managed tables");
+    }
+
+    if (p == null) {
+      throw new HiveException("Specified partition does not exist");
+    }
+
+    if (!isArchived(p)) {
+      Path location = new Path(p.getLocation());
+      Path leftOverArchiveDir = new Path(location.getParent(),
+          location.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+
+      try {
+        if (pathExists(location.getFileSystem(conf), leftOverArchiveDir)) {
+          console.printInfo("Deleting " + leftOverArchiveDir + " left over " +
+          "from a previous unarchiving operation");
+          deleteDir(leftOverArchiveDir);
+        }
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+      throw new HiveException("Specified partition is not archived");
+    }
+
+    Path originalLocation = new Path(getOriginalLocation(p));
+    Path sourceDir = new Path(p.getLocation());
+    Path intermediateArchiveDir = new Path(originalLocation.getParent(),
+        originalLocation.getName() + INTERMEDIATE_ARCHIVED_DIR_SUFFIX);
+    Path intermediateExtractedDir = new Path(originalLocation.getParent(),
+        originalLocation.getName() + INTERMEDIATE_EXTRACTED_DIR_SUFFIX);
+
+    Path tmpDir = new Path(driverContext.getCtx().getMRScratchDir());
+
+    FileSystem fs = null;
+    try {
+      fs = tmpDir.getFileSystem(conf);
+      // Verify that there are no files in the tmp dir, because if there were,
+      // they would be copied to the partition
+      FileStatus [] filesInTmpDir = fs.listStatus(tmpDir);
+      if (filesInTmpDir.length != 0) {
+        for (FileStatus file : filesInTmpDir) {
+          console.printInfo(file.getPath().toString());
+        }
+        throw new HiveException("Temporary directory " + tmpDir + " is not empty");
+      }
+
+    } catch (IOException e) {
+      throw new HiveException(e);
+    }
+
+    // Some sanity checks
+    if (originalLocation == null) {
+      throw new HiveException("Missing archive data in the partition");
+    }
+    if (!"har".equals(sourceDir.toUri().getScheme())) {
+      throw new HiveException("Location should refer to a HAR");
+    }
+
+    // Clarification of terms:
+    // - The originalLocation directory represents the original directory of the
+    //   partition's files. It now contains an archived version of those files,
+    //   e.g. hdfs:/warehouse/myTable/ds=1/
+    // - The source directory is the directory containing all the files that
+    //   should be in the partition. e.g. har:/warehouse/myTable/ds=1/myTable.har/
+    //   Note the har:/ scheme
+
+    // Steps:
+    // 1. Extract the archive in a temporary folder
+    // 2. Move the extracted dir to an intermediate dir that is at the same
+    //    level as originalLocation. Call the new dir intermediate-extracted.
+    // 3. Rename the original partition dir to an intermediate dir. Call the
+    //    renamed dir intermediate-archive
+    // 4. Rename intermediate-extracted to the original partition dir
+    // 5. Change the metadata
+    // 6. Delete the archived partition files in intermediate-archive
+
+    if (!pathExists(fs, intermediateExtractedDir) &&
+        !pathExists(fs, intermediateArchiveDir)) {
+      try {
+
+        // Copy the files out of the archive into the temporary directory
+        String copySource = (new Path(sourceDir, "*")).toString();
+        String copyDest = tmpDir.toString();
+        List<String> args = new ArrayList<String>();
+        args.add("-cp");
+        args.add(copySource);
+        args.add(copyDest);
+
+        console.printInfo("Copying " + copySource + " to " + copyDest);
+        FsShell fss = new FsShell(conf);
+        int ret = 0;
+        try {
+          ret = ToolRunner.run(fss, args.toArray(new String[0]));
+        } catch (Exception e) {
+          throw new HiveException(e);
+        }
+        if (ret != 0) {
+          throw new HiveException("Error while copying files from archive");
+        }
+
+        console.printInfo("Moving " + tmpDir + " to " + intermediateExtractedDir);
+        if (fs.exists(intermediateExtractedDir)) {
+          throw new HiveException("Invalid state: the intermediate extracted " +
+              "directory already exists.");
+        }
+        fs.rename(tmpDir, intermediateExtractedDir);
+      } catch (Exception e) {
+        throw new HiveException(e);
+      }
+    }
+
+    // At this point, we know that the extracted files are in the intermediate
+    // extracted dir, or in the original directory.
+
+    if (!pathExists(fs, intermediateArchiveDir)) {
+      try {
+        console.printInfo("Moving " + originalLocation + " to " + intermediateArchiveDir);
+        fs.rename(originalLocation, intermediateArchiveDir);
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+    } else {
+      console.printInfo(intermediateArchiveDir + " already exists. " +
+      "Assuming it contains the archived version of the partition");
+    }
+
+    // If there is a failure from here until the metadata is changed,
+    // the partition will be empty or throw errors on read.
+
+    // If the original location exists here, then it must be the extracted files
+    // because in the previous step, we moved the previous original location
+    // (containing the archived version of the files) to intermediateArchiveDir
+    if (!pathExists(fs, originalLocation)) {
+      try {
+        console.printInfo("Moving " + intermediateExtractedDir + " to " + originalLocation);
+        fs.rename(intermediateExtractedDir, originalLocation);
+      } catch (IOException e) {
+        throw new HiveException(e);
+      }
+    } else {
+      console.printInfo(originalLocation + " already exists. " +
+      "Assuming it contains the extracted files in the partition");
+    }
+
+    setUnArchived(p);
+    try {
+      db.alterPartition(tblName, p);
+    } catch (InvalidOperationException e) {
+      throw new HiveException(e);
+    }
+    // If a failure happens here, the intermediate archive files won't be
+    // deleted. The user will need to call unarchive again to clear those up.
+    deleteDir(intermediateArchiveDir);
+
+    return 0;
+  }
 
   private void validateAlterTableType(
     Table tbl, AlterTableDesc.AlterTableTypes alterType)  throws HiveException {
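
The har URI assembly in setArchived() above is easiest to verify with concrete values. A standalone sketch reproducing both layouts described in the dirInArchive comment; the host and warehouse path are made up:

    import java.net.URI;
    import org.apache.hadoop.fs.Path;

    public class HarUriSketch {
      public static void main(String[] args) throws Exception {
        Path parentDir = new Path("hdfs://nn:8020/user/hive/warehouse/tbl/ds=1");
        String archiveName = "data.har";
        URI pu = parentDir.toUri();
        String harHost = pu.getScheme() + "-" + pu.getHost();           // "hdfs-nn"
        String harDir = new Path(pu.getPath(), archiveName).toString();

        // Newer Hadoop (parent dir settable): files sit at the archive root
        URI flat = new URI("har", pu.getUserInfo(), harHost, pu.getPort(),
            harDir, null, null);
        // Older Hadoop: the original path is reproduced inside the archive
        String dirInArchive = pu.getPath().substring(1);
        URI nested = new URI("har", pu.getUserInfo(), harHost, pu.getPort(),
            new Path(harDir, dirInArchive).toUri().getPath(), null, null);

        // har://hdfs-nn:8020/user/hive/warehouse/tbl/ds=1/data.har
        System.out.println(flat);
        // har://hdfs-nn:8020/user/hive/warehouse/tbl/ds=1/data.har/user/hive/warehouse/tbl/ds=1
        System.out.println(nested);
      }
    }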

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Partition.java Wed Jun  9 01:37:48 2010
@@ -51,7 +51,7 @@ import org.apache.thrift.transport.TMemo
 
 /**
  * A Hive Table Partition: is a fundamental storage unit within a Table.
- * 
+ *
  * Please note that the ql code should always go through methods of this class to access the
  * metadata, instead of directly accessing org.apache.hadoop.hive.metastore.api.Partition.
  * This helps to isolate the metastore code and the ql code.
@@ -72,7 +72,7 @@ public class Partition implements Serial
   private Class<? extends HiveOutputFormat> outputFormatClass;
   private Class<? extends InputFormat> inputFormatClass;
   private URI uri;
-  
+
   /**
    * @return The values of the partition
    * @see org.apache.hadoop.hive.metastore.api.Partition#getValues()
@@ -82,17 +82,17 @@ public class Partition implements Serial
   }
 
   /**
-   * Used only for serialization. 
+   * Used only for serialization.
    */
   public Partition() {
   }
-  
+
   /**
    * create an empty partition.
   * SemanticAnalyzer code requires an empty partition when the table is not partitioned.
    */
   public Partition(Table tbl) throws HiveException {
-    org.apache.hadoop.hive.metastore.api.Partition tPart = 
+    org.apache.hadoop.hive.metastore.api.Partition tPart =
         new org.apache.hadoop.hive.metastore.api.Partition();
     tPart.setSd(tbl.getTTable().getSd()); // TODO: get a copy
     initialize(tbl, tPart);
@@ -105,7 +105,7 @@ public class Partition implements Serial
 
   /**
    * Create partition object with the given info.
-   * 
+   *
    * @param tbl
    *          Table the partition will be in.
    * @param partSpec
@@ -158,7 +158,7 @@ public class Partition implements Serial
 
   /**
    * Initializes this object with the given variables
-   * 
+   *
    * @param table
    *          Table the partition belongs to
    * @param tPartition
@@ -265,7 +265,7 @@ public class Partition implements Serial
         clsName = tPartition.getSd().getInputFormat();
       }
       if (clsName == null) {
-        clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName(); 
+        clsName = org.apache.hadoop.mapred.SequenceFileInputFormat.class.getName();
       }
       try {
         inputFormatClass = ((Class<? extends InputFormat>) Class.forName(clsName, true,
@@ -285,10 +285,10 @@ public class Partition implements Serial
         clsName = tPartition.getSd().getOutputFormat();
       }
       if (clsName == null) {
-        clsName = HiveSequenceFileOutputFormat.class.getName(); 
+        clsName = HiveSequenceFileOutputFormat.class.getName();
       }
       try {
-        Class<?> c = (Class<? extends HiveOutputFormat>)(Class.forName(clsName, true,
+        Class<?> c = (Class.forName(clsName, true,
             JavaUtils.getClassLoader()));
         // Replace FileOutputFormat for backward compatibility
         if (!HiveOutputFormat.class.isAssignableFrom(c)) {
@@ -312,7 +312,7 @@ public class Partition implements Serial
     /*
      * TODO: Keeping this code around for later use when we will support
      * sampling on tables which are not created with CLUSTERED INTO clause
-     * 
+     *
      * // read from table meta data int numBuckets = this.table.getNumBuckets();
      * if (numBuckets == -1) { // table meta data does not have bucket
      * information // check if file system has multiple buckets(files) in this
@@ -344,7 +344,9 @@ public class Partition implements Serial
   @SuppressWarnings("nls")
   public Path getBucketPath(int bucketNum) {
     try {
-      FileSystem fs = FileSystem.get(table.getDataLocation(), Hive.get()
+      // Previously, this got the filesystem of the Table, which could be
+      // different from the filesystem of the partition.
+      FileSystem fs = FileSystem.get(getPartitionPath().toUri(), Hive.get()
           .getConf());
       String pathPattern = getPartitionPath().toString();
       if (getBucketCount() > 0) {
@@ -445,7 +447,7 @@ public class Partition implements Serial
   public org.apache.hadoop.hive.metastore.api.Partition getTPartition() {
     return tPartition;
   }
-  
+
   /**
    * Should be only used by serialization.
    */
@@ -462,5 +464,11 @@ public class Partition implements Serial
     return tPartition.getSd().getCols();
   }
 
-  
+  public String getLocation() {
+    return tPartition.getSd().getLocation();
+  }
+
+  public void setLocation(String location) {
+    tPartition.getSd().setLocation(location);
+  }
 }

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/DDLSemanticAnalyzer.java Wed Jun  9 01:37:48 2010
@@ -20,11 +20,14 @@ package org.apache.hadoop.hive.ql.parse;
 
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Properties;
+import java.util.Set;
+import java.util.Map.Entry;
 
 import org.antlr.runtime.tree.CommonTree;
 import org.antlr.runtime.tree.Tree;
@@ -32,6 +35,7 @@ import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.MetaStoreUtils;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.Order;
@@ -40,6 +44,7 @@ import org.apache.hadoop.hive.ql.exec.Ta
 import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat;
 import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc;
+import org.apache.hadoop.hive.ql.plan.AlterTableSimpleDesc;
 import org.apache.hadoop.hive.ql.plan.DDLWork;
 import org.apache.hadoop.hive.ql.plan.DescFunctionDesc;
 import org.apache.hadoop.hive.ql.plan.DescTableDesc;
@@ -51,7 +56,6 @@ import org.apache.hadoop.hive.ql.plan.Sh
 import org.apache.hadoop.hive.ql.plan.ShowTableStatusDesc;
 import org.apache.hadoop.hive.ql.plan.ShowTablesDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
-import org.apache.hadoop.hive.ql.plan.TouchDesc;
 import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
 import org.apache.hadoop.hive.serde.Constants;
 import org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe;
@@ -64,6 +68,8 @@ import org.apache.hadoop.mapred.TextInpu
 public class DDLSemanticAnalyzer extends BaseSemanticAnalyzer {
   private static final Log LOG = LogFactory.getLog("hive.ql.parse.DDLSemanticAnalyzer");
   public static final Map<Integer, String> TokenToTypeName = new HashMap<Integer, String>();
+
+  public static final Set<String> reservedPartitionValues = new HashSet<String>();
   static {
     TokenToTypeName.put(HiveParser.TOK_BOOLEAN, Constants.BOOLEAN_TYPE_NAME);
     TokenToTypeName.put(HiveParser.TOK_TINYINT, Constants.TINYINT_TYPE_NAME);
@@ -89,6 +95,12 @@ public class DDLSemanticAnalyzer extends
 
   public DDLSemanticAnalyzer(HiveConf conf) throws SemanticException {
     super(conf);
+    // Partition can't have this name
+    reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.DEFAULTPARTITIONNAME));
+    // Partition values can't contain these reserved suffixes
+    reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ORIGINAL));
+    reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_ARCHIVED));
+    reservedPartitionValues.add(HiveConf.getVar(conf, ConfVars.METASTORE_INT_EXTRACTED));
   }
 
   @Override
@@ -121,6 +133,10 @@ public class DDLSemanticAnalyzer extends
       analyzeAlterTableRename(ast);
     } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_TOUCH) {
       analyzeAlterTableTouch(ast);
+    } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ARCHIVE) {
+      analyzeAlterTableArchive(ast, false);
+    } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_UNARCHIVE) {
+      analyzeAlterTableArchive(ast, true);
     } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_ADDCOLS) {
       analyzeAlterTableModifyCols(ast, AlterTableTypes.ADDCOLS);
     } else if (ast.getToken().getType() == HiveParser.TOK_ALTERTABLE_REPLACECOLS) {
@@ -551,6 +567,7 @@ public class DDLSemanticAnalyzer extends
         break;
       case HiveParser.TOK_PARTSPEC:
         if (currentPart != null) {
+          validatePartitionValues(currentPart);
           AddPartitionDesc addPartitionDesc = new AddPartitionDesc(
               MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart,
               currentLocation, ifNotExists);
@@ -572,6 +589,7 @@ public class DDLSemanticAnalyzer extends
 
     // add the last one
     if (currentPart != null) {
+      validatePartitionValues(currentPart);
       AddPartitionDesc addPartitionDesc = new AddPartitionDesc(
           MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, currentPart,
           currentLocation, ifNotExists);
@@ -599,20 +617,50 @@ public class DDLSemanticAnalyzer extends
     List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
 
     if (partSpecs.size() == 0) {
-      TouchDesc touchDesc = new TouchDesc(
-          MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null);
+      AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(
+          MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, null,
+          AlterTableDesc.AlterTableTypes.TOUCH);
       rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
           touchDesc), conf));
     } else {
       for (Map<String, String> partSpec : partSpecs) {
-        TouchDesc touchDesc = new TouchDesc(
-            MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec);
+        AlterTableSimpleDesc touchDesc = new AlterTableSimpleDesc(
+            MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec,
+            AlterTableDesc.AlterTableTypes.TOUCH);
         rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
             touchDesc), conf));
       }
     }
   }
 
+  private void analyzeAlterTableArchive(CommonTree ast, boolean isUnArchive)
+      throws SemanticException {
+
+    if (!conf.getBoolVar(HiveConf.ConfVars.HIVEARCHIVEENABLED)) {
+      throw new SemanticException(ErrorMsg.ARCHIVE_METHODS_DISABLED.getMsg());
+    }
+    String tblName = unescapeIdentifier(ast.getChild(0).getText());
+    // partition name to value
+    List<Map<String, String>> partSpecs = getPartitionSpecs(ast);
+    if (partSpecs.size() > 1) {
+      throw new SemanticException(isUnArchive ?
+          ErrorMsg.UNARCHIVE_ON_MULTI_PARTS.getMsg() :
+          ErrorMsg.ARCHIVE_ON_MULTI_PARTS.getMsg());
+    }
+    if (partSpecs.size() == 0) {
+      throw new SemanticException(ErrorMsg.ARCHIVE_ON_TABLE.getMsg());
+    }
+
+    Map<String, String> partSpec = partSpecs.get(0);
+    AlterTableSimpleDesc archiveDesc = new AlterTableSimpleDesc(
+        MetaStoreUtils.DEFAULT_DATABASE_NAME, tblName, partSpec,
+        (isUnArchive ? AlterTableTypes.UNARCHIVE : AlterTableTypes.ARCHIVE));
+    rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(),
+        archiveDesc), conf));
+  }
+
   /**
    * Verify that the information in the metastore matches up with the data on
    * the fs.
@@ -667,4 +715,25 @@ public class DDLSemanticAnalyzer extends
     }
     return partSpecs;
   }
+
+  /**
+   * Certain partition values are reserved for use by Hive, e.g. the default
+   * partition name used in dynamic partitioning and the intermediate partition
+   * values used during the archiving process. Naturally, prohibit the user
+   * from creating partitions with these reserved values. The check that this
+   * function performs is more restrictive than the actual limitation, but it
+   * is simpler; that should be acceptable since the reserved names are fairly
+   * long and uncommon.
+   */
+  private void validatePartitionValues(Map<String, String> partSpec)
+      throws SemanticException {
+
+    for (Entry<String, String> e : partSpec.entrySet()) {
+      for (String s : reservedPartitionValues) {
+        if (e.getValue().contains(s)) {
+          throw new SemanticException(ErrorMsg.RESERVED_PART_VAL.getMsg(
+              "(User value: " + e.getValue() + " Reserved substring: " + s + ")"));
+        }
+      }
+    }
+  }
 }

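As an illustration of the validatePartitionValues() check above: a partition value that embeds one of the reserved intermediate suffixes is now rejected at analysis time. Assuming the default '_INTERMEDIATE_ORIGINAL' marker (the exact strings come from HiveConf; the new archive5.q test below exercises this case):

    ALTER TABLE srcpart ADD PARTITION (ds='2008-04-08', hr='14_INTERMEDIATE_ORIGINAL');
    -- rejected with RESERVED_PART_VAL: "Partition value contains a reserved substring"
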
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ErrorMsg.java Wed Jun  9 01:37:48 2010
@@ -149,7 +149,16 @@ public enum ErrorMsg {
   UNSUPPORTED_TYPE("DATE, DATETIME, and TIMESTAMP types aren't supported yet. Please use "
       + "STRING instead."),
   CREATE_NON_NATIVE_AS("CREATE TABLE AS SELECT cannot be used for a non-native table"),
-  LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD");
+  LOAD_INTO_NON_NATIVE("A non-native table cannot be used as target for LOAD"),
+  OVERWRITE_ARCHIVED_PART("Cannot overwrite an archived partition. " +
+      "Unarchive before running this command."),
+  ARCHIVE_METHODS_DISABLED("Archiving methods are currently disabled. " +
+      "Please see the Hive wiki for more information about enabling archiving."),
+  ARCHIVE_ON_MULTI_PARTS("ARCHIVE can only be run on a single partition"),
+  UNARCHIVE_ON_MULTI_PARTS("UNARCHIVE can only be run on a single partition"),
+  ARCHIVE_ON_TABLE("ARCHIVE can only be run on partitions"),
+  RESERVED_PART_VAL("Partition value contains a reserved substring");
 
   private String mesg;
   private String sqlState;

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/Hive.g Wed Jun  9 01:37:48 2010
@@ -95,6 +95,8 @@ TOK_ALTERTABLE_REPLACECOLS;
 TOK_ALTERTABLE_ADDPARTS;
 TOK_ALTERTABLE_DROPPARTS;
 TOK_ALTERTABLE_TOUCH;
+TOK_ALTERTABLE_ARCHIVE;
+TOK_ALTERTABLE_UNARCHIVE;
 TOK_ALTERTABLE_SERDEPROPERTIES;
 TOK_ALTERTABLE_SERIALIZER;
 TOK_ALTERTABLE_FILEFORMAT;
@@ -281,6 +283,8 @@ alterTableStatementSuffix
     | alterStatementSuffixDropPartitions
     | alterStatementSuffixAddPartitions
     | alterStatementSuffixTouch
+    | alterStatementSuffixArchive
+    | alterStatementSuffixUnArchive
     | alterStatementSuffixProperties
     | alterStatementSuffixSerdeProperties
     | alterStatementSuffixFileFormat
@@ -335,6 +339,20 @@ alterStatementSuffixTouch
     -> ^(TOK_ALTERTABLE_TOUCH Identifier (partitionSpec)*)
     ;
 
+alterStatementSuffixArchive
+@init { msgs.push("archive statement"); }
+@after { msgs.pop(); }
+    : Identifier KW_ARCHIVE (partitionSpec)*
+    -> ^(TOK_ALTERTABLE_ARCHIVE Identifier (partitionSpec)*)
+    ;
+
+alterStatementSuffixUnArchive
+@init { msgs.push("unarchive statement"); }
+@after { msgs.pop(); }
+    : Identifier KW_UNARCHIVE (partitionSpec)*
+    -> ^(TOK_ALTERTABLE_UNARCHIVE Identifier (partitionSpec)*)
+    ;
+
 partitionLocation
 @init { msgs.push("partition location"); }
 @after { msgs.pop(); }
@@ -1625,6 +1643,8 @@ KW_RECORDWRITER: 'RECORDWRITER';
 KW_SEMI: 'SEMI';
 KW_LATERAL: 'LATERAL';
 KW_TOUCH: 'TOUCH';
+KW_ARCHIVE: 'ARCHIVE';
+KW_UNARCHIVE: 'UNARCHIVE';
 
 // Operators
 // NOTE: if you add a new function/operator, add it to sysFuncNames so that describe function _FUNC_ will work.

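The two new grammar suffixes accept ALTER TABLE statements of the following shape (as exercised by the new archive.q test below):

    ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
    ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
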
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java Wed Jun  9 01:37:48 2010
@@ -3253,6 +3253,10 @@ public class SemanticAnalyzer extends Ba
       dest_tab = dest_part.getTable();
 
       dest_path = dest_part.getPath()[0];
+      if ("har".equalsIgnoreCase(dest_path.toUri().getScheme())) {
+        throw new SemanticException(ErrorMsg.OVERWRITE_ARCHIVED_PART
+            .getMsg());
+      }
       queryTmpdir = ctx.getExternalTmpFileURI(dest_path.toUri());
       table_desc = Utilities.getTableDesc(dest_tab);
 

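The scheme check above means that once a partition's data lives under a har:// URI, an INSERT OVERWRITE targeting it fails cleanly during semantic analysis instead of writing into the archive. A sketch of the rejected sequence (syntax as in the new archive tests):

    ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
    INSERT OVERWRITE TABLE srcpart PARTITION (ds='2008-04-08', hr='12')
    SELECT key, value FROM src;
    -- rejected with OVERWRITE_ARCHIVED_PART: "Cannot overwrite an archived partition. ..."
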
Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzerFactory.java Wed Jun  9 01:37:48 2010
@@ -46,6 +46,8 @@ public final class SemanticAnalyzerFacto
     commandType.put(HiveParser.TOK_ALTERTABLE_DROPPARTS, "ALTERTABLE_DROPPARTS");
     commandType.put(HiveParser.TOK_ALTERTABLE_ADDPARTS, "ALTERTABLE_ADDPARTS");
     commandType.put(HiveParser.TOK_ALTERTABLE_TOUCH, "ALTERTABLE_TOUCH");
+    commandType.put(HiveParser.TOK_ALTERTABLE_ARCHIVE, "ALTERTABLE_ARCHIVE");
+    commandType.put(HiveParser.TOK_ALTERTABLE_UNARCHIVE, "ALTERTABLE_UNARCHIVE");
     commandType.put(HiveParser.TOK_ALTERTABLE_PROPERTIES, "ALTERTABLE_PROPERTIES");
     commandType.put(HiveParser.TOK_ALTERTABLE_SERIALIZER, "ALTERTABLE_SERIALIZER");
     commandType.put(HiveParser.TOK_ALTERTABLE_SERDEPROPERTIES, "ALTERTABLE_SERDEPROPERTIES");
@@ -98,6 +100,8 @@ public final class SemanticAnalyzerFacto
       case HiveParser.TOK_ALTERTABLE_FILEFORMAT:
       case HiveParser.TOK_ALTERTABLE_CLUSTER_SORT:
       case HiveParser.TOK_ALTERTABLE_TOUCH:
+      case HiveParser.TOK_ALTERTABLE_ARCHIVE:
+      case HiveParser.TOK_ALTERTABLE_UNARCHIVE:
         return new DDLSemanticAnalyzer(conf);
       case HiveParser.TOK_CREATEFUNCTION:
       case HiveParser.TOK_DROPFUNCTION:

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableDesc.java Wed Jun  9 01:37:48 2010
@@ -42,7 +42,7 @@ public class AlterTableDesc extends DDLD
   public static enum AlterTableTypes {
     RENAME, ADDCOLS, REPLACECOLS, ADDPROPS, ADDSERDE, ADDSERDEPROPS,
     ADDFILEFORMAT, ADDCLUSTERSORTCOLUMN, RENAMECOLUMN, ADDPARTITION,
-    TOUCH
+    TOUCH, ARCHIVE, UNARCHIVE
   };
 
   AlterTableTypes op;

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/AlterTableSimpleDesc.java Wed Jun  9 01:37:48 2010
@@ -0,0 +1,92 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.util.LinkedHashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.plan.AlterTableDesc.AlterTableTypes;
+
+/**
+ * Contains information needed to modify a partition or a table
+ */
+public class AlterTableSimpleDesc extends DDLDesc {
+  private String tableName;
+  private String dbName;
+  private LinkedHashMap<String, String> partSpec;
+
+  AlterTableTypes type;
+
+  public AlterTableSimpleDesc() {
+  }
+
+  /**
+   * @param dbName
+   *          database that contains the table / partition
+   * @param tableName
+   *          table containing the partition
+   * @param partSpec
+   *          partition specification. Null if touching a table.
+   */
+  public AlterTableSimpleDesc(String dbName, String tableName,
+      Map<String, String> partSpec, AlterTableDesc.AlterTableTypes type) {
+    super();
+    this.dbName = dbName;
+    this.tableName = tableName;
+    if(partSpec == null) {
+      this.partSpec = null;
+    } else {
+      this.partSpec = new LinkedHashMap<String,String>(partSpec);
+    }
+    this.type = type;
+  }
+
+  public String getTableName() {
+    return tableName;
+  }
+
+  public void setTableName(String tableName) {
+    this.tableName = tableName;
+  }
+
+  public String getDbName() {
+    return dbName;
+  }
+
+  public void setDbName(String dbName) {
+    this.dbName = dbName;
+  }
+
+  public AlterTableDesc.AlterTableTypes getType() {
+    return type;
+  }
+
+  public void setType(AlterTableDesc.AlterTableTypes type) {
+    this.type = type;
+  }
+
+  public LinkedHashMap<String, String> getPartSpec() {
+    return partSpec;
+  }
+
+  public void setPartSpec(LinkedHashMap<String, String> partSpec) {
+    this.partSpec = partSpec;
+  }
+
+}

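A minimal sketch of how a consumer on the execution side recovers the new desc from DDLWork and branches on its type; the touch/archive/unarchive handler names here are hypothetical, not part of this patch:

    AlterTableSimpleDesc simpleDesc = work.getAlterTblSimpleDesc();
    if (simpleDesc != null) {
      switch (simpleDesc.getType()) {
      case TOUCH:
        touch(db, simpleDesc);      // fire pre/post hooks only
        break;
      case ARCHIVE:
        archive(db, simpleDesc);    // pack the partition's files into a HAR
        break;
      case UNARCHIVE:
        unarchive(db, simpleDesc);  // restore the original directory layout
        break;
      default:
        break;
      }
    }
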
Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveDesc.java Wed Jun  9 01:37:48 2010
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+/**
+ * ArchiveDesc.
+ *
+ */
+public class ArchiveDesc extends DDLDesc {
+
+}

Added: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java (added)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/ArchiveWork.java Wed Jun  9 01:37:48 2010
@@ -0,0 +1,52 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.plan;
+
+import java.io.Serializable;
+import java.util.LinkedHashMap;
+
+/**
+ * ArchiveWork.
+ *
+ */
+@Explain(displayName = "Map Reduce")
+public class ArchiveWork implements Serializable {
+  private static final long serialVersionUID = 1L;
+  private String tableName;
+  private String dbName;
+  private LinkedHashMap<String, String> partSpec;
+  private ArchiveActionType type;
+
+  public static enum ArchiveActionType {
+    ARCHIVE, UNARCHIVE
+  };
+
+
+  public ArchiveActionType getType() {
+    return type;
+  }
+
+  public void setType(ArchiveActionType type) {
+    this.type = type;
+  }
+
+  public ArchiveWork() {
+  }
+
+}

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/DDLWork.java Wed Jun  9 01:37:48 2010
@@ -42,7 +42,7 @@ public class DDLWork implements Serializ
   private ShowPartitionsDesc showPartsDesc;
   private DescTableDesc descTblDesc;
   private AddPartitionDesc addPartitionDesc;
-  private TouchDesc touchDesc;
+  private AlterTableSimpleDesc alterTblSimpleDesc;
   private MsckDesc msckDesc;
   private ShowTableStatusDesc showTblStatusDesc;
 
@@ -183,10 +183,10 @@ public class DDLWork implements Serializ
    *          information about the table/partitions that we want to touch
    */
   public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
-      TouchDesc touchDesc) {
+      AlterTableSimpleDesc simpleDesc) {
     this(inputs, outputs);
 
-    this.touchDesc = touchDesc;
+    this.alterTblSimpleDesc = simpleDesc;
   }
 
   public DDLWork(HashSet<ReadEntity> inputs, HashSet<WriteEntity> outputs,
@@ -383,18 +383,18 @@ public class DDLWork implements Serializ
   }
 
   /**
-   * @return information about the table/partitionss we want to touch.
+   * @return information about the table/partitions we want to alter.
    */
-  public TouchDesc getTouchDesc() {
-    return touchDesc;
+  public AlterTableSimpleDesc getAlterTblSimpleDesc() {
+    return alterTblSimpleDesc;
   }
 
   /**
-   * @param touchDesc
-   *          information about the table/partitions we want to touch.
+   * @param desc
+   *          information about the table/partitions we want to alter.
    */
-  public void setTouchDesc(TouchDesc touchDesc) {
-    this.touchDesc = touchDesc;
+  public void setAlterTblSimpleDesc(AlterTableSimpleDesc desc) {
+    this.alterTblSimpleDesc = desc;
   }
 
   /**

Modified: hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java (original)
+++ hadoop/hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/plan/TouchDesc.java Wed Jun  9 01:37:48 2010
@@ -1,81 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.hadoop.hive.ql.plan;
-
-import java.util.LinkedHashMap;
-import java.util.Map;
-
-/**
- * Contains information needed to touch a partition (cause pre/post hooks to
- * fire).
- */
-public class TouchDesc extends DDLDesc {
-  private String tableName;
-  private String dbName;
-  private LinkedHashMap<String, String> partSpec;
-
-
-  public TouchDesc() {
-  }
-
-  /**
-   * @param dbName
-   *          database that contains the table / partition
-   * @param tableName
-   *          table containing the partition
-   * @param partSpec
-   *          partition specification. Null if touching a table.
-   */
-  public TouchDesc(String dbName, String tableName,
-      Map<String, String> partSpec) {
-    super();
-    this.dbName = dbName;
-    this.tableName = tableName;
-    if(partSpec == null) {
-      this.partSpec = null;
-    } else {
-      this.partSpec = new LinkedHashMap<String,String>(partSpec);
-    }
-  }
-
-  public String getTableName() {
-    return tableName;
-  }
-
-  public void setTableName(String tableName) {
-    this.tableName = tableName;
-  }
-
-  public String getDbName() {
-    return dbName;
-  }
-
-  public void setDbName(String dbName) {
-    this.dbName = dbName;
-  }
-
-  public LinkedHashMap<String, String> getPartSpec() {
-    return partSpec;
-  }
-
-  public void setPartSpec(LinkedHashMap<String, String> partSpec) {
-    this.partSpec = partSpec;
-  }
-
-}

Modified: hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java?rev=952877&r1=952876&r2=952877&view=diff
==============================================================================
--- hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (original)
+++ hadoop/hive/trunk/ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java Wed Jun  9 01:37:48 2010
@@ -29,8 +29,10 @@ import java.io.Serializable;
 import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
+import java.util.HashSet;
 import java.util.LinkedList;
 import java.util.List;
+import java.util.Set;
 import java.util.TreeMap;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -80,6 +82,7 @@ public class QTestUtil {
   private final String outDir;
   private final String logDir;
   private final TreeMap<String, String> qMap;
+  private final Set<String> qSkipSet;
   private final LinkedList<String> srcTables;
 
   private ParseDriver pd;
@@ -173,6 +176,7 @@ public class QTestUtil {
     conf = new HiveConf(Driver.class);
     this.miniMr = miniMr;
     qMap = new TreeMap<String, String>();
+    qSkipSet = new HashSet<String>();
 
     if (miniMr) {
       dfs = ShimLoader.getHadoopShims().getMiniDfs(conf, 4, true, null);
@@ -230,12 +234,44 @@ public class QTestUtil {
     DataInputStream dis = new DataInputStream(bis);
     StringBuilder qsb = new StringBuilder();
 
+    // Look for a hint to not run a test on some Hadoop versions
+    Pattern pattern = Pattern.compile("-- EXCLUDE_HADOOP_MAJOR_VERSIONS(.*)");
+
     // Read the entire query
+    boolean excludeQuery = false;
+    String hadoopVer = ShimLoader.getMajorVersion();
     while (dis.available() != 0) {
-      qsb.append(dis.readLine() + "\n");
+      String line = dis.readLine();
+
+      // While we are reading the lines, detect whether this query wants to be
+      // excluded from running because the Hadoop version is incorrect
+      Matcher matcher = pattern.matcher(line);
+      if (matcher.find()) {
+        String group = matcher.group();
+        int start = group.indexOf('(');
+        int end = group.indexOf(')');
+        assert end > start;
+        // versions might be something like '0.17, 0.19'
+        String versions = group.substring(start+1, end);
+
+        Set<String> excludedVersionSet = new HashSet<String>();
+        for (String s : versions.split("\\,")) {
+          s = s.trim();
+          excludedVersionSet.add(s);
+        }
+        if (excludedVersionSet.contains(hadoopVer)) {
+          excludeQuery = true;
+        }
+      }
+      qsb.append(line + "\n");
     }
     qMap.put(qf.getName(), qsb.toString());
-    
+    if (excludeQuery) {
+      System.out.println("Due to the Hadoop version (" + hadoopVer + "), " +
+          "adding query " + qf.getName() + " to the set of tests to skip");
+      qSkipSet.add(qf.getName());
+    }
     dis.close();
   }
 
@@ -504,6 +540,10 @@ public class QTestUtil {
     return cliDriver.processLine(qMap.get(tname));
   }
 
+  public boolean shouldBeSkipped(String tname) {
+    return qSkipSet.contains(tname);
+  }
+
   public void convertSequenceFileToTextFile() throws Exception {
     // Create an instance of hive in order to create the tables
     testWarehouse = conf.getVar(HiveConf.ConfVars.METASTOREWAREHOUSE);
@@ -745,6 +785,7 @@ public class QTestUtil {
 
   public int checkCliDriverResults(String tname) throws Exception {
     String[] cmdArray;
+    assert(qMap.containsKey(tname));
 
     cmdArray = new String[] {
         "diff", "-a",
@@ -816,7 +857,7 @@ public class QTestUtil {
 
   /**
    * QTRunner: Runnable class for running a a single query file.
-   * 
+   *
    **/
   public static class QTRunner implements Runnable {
     private final QTestUtil qt;
@@ -845,7 +886,7 @@ public class QTestUtil {
   /**
    * executes a set of query files either in sequence or in parallel. Uses
    * QTestUtil to do so
-   * 
+   *
    * @param qfiles
    *          array of input query files containing arbitrary number of hive
    *          queries
@@ -855,7 +896,7 @@ public class QTestUtil {
    * @param mt
    *          whether to run in multithreaded mode or not
    * @return true if all the query files were executed successfully, else false
-   * 
+   *
    *         In multithreaded mode each query file is run in a separate thread.
    *         the caller has to arrange that different query files do not collide
    *         (in terms of destination tables)
@@ -923,7 +964,7 @@ public class QTestUtil {
     }
     return (!failed);
   }
-  
+
   public static void outputTestFailureHelpMessage() {
     System.err.println("See build/ql/tmp/hive.log, "
         + "or try \"ant test ... -Dtest.silent=false\" to get more logs.");

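For reference, a query file opts out of particular Hadoop major versions by carrying the hint on any line, exactly as each of the new archive tests below does:

    -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
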
Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive1.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,11 @@
+set hive.archive.enabled = true;
+-- Tests trying to archive a partition twice.
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+CREATE TABLE srcpart_archived LIKE srcpart;
+
+INSERT OVERWRITE TABLE srcpart_archived PARTITION (ds='2008-04-08', hr='12')
+SELECT key, value FROM srcpart WHERE ds='2008-04-08' AND hr='12';
+
+ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+ALTER TABLE srcpart_archived ARCHIVE PARTITION (ds='2008-04-08', hr='12');

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive2.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests trying to unarchive a non-archived partition
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive3.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests archiving a table
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart ARCHIVE;

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive4.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests archiving multiple partitions
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') PARTITION (ds='2008-04-08', hr='11');

Added: hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientnegative/archive5.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,5 @@
+set hive.archive.enabled = true;
+-- Tests creating a partition where the partition value will collide with
+-- an intermediate directory name
+
+ALTER TABLE srcpart ADD PARTITION (ds='2008-04-08', hr='14_INTERMEDIATE_ORIGINAL');

Added: hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q
URL: http://svn.apache.org/viewvc/hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q?rev=952877&view=auto
==============================================================================
--- hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q (added)
+++ hadoop/hive/trunk/ql/src/test/queries/clientpositive/archive.q Wed Jun  9 01:37:48 2010
@@ -0,0 +1,44 @@
+set hive.archive.enabled = true;
+set hive.enforce.bucketing = true;
+
+-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19)
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col 
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col 
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
+
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col 
+FROM (SELECT * FROM srcpart WHERE ds='2008-04-08') subq1) subq2;
+
+CREATE TABLE harbucket(key INT) 
+PARTITIONED by (ds STRING)
+CLUSTERED BY (key) INTO 10 BUCKETS;
+
+INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50;
+
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+ALTER TABLE srcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12');
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+ALTER TABLE srcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12');
+SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key;
+
+DROP TABLE harbucket;
+
+CREATE TABLE old_name(key INT) 
+PARTITIONED by (ds STRING);
+
+INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM src WHERE key < 50;
+ALTER TABLE old_name ARCHIVE PARTITION (ds='1');
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col 
+FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2;
+ALTER TABLE old_name RENAME TO new_name;
+SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col 
+FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2;
+
+DROP TABLE new_name;